In [None]:
# --------------------------------------------------- Test 1 (Without Mongo DB) --------------------------------------------------------

import os
import pandas as pd
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv


# Load the dataset. The agent below is given the CSV path rather than this
# DataFrame, so `df` is only here for ad-hoc inspection in the notebook.
df = pd.read_csv("dataset/data.csv")

# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")  # not used in this test; kept so the name stays defined in the kernel
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message instead of the opaque TypeError that
# `os.environ[...] = None` would raise.
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key

# SECURITY: API keys must never be hard-coded here, not even inside a comment.
# Any key that was ever committed to this notebook should be treated as
# compromised and rotated.

# Initialize the ChatOpenAI with GPT-4-turbo
llm = ChatOpenAI(temperature=0.5, model="gpt-4-turbo")  # Set the model to GPT-4-turbo

# Create the CSV agent.
# SECURITY: allow_dangerous_code=True opts in to the agent running
# model-generated Python in this kernel; only use it with trusted data/prompts.
agent_executor = create_csv_agent(llm, "dataset/data.csv", allow_dangerous_code=True, verbose=True)

# Invoke the agent with a prompt and get the result
prompt = input("Enter your prompt: ")
result = agent_executor.invoke(prompt)

# The agent normally returns a dict; print only its final answer when it does
if isinstance(result, dict):
    print(result.get("output", "No output found"))
else:
    # Already plain text (or some other object): print it as-is
    print(result)

# Total number of orders created during a specific time period (e.g., month, quarter).
# Identification of the quarter with the highest spending.
# Analysis of frequently ordered line items.

In [3]:
# --------------------------------------- Testing Parameters to verify model answer accuracy -----------------------------------

import pandas as pd

# Read the purchase-order CSV that the reference figures below are computed from
df = pd.read_csv("dataset/data.csv")

# --- Row count for fiscal year 2014-2015 ---
fy_mask = df['Fiscal Year'] == '2014-2015'
number_of_entries = df[fy_mask].shape[0]
print(f"Number of entries for the fiscal year 2014-2015: {number_of_entries}")

# --- Item frequency ---
# Occurrences of each 'Item Name'; computed once and reused for the most- and
# least-frequent questions.
item_counts = df['Item Name'].value_counts()

most_frequent_item = item_counts.idxmax()
most_frequent_count = item_counts.max()
print(f"The most frequently mentioned item is: {most_frequent_item} with {most_frequent_count} mentions")

# The three items with the smallest occurrence counts
least_three_items = item_counts.nsmallest(3)
print("The three least frequently ordered items are:")
print(least_three_items)

# --- Quantity sold per item ---
# Total 'Quantity' per 'Item Name', flattened back into a two-column frame
quantity_by_item = df.groupby('Item Name')['Quantity'].sum()
item_sales = quantity_by_item.reset_index()

# Row with the largest summed quantity
most_sold_label = item_sales['Quantity'].idxmax()
most_sold = item_sales.loc[most_sold_label]
print("Most Sold Item:")
print(most_sold)

# Row with the smallest summed quantity
least_sold_label = item_sales['Quantity'].idxmin()
least_sold = item_sales.loc[least_sold_label]
print("Least Sold Item:")
print(least_sold)

Number of entries for the fiscal year 2014-2015: 116537
The most frequently mentioned item is: Medical Supplies with 2916 mentions
The three least frequently ordered items are:
Item Name
Extron pc video interface              1
Water Damage Remediation and Repair    1
FSR for Cal-Access - Amdt #3           1
Name: count, dtype: int64
Most Sold Item:
Item Name    3-2019 #10 Double Window Envelope
Quantity                            70000000.0
Name: 7721, dtype: object
Least Sold Item:
Item Name    Beans (dried)
Quantity            0.0001
Name: 25939, dtype: object


In [None]:
# ---------------------------------------------- Test 2 (with Mongo DB) -----------------------------------------------------------------

import os
import pandas as pd
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI
import pymongo
#from pymongo import MongoClient
import certifi
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

#ca = certifi.where()

from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message: MongoClient(None) would fall back to
# PyMongo's default host instead of the intended deployment, and
# `os.environ[...] = None` raises an opaque TypeError.
if not uri:
    raise RuntimeError("MONGODB_URI is not set; define it in the environment or in .env")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)

# SECURITY: connection strings with embedded user names/passwords must never
# be written into the notebook, not even inside comments. Any credential that
# was ever committed here should be treated as compromised and rotated.

print("connecting to cluster")
# Select the database and collection
db = client['test']  # Replace with your actual database name
collection = db['purchase_order']  # Replace with your actual collection name

print("DB Access successful , Training model on Database ")

# Query the data from MongoDB
data = list(collection.find({}))  # Retrieves all documents from the collection
# Report only the size: dumping every document floods the saved output and
# copies the collection's contents into the notebook file.
print(f"Fetched {len(data)} documents from DB")
# Convert the MongoDB data to a pandas DataFrame
df = pd.DataFrame(data)

# An empty collection would be written out as an empty CSV, which the CSV
# agent cannot parse; stop here with an explicit message instead.
if df.empty:
    raise RuntimeError("Collection 'test.purchase_order' returned no documents; nothing to analyse")

# If you want to drop the MongoDB-specific `_id` field
if '_id' in df.columns:
    df = df.drop(columns=['_id'])

# create_csv_agent is given a file path below, so the data is written to disk first
df.to_csv('mongodb_data.csv', index=False)

# Initialize the ChatOpenAI with GPT-4-turbo
llm = ChatOpenAI(temperature=0.5, model="gpt-4-turbo")  # Ensure the model name is correct

# Create the CSV agent from the exported file.
# SECURITY: allow_dangerous_code=True opts in to the agent running
# model-generated Python in this kernel; only use it with trusted data/prompts.
agent_executor = create_csv_agent(llm, "mongodb_data.csv", allow_dangerous_code=True, verbose=True)


# Invoke the agent with a prompt and get the result.
# input() already shows the prompt text, so it is not printed separately.
prompt = input("Enter your prompt: ")
result = agent_executor.invoke(prompt)

# Output processing: print only the final answer when the result is a dict
if isinstance(result, dict):
    print(result.get("output", "No output found"))
else:
    print(result)


In [None]:
# ---------------------------------------------- Test 3 (with Mongo DB) -----------------------------------------------------------------
import os
import pandas as pd
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message: MongoClient(None) would fall back to
# PyMongo's default host instead of the intended deployment, and
# `os.environ[...] = None` raises an opaque TypeError.
if not uri:
    raise RuntimeError("MONGODB_URI is not set; define it in the environment or in .env")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'), maxPoolSize=1000)

# Ping the server to confirm the connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)


# Connect to the database and collection
db = client['test']  # Replace with your actual database name
collection = db['purchase_order']  # Replace with your actual collection name

# Query the data from MongoDB
data = list(collection.find({}))  # Retrieves all documents from the collection
# Report only the size: dumping every document floods the saved output and
# copies the collection's contents into the notebook file.
print(f"Fetched {len(data)} documents from DB")
# Convert the MongoDB data to a pandas DataFrame
df = pd.DataFrame(data)

# Drop the MongoDB-specific `_id` field if it exists
if '_id' in df.columns:
    df = df.drop(columns=['_id'])

# Initialize the ChatOpenAI with GPT-4-turbo
llm = ChatOpenAI(temperature=0.5, model="gpt-4-turbo")

# Ask for the question before building the agent
prompt = input("Enter your prompt: ")

# Create the Pandas DataFrame agent using the DataFrame directly.
# SECURITY: allow_dangerous_code=True opts in to the agent running
# model-generated Python in this kernel; only use it with trusted data/prompts.
agent_executor = create_pandas_dataframe_agent(llm, df, allow_dangerous_code=True, verbose=True)

result = agent_executor.invoke(prompt)

# Output processing: print only the final answer when the result is a dict
if isinstance(result, dict):
    print(result.get("output", "No output found"))
else:
    print(result)


In [None]:
# -------------------------------------------- Test 4 (With Multithreading) ---------------------------------------------------------------------- 
import os
import pandas as pd
import threading
from queue import Queue
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from pymongo import MongoClient, ASCENDING
from pymongo.server_api import ServerApi

from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message: MongoClient(None) would fall back to
# PyMongo's default host instead of the intended deployment, and
# `os.environ[...] = None` raises an opaque TypeError.
if not uri:
    raise RuntimeError("MONGODB_URI is not set; define it in the environment or in .env")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create a new client with connection pooling enabled
client = MongoClient(uri, server_api=ServerApi('1'), maxPoolSize=100)

# Ping the server to confirm the connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)


# Connect to the database and collection
db = client['test']  # Replace with your actual database name
collection = db['purchase_order']  # Replace with your actual collection name

# Thread-safe queue to store fetched data
data_queue = Queue()

# Function to fetch a batch of data from MongoDB
def fetch_batch(skip, limit, source=None, results=None):
    """Fetch one page of documents and hand it to the caller.

    Args:
        skip: Number of documents to skip before the page starts.
        limit: Maximum number of documents in the page.
        source: Object exposing ``find()`` (a collection). Defaults to the
            module-level ``collection``.
        results: Optional dict. When given, the page is stored as
            ``results[skip]`` so the caller can reassemble pages in offset
            order. When omitted, the page is put on the module-level
            ``data_queue`` (the original behaviour).
    """
    source = collection if source is None else source
    # Sort on _id so that consecutive skip/limit pages come from one stable
    # ordering; unsorted queries are not guaranteed to return documents in the
    # same order each time, which could duplicate or drop rows across pages.
    # Cursor.sort() with a single key defaults to ascending.
    data_batch = list(source.find({}).sort("_id").skip(skip).limit(limit))
    if results is None:
        data_queue.put(data_batch)
    else:
        results[skip] = data_batch

# Function to process data in parallel using multithreading
def process_data_in_parallel(batch_size=1000, num_threads=4, source=None):
    """Load every document of the collection into one DataFrame.

    Pages are fetched in rounds of ``num_threads`` concurrent ``fetch_batch``
    calls. Rounds continue until some page comes back shorter than
    ``batch_size``, which marks the end of the collection.

    Args:
        batch_size: Documents per page; must be >= 1.
        num_threads: Pages fetched concurrently per round; must be >= 1.
        source: Optional collection to read from; defaults to the module-level
            ``collection`` (resolved inside ``fetch_batch``).

    Returns:
        DataFrame of all documents in ``_id`` order with the ``_id`` column
        removed. An empty DataFrame when the collection has no documents.

    Raises:
        ValueError: If ``batch_size`` or ``num_threads`` is smaller than 1.
        RuntimeError: If a worker thread died before delivering its page.
    """
    if batch_size < 1 or num_threads < 1:
        raise ValueError("batch_size and num_threads must both be >= 1")

    pages = {}  # skip offset -> list of documents; each thread writes its own key
    skip = 0
    exhausted = False

    while not exhausted:
        threads = []
        offsets = []

        # Start one round of fetch threads
        for _ in range(num_threads):
            thread = threading.Thread(target=fetch_batch, args=(skip, batch_size, source, pages))
            threads.append(thread)
            offsets.append(skip)
            thread.start()
            skip += batch_size

        # Wait for the whole round to complete
        for thread in threads:
            thread.join()

        # A thread that raised never stored its page; do not pass that off as
        # "end of data".
        missing = [offset for offset in offsets if offset not in pages]
        if missing:
            raise RuntimeError(f"Fetching batches at offsets {missing} failed; see the thread traceback")

        # A short (or empty) page means the end of the collection was reached
        exhausted = any(len(pages[offset]) < batch_size for offset in offsets)

    # Rebuild in offset order so the row order is deterministic
    all_data = [
        pd.DataFrame(pages[offset]).drop(columns=['_id'], errors='ignore')
        for offset in sorted(pages)
        if pages[offset]
    ]

    # pd.concat raises on an empty list, so handle the empty collection explicitly
    if not all_data:
        return pd.DataFrame()

    # Combine all batches into a single DataFrame
    full_df = pd.concat(all_data, ignore_index=True)
    return full_df

# Fetch the documents through the threaded loader and get them back as one DataFrame
df = process_data_in_parallel()

# Chat model that drives the agent (the "chatgpt-4o-latest" model)
llm = ChatOpenAI(model="chatgpt-4o-latest", temperature=0.3)

# Pandas DataFrame agent working on `df` directly.
# allow_dangerous_code=True is an explicit opt-in required by the agent factory.
agent_executor = create_pandas_dataframe_agent(
    llm,
    df,
    verbose=True,
    allow_dangerous_code=True,
)

# Read the question from the user and run the agent on it
prompt = input("Enter your prompt: ")
result = agent_executor.invoke(prompt)

# Print the "output" entry when the result is a dict, otherwise the raw result
print(result.get("output", "No output found") if isinstance(result, dict) else result)


In [None]:
# --------------------------------------------------- Test 5 (Multi threading) -----------------------------------------------------
import os
import pandas as pd
import threading
from queue import Queue
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from pymongo import MongoClient, ASCENDING
from pymongo.server_api import ServerApi

from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message: MongoClient(None) would fall back to
# PyMongo's default host instead of the intended deployment, and
# `os.environ[...] = None` raises an opaque TypeError.
if not uri:
    raise RuntimeError("MONGODB_URI is not set; define it in the environment or in .env")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create a new client with connection pooling enabled
client = MongoClient(uri, server_api=ServerApi('1'), maxPoolSize=100)

# Ping the server to confirm the connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)


# Connect to the database and collection
db = client['test']  # Replace with your actual database name
collection = db['purchase_order']  # Replace with your actual collection name

# Thread-safe queue to store fetched data
data_queue = Queue()

# Function to fetch a batch of data from MongoDB
def fetch_batch(skip, limit, source=None, results=None):
    """Fetch one page of documents and hand it to the caller.

    Args:
        skip: Number of documents to skip before the page starts.
        limit: Maximum number of documents in the page.
        source: Object exposing ``find()`` (a collection). Defaults to the
            module-level ``collection``.
        results: Optional dict. When given, the page is stored as
            ``results[skip]`` so the caller can reassemble pages in offset
            order. When omitted, the page is put on the module-level
            ``data_queue`` (the original behaviour).
    """
    source = collection if source is None else source
    # Sort on _id so that consecutive skip/limit pages come from one stable
    # ordering; unsorted queries are not guaranteed to return documents in the
    # same order each time, which could duplicate or drop rows across pages.
    # Cursor.sort() with a single key defaults to ascending.
    data_batch = list(source.find({}).sort("_id").skip(skip).limit(limit))
    if results is None:
        data_queue.put(data_batch)
    else:
        results[skip] = data_batch

# Function to process data in parallel using multithreading
def process_data_in_batches(collection, batch_size=1000, num_threads=4):
    """Load every document of ``collection`` into one DataFrame.

    Pages are fetched in rounds of ``num_threads`` concurrent ``fetch_batch``
    calls against the ``collection`` argument (not the module-level global).
    Rounds continue until some page comes back shorter than ``batch_size``,
    which marks the end of the collection.

    Args:
        collection: Collection to read from; must expose ``find()``.
        batch_size: Documents per page; must be >= 1.
        num_threads: Pages fetched concurrently per round; must be >= 1.

    Returns:
        DataFrame of all documents in ``_id`` order with the ``_id`` column
        removed. An empty DataFrame when the collection has no documents.

    Raises:
        ValueError: If ``batch_size`` or ``num_threads`` is smaller than 1.
        RuntimeError: If a worker thread died before delivering its page.
    """
    if batch_size < 1 or num_threads < 1:
        raise ValueError("batch_size and num_threads must both be >= 1")

    pages = {}  # skip offset -> list of documents; each thread writes its own key
    skip = 0
    exhausted = False

    while not exhausted:
        threads = []
        offsets = []

        # Start one round of fetch threads
        for _ in range(num_threads):
            thread = threading.Thread(target=fetch_batch, args=(skip, batch_size, collection, pages))
            threads.append(thread)
            offsets.append(skip)
            thread.start()
            skip += batch_size

        # Wait for the whole round to complete
        for thread in threads:
            thread.join()

        # A thread that raised never stored its page; do not pass that off as
        # "end of data".
        missing = [offset for offset in offsets if offset not in pages]
        if missing:
            raise RuntimeError(f"Fetching batches at offsets {missing} failed; see the thread traceback")

        # A short (or empty) page means the end of the collection was reached
        exhausted = any(len(pages[offset]) < batch_size for offset in offsets)

    # Rebuild in offset order so the row order is deterministic
    all_data = [
        pd.DataFrame(pages[offset]).drop(columns=['_id'], errors='ignore')
        for offset in sorted(pages)
        if pages[offset]
    ]

    # pd.concat raises on an empty list, so handle the empty collection explicitly
    if not all_data:
        return pd.DataFrame()

    # Combine all batches into a single DataFrame
    full_df = pd.concat(all_data, ignore_index=True)
    return full_df

# Fetch the documents through the threaded batch loader and get them back as one DataFrame
df = process_data_in_batches(collection)

# Chat model that drives the agent (the "chatgpt-4o-latest" model)
llm = ChatOpenAI(model="chatgpt-4o-latest", temperature=0.5)

# Pandas DataFrame agent working on `df` directly.
# allow_dangerous_code=True is an explicit opt-in required by the agent factory.
agent_executor = create_pandas_dataframe_agent(
    llm,
    df,
    verbose=True,
    allow_dangerous_code=True,
)

# Read the question from the user and run the agent on it
prompt = input("Enter your prompt: ")
result = agent_executor.invoke(prompt)

# Print the "output" entry when the result is a dict, otherwise the raw result
print(result.get("output", "No output found") if isinstance(result, dict) else result)


In [None]:
# ----------------------------------------------- Test 6 GUI TEST ---------------------------------------------------------------------------

import os
import pandas as pd
import tkinter as tk
from tkinter import scrolledtext
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

# Load the dataset. The agent below is given the CSV path rather than this
# DataFrame, so `df` is only here for ad-hoc inspection in the notebook.
df = pd.read_csv("dataset/data.csv")

# Load environment variables from .env file
load_dotenv()

# NOTE: never print the key -- notebook outputs are saved with the file and
# would expose it. (This cell also must not reference `uri`: it is never
# defined here, so doing so fails on a fresh kernel.)
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message instead of the opaque TypeError that
# `os.environ[...] = None` would raise.
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Initialize the ChatOpenAI with GPT-4-turbo
llm = ChatOpenAI(temperature=0.5, model="gpt-4-turbo")

# Create the CSV agent.
# SECURITY: allow_dangerous_code=True opts in to the agent running
# model-generated Python in this kernel; only use it with trusted data/prompts.
agent_executor = create_csv_agent(llm, "dataset/data.csv", allow_dangerous_code=True, verbose=True)

# Function to handle the prompt and display the result
def handle_prompt():
    """Run the agent on the text in ``prompt_entry`` and show the answer.

    Reads the module-level widgets ``prompt_entry`` and ``output_text`` and the
    module-level ``agent_executor``. The output box is replaced with one of:
    the agent's answer, a hint when the prompt is blank, or the error message
    when the agent call raises.

    NOTE: the agent is invoked synchronously inside this callback, so the
    window does not respond until the call returns.
    """
    prompt = prompt_entry.get().strip()  # Get the user's prompt from the input field

    if not prompt:
        # Do not spend an API call on an empty question
        output = "Please enter a prompt."
    else:
        try:
            result = agent_executor.invoke(prompt)  # Invoke the agent with the user's prompt
        except Exception as exc:
            # Without this the error would only reach stderr and the window
            # would show nothing.
            output = f"Error: {exc}"
        else:
            # The agent normally returns a dict; show only its final answer
            if isinstance(result, dict):
                output = result.get("output", "No output found")
            else:
                output = result

    # Replace the previous contents of the output text box ("1.0" = line 1, char 0)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, str(output))

# Top-level window of the application
root = tk.Tk()
root.title("LangChain CSV Agent")

# Build the widgets first (same creation order as they appear in the window)
prompt_label = tk.Label(master=root, text="Enter your prompt:")
prompt_entry = tk.Entry(master=root, width=80)
# The button runs handle_prompt on click
submit_button = tk.Button(master=root, text="Submit", command=handle_prompt)
# Scrollable, word-wrapped box that receives the agent's answer
output_text = scrolledtext.ScrolledText(master=root, wrap=tk.WORD, width=100, height=20)

# Then stack them top-to-bottom: label, entry, button, output box
prompt_label.pack(pady=5)
prompt_entry.pack(pady=5)
submit_button.pack(pady=5)
output_text.pack(pady=10)

# Hand control to Tk's event loop; this returns once the window is closed
root.mainloop()


In [None]:
# --------------------------------------- Test 7 (GUI + MongoDb ) ---------------------------------------------------------------------

import os
import pandas as pd
import tkinter as tk
from tkinter import scrolledtext
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
from langchain_openai import ChatOpenAI
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()


# Retrieve the keys from environment variables.
# NOTE: never print these values -- notebook outputs are saved with the file
# and would expose the credentials to anyone who can read it.
uri = os.getenv("MONGODB_URI")
openai_api_key = os.getenv("OPENAI_API_KEY")

# Fail early with a clear message: MongoClient(None) would fall back to
# PyMongo's default host instead of the intended deployment, and
# `os.environ[...] = None` raises an opaque TypeError.
if not uri:
    raise RuntimeError("MONGODB_URI is not set; define it in the environment or in .env")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; define it in the environment or in .env")

# Set OpenAI API key
os.environ["OPENAI_API_KEY"] = openai_api_key
# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'), maxPoolSize=1000)

# Ping the server to confirm the connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)


# Connect to the database and collection
db = client['test']  # Replace with your actual database name
collection = db['purchase_order']  # Replace with your actual collection name

# Query the data from MongoDB
data = list(collection.find({}))  # Retrieves all documents from the collection

# Convert the MongoDB data to a pandas DataFrame
df = pd.DataFrame(data)

# Drop the MongoDB-specific `_id` field if it exists
if '_id' in df.columns:
    df = df.drop(columns=['_id'])

# Initialize the ChatOpenAI with GPT-4-turbo
llm = ChatOpenAI(temperature=0.5, model="gpt-4-turbo")

# Function to handle the prompt and display the result
def handle_prompt():
    """Run a DataFrame agent on the text in ``prompt_entry`` and show the answer.

    Reads the module-level widgets ``prompt_entry`` and ``output_text`` and the
    module-level ``llm`` and ``df``; a new agent is built on every call. The
    output box is replaced with one of: the agent's answer, a hint when the
    prompt is blank, or the error message when building or calling the agent
    raises.

    NOTE: the agent is invoked synchronously inside this callback, so the
    window does not respond until the call returns.
    """
    prompt = prompt_entry.get().strip()  # Get the user's prompt from the input field

    if not prompt:
        # Do not spend an API call on an empty question
        output = "Please enter a prompt."
    else:
        try:
            # SECURITY: allow_dangerous_code=True opts in to the agent running
            # model-generated Python; only use it with trusted data/prompts.
            agent_executor = create_pandas_dataframe_agent(llm, df, allow_dangerous_code=True, verbose=True)
            result = agent_executor.invoke(prompt)  # Invoke the agent with the user's prompt
        except Exception as exc:
            # Without this the error would only reach stderr and the window
            # would show nothing.
            output = f"Error: {exc}"
        else:
            # The agent normally returns a dict; show only its final answer
            if isinstance(result, dict):
                output = result.get("output", "No output found")
            else:
                output = result

    # Replace the previous contents of the output text box ("1.0" = line 1, char 0)
    output_text.delete("1.0", tk.END)
    output_text.insert(tk.END, str(output))

# Top-level window of the application
root = tk.Tk()
root.title("LangChain MongoDB Agent")

# Build the widgets first (same creation order as they appear in the window)
prompt_label = tk.Label(master=root, text="Enter your prompt:")
prompt_entry = tk.Entry(master=root, width=80)
# The button runs handle_prompt on click
submit_button = tk.Button(master=root, text="Submit", command=handle_prompt)
# Scrollable, word-wrapped box that receives the agent's answer
output_text = scrolledtext.ScrolledText(master=root, wrap=tk.WORD, width=100, height=20)

# Then stack them top-to-bottom: label, entry, button, output box
prompt_label.pack(pady=5)
prompt_entry.pack(pady=5)
submit_button.pack(pady=5)
output_text.pack(pady=10)

# Hand control to Tk's event loop; this returns once the window is closed
root.mainloop()
