In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.agents import create_csv_agent
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper
from pathlib import Path
import openai
import pandas as pd
import os

In [15]:
# Read the OpenAI key from the environment so the secret is never stored in the notebook file.
# Set it before starting Jupyter, e.g. `export OPENAI_API_KEY=...`.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
if not OPENAI_API_KEY:
    # Fail here with a clear message instead of much later with an authentication error.
    raise RuntimeError('OPENAI_API_KEY is not set; export it before running this notebook.')
# askChatGpt calls the `openai` module directly, so the key is also set on the module.
openai.api_key = OPENAI_API_KEY

In [14]:
# Shared GPT-4 chat model (temperature 0) used by the retrieval chain and the top-level agent
llm = ChatOpenAI(
    model='gpt-4',
    temperature=0,
)
# Helper methods
def askChatGpt(prompt, model="gpt-4", temperature=0):
    """Send a single user message to the OpenAI chat completion API and return the reply text.

    Args:
        prompt: Text sent as the content of the one user message.
        model: Chat model name passed to the API. Defaults to "gpt-4".
        temperature: Sampling temperature passed to the API. Defaults to 0.

    Returns:
        The message content of the first choice in the API response.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=messages,
    )
    return response.choices[0].message.content

def createTextChain(file_path, collection_name=None):
    """Build a RetrievalQA chain over the contents of one text file.

    The file is loaded, split into chunks of up to 1000 characters with no overlap,
    embedded with OpenAIEmbeddings and indexed in a Chroma collection. The collection
    is then used as the retriever of a "stuff" RetrievalQA chain driven by the shared `llm`.

    Args:
        file_path: Path of the text file to index.
        collection_name: Name of the Chroma collection to index into. When omitted, a
            name is derived from the file name so that different files do not end up
            in one shared collection.

    Returns:
        The RetrievalQA chain for this file.
    """
    # Splitting the text data into chunks and creating OpenAIEmbeddings with it
    documents = TextLoader(file_path).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()

    if collection_name is None:
        # Keep only ASCII letters/digits (everything else becomes '_'), prefix with
        # 'file_' so the name starts with a letter, cap at 63 characters and drop
        # trailing underscores so it also ends with a letter or digit.
        stem = Path(file_path).stem
        safe_stem = ''.join(ch if (ch.isascii() and ch.isalnum()) else '_' for ch in stem)
        collection_name = ('file_' + safe_stem)[:63].rstrip('_')

    # Creating a vector store to use as the index
    docsearch = Chroma.from_documents(texts, embeddings, collection_name=collection_name)
    text_file_chain = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=docsearch.as_retriever()
    )
    return text_file_chain

def createCsvAgent(file_path):
    """Return a verbose LangChain CSV agent for `file_path`, backed by a temperature-0 OpenAI LLM."""
    deterministic_llm = OpenAI(temperature=0)
    csv_agent = create_csv_agent(deterministic_llm, file_path, verbose=True)
    return csv_agent

def generateDescription(file_path, file_type, file_name):
    """Ask the chat model for a tool description of a file.

    The whole file is read and embedded in a prompt that states the file type and
    name and requests a description in the "Useful for when you need to answer
    questions about ..." format. The model's reply is returned unchanged.
    """
    with open(file_path, 'r') as source:
        contents = source.read()
        prompt_parts = [
            'Here are the contents of a ',
            file_type,
            ' file, named "',
            file_name,
            '": \n',
            contents,
            "\nGive me a description of this file in the following format: 'Useful for when you need to answer questions about (enter description here along with information about column names with some example values). Input should be a fully formed question.'",
        ]
        return askChatGpt(''.join(prompt_parts))

def convertToCsv(file_path):
    """Ask the chat model to convert a text file to csv and save the reply next to it.

    Args:
        file_path: Path of the text file to convert.

    Returns:
        The path of the written csv file: `file_path` with its final extension
        replaced by '.csv'.
    """
    # splitext only strips the last extension of the final path component, so dots in
    # directory names or earlier in the file name (e.g. './data/a.b.txt') are preserved.
    return_file_name = os.path.splitext(file_path)[0] + '.csv'

    # `with` closes the handle even if reading or the API call below raises
    with open(file_path, 'r') as text_file:
        data = text_file.read()

    prompt = 'Convert this file to a csv: \n' + data
    response = askChatGpt(prompt)

    with open(return_file_name, "w") as csv_file:
        csv_file.write(response)

    return return_file_name


def isFileStructured(file_obj):
    """Ask the chat model whether a file's contents can be converted to a well structured csv.

    Args:
        file_obj: Dict with a 'path' key pointing at the file to inspect.

    Returns:
        True if the model's reply contains "yes" (case-insensitive), otherwise False.
    """
    # `with` guarantees the handle is closed; newlines are stripped before prompting
    with open(file_obj['path'], 'r') as text_file:
        data = text_file.read().replace('\n', '')
    prompt = 'Can this file data be converted to a well structured csv format? Answer either Yes or No. \n' + data
    response = askChatGpt(prompt)
    return 'yes' in response.lower()


def createTool(file_obj):
    """Create a LangChain Tool that answers questions about one input file.

    Csv files get a csv agent. Text files flagged as structured are converted to csv
    (reusing an existing sibling csv when present) and also get a csv agent. All other
    text files get a retrieval chain. The tool description is generated by the chat
    model from the file that is actually used.

    Args:
        file_obj: Dict with 'path' and 'name' keys and an optional boolean
            'structured' key (treated as False when absent).

    Returns:
        A Tool named after file_obj['name'] whose func is the agent's or chain's `run`.
    """
    file_path = file_obj['path']
    # Decide by the real extension, not by 'txt' appearing anywhere in the path
    base_path, extension = os.path.splitext(file_path)
    file_type = 'txt' if extension.lower() == '.txt' else 'csv'

    # .get: text files that were never flagged simply count as unstructured
    if file_type == 'txt' and file_obj.get('structured', False):
        # Only the extension is swapped; 'txt' elsewhere in the path is left alone
        csv_file_path = base_path + '.csv'
        csv_file = Path(csv_file_path)
        # if corresponding csv file doesnt already exist for this structured txt file, create one
        if not csv_file.is_file():
            file_path = convertToCsv(file_path)
        else:
            file_path = csv_file_path
        file_type = 'csv'

    if file_type == 'csv':
        print('creating csv agent for: ', file_path)
        agent = createCsvAgent(file_path)
    else:
        print('creating text chain for: ', file_path)
        agent = createTextChain(file_path)

    file_description = generateDescription(file_path, file_type, file_obj['name'])
    print(file_description)
    print()
    tool = Tool(
        name = file_obj['name'],
        func = agent.run,
        description = file_description
    )
    return tool

In [9]:
# One entry per input file (.txt or .csv): 'path' is the file location, 'name' becomes the tool name.
# A 'structured' flag may be added to an entry later; it is set true when the contents of the file
# are structured and can be converted to a tabular form.
file_objects = [
    dict(path='data/Syllabus.csv', name='Syllabus'),
    dict(path='data/Weekly Schedule.csv', name='Weekly Schedule'),
    dict(path='data/Grades.csv', name='Grades'),
    dict(path='data/Professors.csv', name='Professors'),
]

In [10]:
# Check whether the contents of each input text file can be structured as a csv and record the
# answer on the corresponding file object. Every .txt entry gets an explicit 'structured' flag
# (True or False) so createTool never hits a missing key; csv entries are left untouched.
for file_obj in file_objects:
    # Match on the real extension rather than 'txt' appearing anywhere in the path
    if file_obj['path'].lower().endswith('.txt'):
        file_obj['structured'] = isFileStructured(file_obj)
file_objects

[{'path': 'data/Syllabus.csv', 'name': 'Syllabus'},
 {'path': 'data/Weekly Schedule.csv', 'name': 'Weekly Schedule'},
 {'path': 'data/Grades.csv', 'name': 'Grades'},
 {'path': 'data/Professors.csv', 'name': 'Professors'}]

In [6]:
# Build one tool per input file, in the same order as file_objects
tools = [createTool(file_obj) for file_obj in file_objects]

creating csv agent for:  data/Syllabus.csv
Useful for when you need to answer questions about the syllabus of 10th grade subjects, including their descriptions and key topics covered. The file contains two columns: "Subject" and "Description", with example values like "Chemistry" and "In 10th grade Chemistry, students will explore the fundamental principles of the physical and chemical properties of matter...". Input should be a fully formed question.

creating csv agent for:  data/Weekly Schedule.csv
Useful for when you need to answer questions about a student's weekly class schedule, including the day, period, subject, and professor for each class. For example, "What subject is taught on Monday during period 1 and who is the professor?" The answer would be Mathematics, taught by David Kim. The column names are Day, Period, Subject, and Professor, with example values like Monday, 1, Mathematics, and David Kim. Input should be a fully formed question.

creating csv agent for:  data/Gra

In [7]:
# Top-level agent over the per-file tools (zero-shot ReAct, verbose tracing on)
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [112]:
# Answering this needs two tools: the weekly schedule first, then the syllabus
question = "What is the syllabus for the subject taught in 3rd period on Monday?"
agent.run(question)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find out the subject taught in 3rd period on Monday and then look up its syllabus.
Action: Weekly Schedule
Action Input: What subject is taught on Monday during period 3?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the subject for Monday during period 3
Action: python_repl_ast
Action Input: df[(df['Day'] == 'Monday') & (df['Period'] == 3)]['Subject'][0m
Observation: [36;1m[1;3m2    Chemistry
Name: Subject, dtype: object[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Chemistry[0m

[1m> Finished chain.[0m

Observation: [33;1m[1;3mChemistry[0m
Thought:[32;1m[1;3mNow that I know the subject is Chemistry, I can look up its syllabus.
Action: Syllabus
Action Input: What is the syllabus for 10th grade Chemistry?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the description for 10th grade Chemistry
Action: py

'In 10th grade Chemistry, students will explore the fundamental principles of the physical and chemical properties of matter. They will learn about the periodic table, chemical reactions, and atomic structure. Students will be introduced to laboratory techniques and safety procedures and perform experiments to reinforce theoretical concepts. The course will also focus on real-world applications of chemistry in fields such as medicine, environmental science, and engineering. By the end of the course, students will have a strong foundation in chemistry that will enable them to pursue further study in the field.'