In [1]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.agents import create_csv_agent
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper
from pathlib import Path
import openai
import pandas as pd
import os

In [6]:
# Resolve the OpenAI key without hard-coding it in the notebook: prefer the
# OPENAI_API_KEY environment variable and only prompt when it is missing or blank.
# (Assigning an empty literal here would overwrite a key that is already exported.)
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '').strip()
if not OPENAI_API_KEY:
    from getpass import getpass  # local import: only needed for the interactive fallback
    OPENAI_API_KEY = getpass('OpenAI API key: ').strip()

# The raw `openai` client is configured through `openai.api_key`; the environment
# variable is kept in sync for code that looks the key up there instead.
openai.api_key = OPENAI_API_KEY
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [7]:
# Helper methods
def askChatGpt(prompt, model="gpt-4", temperature=0):
    """Send a single user message to the OpenAI chat API and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: Chat model name. Defaults to ``"gpt-4"``, the value that was
            previously hard-coded.
        temperature: Sampling temperature. Defaults to ``0``, the value that
            was previously hard-coded.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

def createTextChain(file_path, chunk_size=1000, chunk_overlap=0, collection_name='example'):
    """Build a RetrievalQA chain that answers questions over one text file.

    The file is loaded, split into chunks, embedded with ``OpenAIEmbeddings``
    and indexed in a Chroma vector store whose retriever backs the chain.

    Args:
        file_path: Path of the text file to index.
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``
            (default ``1000``, the previously hard-coded value).
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``
            (default ``0``, the previously hard-coded value).
        collection_name: Name of the Chroma collection (default ``'example'``,
            the previously hard-coded value).
            NOTE(review): every call uses the same default name; when several
            files are indexed, confirm whether same-named collections share
            documents in the installed Chroma version and pass distinct names
            if they do.

    Returns:
        The chain created by ``RetrievalQA.from_chain_type`` with the
        ``"stuff"`` chain type.
    """
    llm = OpenAI(temperature=0)

    # Split the text data into chunks that will be embedded individually.
    documents = TextLoader(file_path).load()
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    chunks = splitter.split_documents(documents)

    # Create a vector store over the chunks to use as the retrieval index.
    docsearch = Chroma.from_documents(chunks, OpenAIEmbeddings(), collection_name=collection_name)
    return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

def createCsvAgent(file_path):
    """Create a verbose LangChain CSV agent for the given CSV file.

    Args:
        file_path: Path of the CSV file the agent will query.

    Returns:
        The agent returned by ``create_csv_agent``.
    """
    # Build the LLM once and hand that instance to the agent (previously a
    # second, identical OpenAI object was constructed and the first discarded).
    llm = OpenAI(temperature=0)
    return create_csv_agent(llm, file_path, verbose=True)

def generateDescription(file_path, file_type, file_name):
    """Ask the chat model for a tool description of a file.

    Args:
        file_path: Path of the file whose full contents are put in the prompt.
        file_type: File kind mentioned in the prompt (callers pass 'txt' or 'csv').
        file_name: Display name of the file mentioned in the prompt.

    Returns:
        The model's reply, as returned by ``askChatGpt``.
    """
    with open(file_path, 'r') as handle:
        contents = handle.read()

    # The prompt is: intro naming the file, the raw contents, then the
    # required answer format for the description.
    prompt_parts = (
        'Here are the contents of a ',
        file_type,
        ' file, named "',
        file_name,
        '": \n',
        contents,
        "\nGive me a description of this file in the following format: 'Useful for when you need to answer questions about (enter description here). Input should be a fully formed question.'",
    )
    return askChatGpt(''.join(prompt_parts))

def convertToCsv(file_path):
    """Ask the chat model to convert a text file to CSV and save the result.

    The output is written next to the input, with the input's final extension
    replaced by ``.csv``.

    Args:
        file_path: Path of the text file to convert.

    Returns:
        The path of the CSV file that was written.
    """
    # splitext strips only the final extension, so dots elsewhere in the path
    # (e.g. "./data/x.txt" or "v1.2/x.txt") no longer truncate the output name.
    return_file_name = os.path.splitext(file_path)[0] + '.csv'

    # Context managers close the handles even if reading, the API call or writing fails.
    with open(file_path, 'r') as text_file:
        data = text_file.read()

    response = askChatGpt('Convert this file to a csv: \n' + data)

    with open(return_file_name, 'w') as csv_file:
        csv_file.write(response)

    return return_file_name

def createTool(file_obj):
    """Wrap one data file as a LangChain ``Tool``.

    CSV files get a CSV agent. Text files flagged as structured are converted
    to CSV first (reusing an existing sibling ``.csv`` if present) and then
    also get a CSV agent. Other text files get a retrieval QA chain. The tool
    description is generated from the file that ends up being used.

    Args:
        file_obj: Dict with keys ``'path'`` (file path), ``'name'`` (tool name)
            and ``'structured'`` (whether a text file can be converted to CSV).

    Returns:
        A ``Tool`` whose ``func`` is the ``run`` method of the agent or chain.
    """
    file_path = file_obj['path']

    # Classify by the real extension. A substring test would treat any path
    # that merely contains "txt" (e.g. "txt_data/Grades.csv") as a text file
    # and could end up overwriting it during conversion.
    stem, extension = os.path.splitext(file_path)
    file_type = 'txt' if extension.lower() == '.txt' else 'csv'

    if file_type == 'txt' and file_obj['structured']:
        # Swap only the extension; replacing "txt" everywhere would also
        # rewrite directory or file names that happen to contain it.
        csv_file_path = stem + '.csv'
        # If a corresponding csv file doesn't already exist for this
        # structured txt file, create one.
        if not Path(csv_file_path).is_file():
            file_path = convertToCsv(file_path)
        else:
            file_path = csv_file_path
        file_type = 'csv'

    if file_type == 'csv':
        print('creating csv agent for: ', file_path)
        agent = createCsvAgent(file_path)
    else:
        print('creating text chain for: ', file_path)
        agent = createTextChain(file_path)

    file_description = generateDescription(file_path, file_type, file_obj['name'])
    print(file_description)
    return Tool(
        name=file_obj['name'],
        func=agent.run,
        description=file_description,
    )

In [4]:
# Details of the 2 text files and 2 csv files that are turned into tools.
# 'structured' is True when a file's contents are structured and can be
# converted to a tabular form.
file_objects = [
    dict(path='data/Syllabus.txt', name='Syllabus', structured=True),
    dict(path='data/Weekly Schedule.txt', name='Weekly Schedule', structured=True),
    dict(path='data/Grades.csv', name='Grades', structured=True),
    dict(path='data/Professors.csv', name='Professors', structured=True),
]

In [5]:
# Build one tool per configured file, keeping the order of file_objects.
tools = [createTool(file_obj) for file_obj in file_objects]

creating csv agent for:  data/Syllabus.csv


AuthenticationError: Incorrect API key provided: sk-2UsKT***************************************4Di7. You can find your API key at https://platform.openai.com/account/api-keys.

In [9]:
# Top-level agent that answers questions by calling the per-file tools.
llm = OpenAI(temperature=0)
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [125]:
# Two-step question: in the recorded run the agent first asks the "Weekly Schedule"
# tool which subject is taught, then asks the "Syllabus" tool about that subject.
agent.run("What is the syllabus for the subject taught in 3rd period on Monday?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out what subject is taught in 3rd period on Monday.
Action: Weekly Schedule
Action Input: What is the subject taught in 3rd period on Monday?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row in the dataframe that corresponds to Monday and 3rd period
Action: python_repl_ast
Action Input: df[(df['Day'] == 'Monday') & (df['Period'] == 3)][0m
Observation: [36;1m[1;3m      Day  Period      Class        Teacher
2  Monday       3  Chemistry  Jasmine Gomez[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Chemistry[0m

[1m> Finished chain.[0m

Observation: [33;1m[1;3mChemistry[0m
Thought:[32;1m[1;3m I now need to find out what is in the syllabus for Chemistry
Action: Syllabus
Action Input: What is in the syllabus for Chemistry?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to look at the dataframe to find the 

'In 10th grade Chemistry, students will explore the fundamental principles of the physical and chemical properties of matter. They will learn about the periodic table, chemical reactions, and atomic structure. Students will be introduced to laboratory techniques and safety procedures and perform experiments to reinforce theoretical concepts. The course will also focus on real-world applications of chemistry in fields such as medicine, environmental science, and engineering. By the end of the course, students will have a strong foundation in chemistry that will enable them to pursue further study in the field.'

In [121]:
# NOTE(review): in the recorded run only the "Professors" tool was consulted, and it
# filtered on education == 'M.S.' rather than on who teaches Mathematics, so the
# reported 34.33 average should be verified before it is trusted.
agent.run("What is the average age of professors who teach Mathematics in the weekly schedule?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out the age of the professors who teach Mathematics.
Action: Professors
Action Input: What is the average age of professors who teach Mathematics?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to filter the dataframe to only include professors who teach Mathematics.
Action: python_repl_ast
Action Input: df[df['education'] == 'M.S.']['age'].mean()[0m
Observation: [36;1m[1;3m34.333333333333336[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: The average age of professors who teach Mathematics is 34.33.[0m

[1m> Finished chain.[0m

Observation: [31;1m[1;3mThe average age of professors who teach Mathematics is 34.33.[0m
Thought:[32;1m[1;3m I now know the final answer.
Final Answer: The average age of professors who teach Mathematics in the weekly schedule is 34.33.[0m

[1m> Finished chain.[0m


'The average age of professors who teach Mathematics in the weekly schedule is 34.33.'

In [122]:
# Two-step question: in the recorded run the "Weekly Schedule" tool resolves the
# subject (Physics), then the "Grades" tool is asked for that subject's average.
agent.run("What is the average grade for the subject taught in 1st period on Tuesday?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out what subject is taught in 1st period on Tuesday and then find the average grade for that subject.
Action: Weekly Schedule
Action Input: What subject is taught in 1st period on Tuesday?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the row in the dataframe that matches the day and period
Action: python_repl_ast
Action Input: df[(df['Day'] == 'Tuesday') & (df['Period'] == 1)][0m
Observation: [36;1m[1;3m       Day  Period    Class       Teacher
8  Tuesday       1  Physics  Carlos Lopez[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Physics[0m

[1m> Finished chain.[0m

Observation: [33;1m[1;3mPhysics[0m
Thought:[32;1m[1;3m I now need to find the average grade for Physics
Action: Grades
Action Input: What is the average grade for Physics?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to calculate the aver

'The average grade for the subject taught in 1st period on Tuesday is 85.1.'

In [123]:
# Reverse lookup: the recorded run starts with the "Grades" tool to find the
# lowest-average subject; the final answer lists when that subject is taught.
agent.run("What days and periods is the subject with the lowest average grade taught?")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out which subject has the lowest average grade and when it is taught.
Action: Grades
Action Input: What is the subject with the lowest average grade?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to calculate the average grade for each subject
Action: python_repl_ast
Action Input: df.mean()[0m
Observation: [36;1m[1;3mCould not convert ['Student 1Student 2Student 3Student 4Student 5Student 6Student 7Student 8Student 9Student 10'] to numeric[0m
Thought:[32;1m[1;3m I need to remove the student column
Action: python_repl_ast
Action Input: df.drop('Student', axis=1).mean()[0m
Observation: [36;1m[1;3mChemistry                86.7
Mathematics              88.0
Physical Education       81.5
Foreign Language         88.0
Art                      91.9
Economics                85.6
Physics                  85.1
English Language Arts    78.3
dtype: float64[0m
Thought:[32;1m[1;

'English Language Arts is taught on Monday (Period 4), Tuesday (Period 3), Wednesday (Period 1), Thursday (Period 6), and Friday (Period 7).'

In [124]:
# Chained question: the recorded run starts with the "Grades" tool to find the
# lowest-average subject; the final answer is that subject's syllabus text.
agent.run("What is the subject with the lowest average grade? Find its syllabus.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find the subject with the lowest average grade and then find its syllabus.
Action: Grades
Action Input: What is the subject with the lowest average grade?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to calculate the average grade for each subject
Action: python_repl_ast
Action Input: df.mean()[0m
Observation: [36;1m[1;3mCould not convert ['Student 1Student 2Student 3Student 4Student 5Student 6Student 7Student 8Student 9Student 10'] to numeric[0m
Thought:[32;1m[1;3m I need to remove the student column
Action: python_repl_ast
Action Input: df.drop('Student', axis=1).mean()[0m
Observation: [36;1m[1;3mChemistry                86.7
Mathematics              88.0
Physical Education       81.5
Foreign Language         88.0
Art                      91.9
Economics                85.6
Physics                  85.1
English Language Arts    78.3
dtype: float64[0m
Thought:[32;1m[1;

'In 10th grade English Language Arts, students will build upon the reading, writing, and communication skills learned in previous years. They will explore a variety of literary genres, including fiction, non-fiction, poetry, and drama. Students will practice close reading and analysis of texts, as well as develop their own writing skills. The course will also focus on public speaking and presentation skills, and will provide students with the tools to become effective communicators.'