In [14]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
import os

In [26]:
# enter your OPEN API key here
# os.environ['OPENAI_API_KEY'] = ''

In [16]:
# Enter details for the 2 text files and 2 csv files
first_text_file_path = 'pokegames.txt'
first_text_file_name = 'Pokemon Games'
first_text_file_description = 'useful for when you need to answer questions about pokemon video games. Input should be a fully formed question.'

second_text_file_path = 'pokemanga.txt'
second_text_file_name = 'Pokemon Adventures Manga'
second_text_file_description = 'useful for when you need to answer questions about pokemon games adventures manga. Input should be a fully formed question.'

first_csv_file_path = 'pokemon.csv'
first_csv_file_name = 'Pokemon Media'
first_csv_file_description = 'useful for when you need to answer questions about pokemon stats. Input should be a fully formed question.'

second_csv_file_path = 'pokedex.csv'
second_csv_file_name = 'Pokemon Media'
second_csv_file_description = 'useful for when you need to answer questions about pokemon types and its related stats. Input should be a fully formed question.'

In [17]:
# Splitting the text data into chunks and creating OpenAIEmbeddings with it
from langchain.document_loaders import TextLoader
llm = OpenAI(temperature=0)
loader = TextLoader(first_text_file_path)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# Creating a vector store to use as the index
docsearch = Chroma.from_documents(texts, embeddings, collection_name='example')

Created a chunk of size 1034, which is longer than the specified 1000
Created a chunk of size 1352, which is longer than the specified 1000
Created a chunk of size 1121, which is longer than the specified 1000
Created a chunk of size 1881, which is longer than the specified 1000
Created a chunk of size 2000, which is longer than the specified 1000
Created a chunk of size 1153, which is longer than the specified 1000
Created a chunk of size 1964, which is longer than the specified 1000
Using embedded DuckDB without persistence: data will be transient


In [18]:
# Creating a Retrieval QA chain which will answer questions pertaining to the first file
first_text_file_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

In [19]:
# Repeating the process for the 2nd text file
loader = TextLoader(second_text_file_path)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
docsearch = Chroma.from_documents(texts, embeddings, collection_name="example")
second_text_file_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=docsearch.as_retriever())

Created a chunk of size 1023, which is longer than the specified 1000
Created a chunk of size 1046, which is longer than the specified 1000
Created a chunk of size 1018, which is longer than the specified 1000
Created a chunk of size 1181, which is longer than the specified 1000
Using embedded DuckDB without persistence: data will be transient


In [20]:
# Creating 2 CSV agents which will answer questions pertaining to the input CSVs
from langchain.agents import create_csv_agent
first_csv_file_agent = create_csv_agent(OpenAI(temperature=0), first_csv_file_path, verbose=True)
second_csv_file_agent = create_csv_agent(OpenAI(temperature=0), second_csv_file_path, verbose=True)

In [21]:
# Import things that are needed generically
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.tools import BaseTool
from langchain.llms import OpenAI
from langchain import LLMMathChain, SerpAPIWrapper

In [22]:
# Setting the tools that will be used as input for initializing the main agent for answering questions
tools = [
    Tool(
        name = first_text_file_name,
        func=first_text_file_chain.run,
        description=first_text_file_description
    ),
    Tool(
        name = second_text_file_name,
        func=second_text_file_chain.run,
        description=second_text_file_description
    ),
    Tool(
        name = first_csv_file_name,
        func=first_csv_file_agent.run,
        description=first_csv_file_description
    ),
    Tool(
        name = second_csv_file_name,
        func=second_csv_file_agent.run,
        description=second_csv_file_description
    )
]

In [23]:
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

In [24]:
# Test out any prompt with this agent
agent.run("What are the pokemon games featuring Pikachu and what are his stats for attack?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out what games Pikachu is featured in and what his stats are.
Action: Pokemon Games
Action Input: What pokemon games feature Pikachu?[0m
Observation: [36;1m[1;3m Super Smash Bros. Brawl, PokéPark Wii: Pikachu's Adventure, Super Smash Bros. Melee, and Super Smash Bros. for Nintendo 3DS and Wii U and Super Smash Bros. Ultimate.[0m
Thought:[32;1m[1;3m I need to find out Pikachu's stats for attack.
Action: Pokemon Media
Action Input: What are Pikachu's stats for attack?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find Pikachu's stats
Action: python_repl_ast
Action Input: df[df['name'] == 'Pikachu']['attack'][0m
Observation: [36;1m[1;3m32    55.0
Name: attack, dtype: float64[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: 55.0[0m

[1m> Finished chain.[0m

Observation: [31;1m[1;3m55.0[0m
Thought:[32;1m[1;3m I now know the final answer.
Final

"Pikachu is featured in Super Smash Bros. Brawl, PokéPark Wii: Pikachu's Adventure, Super Smash Bros. Melee, and Super Smash Bros. for Nintendo 3DS and Wii U and Super Smash Bros. Ultimate and his stats for attack are 55.0."

In [25]:
agent.run("What is pokemon with the highest stats for attack? Does that pokemon have any games or manga associated with it?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find out which pokemon has the highest attack stats and if it has any games or manga associated with it.
Action: Pokemon Media
Action Input: What is the pokemon with the highest attack stats?[0m

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find the pokemon with the highest attack stats
Action: python_repl_ast
Action Input: df[df['attack'] == df['attack'].max()]['name'].values[0][0m
Observation: [36;1m[1;3mMega Mewtwo X[0m
Thought:[32;1m[1;3m I now know the final answer
Final Answer: Mega Mewtwo X[0m

[1m> Finished chain.[0m

Observation: [31;1m[1;3mMega Mewtwo X[0m
Thought:[32;1m[1;3m I need to find out if Mega Mewtwo X has any games or manga associated with it.
Action: Pokemon Games
Action Input: Does Mega Mewtwo X have any games associated with it?[0m
Observation: [36;1m[1;3m No, Mega Mewtwo X does not have any games associated with it.[0m
Thought:[32;1m[1;3m

'Mega Mewtwo X has the highest attack stats, but does not have any games or manga associated with it.'