In [16]:
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import WikipediaQueryRun
from langchain_chroma.vectorstores import Chroma

from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

from langchain.tools.retriever import create_retriever_tool
from langchain.tools import tool

from langchain import hub
from platform import python_version

from langchain.agents import create_tool_calling_agent, AgentExecutor

In [17]:
# Wikipedia lookup tool; the query runner wraps a WikipediaAPIWrapper created with its defaults.
wikipedia_tool = WikipediaQueryRun(
    api_wrapper=WikipediaAPIWrapper(),
)

In [45]:
# Chroma vector store pointed at the local persist directory, embedding with OpenAIEmbeddings.
vectorstore = Chroma(
    persist_directory='.chromadb_intro_to_data_science/',
    embedding_function=OpenAIEmbeddings(),
)

# Retriever using the 'mmr' search type; search_kwargs ask for k=3 with lambda_mult=0.7.
retriever = vectorstore.as_retriever(
    search_type='mmr',
    search_kwargs=dict(k=3, lambda_mult=0.7),
)

# Wrap the retriever as an agent tool. The triple-quoted description is passed as written,
# including its embedded line breaks and indentation.
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name='search_data_science',
    description="""For any questions regarding the 
    Introduction to Data and Data Science course you must use this tool.
    """
)

In [36]:
# NOTE: LangChain's @tool decorator uses the docstring as the tool description
# given to the model, so it is deliberately kept to one short, correctly spelled sentence.
@tool
def get_python_version() -> str:
    ''' Useful for questions regarding the version of Python currently used. '''
    # platform.python_version() reports the running interpreter's version as a string.
    return python_version()



In [47]:
# Tools made available to the agent: Wikipedia search, the course retriever and the
# Python-version helper.
tools = [
    wikipedia_tool,
    retriever_tool,
    get_python_version,
]

In [30]:
# Chat model driving the agent. temperature=0 and a fixed seed are passed,
# presumably to keep responses as repeatable as possible.
chat = ChatOpenAI(
    model='gpt-4',
    temperature=0,
    seed=365,
)

In [31]:
# Pull the 'hwchase17/openai-tools-agent' prompt template from the LangChain hub.
chat_prompt_template = hub.pull('hwchase17/openai-tools-agent')




In [32]:
# Print the pulled prompt to inspect its system message and placeholders.
chat_prompt_template.pretty_print()


You are a helpful assistant


[33;1m[1;3m{chat_history}[0m


[33;1m[1;3m{input}[0m


[33;1m[1;3m{agent_scratchpad}[0m


In [48]:
# Build the tool-calling agent from the chat model, the tool list and the hub prompt.
agent = create_tool_calling_agent(llm=chat, tools=tools, prompt=chat_prompt_template)

In [49]:
# Executor that runs the agent with the same tool list. verbose=True prints the chain
# trace while it runs, and return_intermediate_steps=True asks for the intermediate
# steps to be returned alongside the answer.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    return_intermediate_steps=True,
)

In [59]:
# Ask a two-part question: the first part concerns the course material, the second asks
# who created the languages. The question text is sent to the model exactly as written.
ret = agent_executor.invoke(
    {
        'input': (
            'I am taking Introduction to Data and Data Science Course, '
            'and wondering what programming languages do data sciencists use? '
            'Who created these languages?'
        )
    }
)



> Entering new AgentExecutor chain...

Invoking: `search_data_science` with `{'query': 'programming languages used in data science'}`


[0m[33;1m[1;3mThus, we need a lot of computational power, and we can expect people to use the languages similar to those in the big data column. Apart from R, Python, and MATLAB, other, faster languages are used like Java, JavaScript, C, C++, and Scala. Cool. What we said may be wonderful, but that’s not all! By using one or more programming languages, people create application software or, as they are sometimes called, software solutions, that are adjusted for specific business needs.

As you can see from the infographic, R, and Python are the two most popular tools across all columns. Their biggest advantage is that they can manipulate data and are integrated within multiple data and data science software platforms. They are not just suitable for mathematical and statistical computations. In other words, R, and Python are ad

In [57]:
# Show the full result dict returned by the executor.
# NOTE(review): the output currently saved under this cell echoes a different 'input'
# than the invoke call above, and its execution count (57) precedes that call's (59);
# restart the kernel and run all cells to refresh it.
print(ret)

{'input': 'What are software tools commonly used by data sciencists?', 'output': 'Data scientists commonly use a variety of software tools for their work. Here are some of the most popular ones:\n\n1. **R and Python**: These are the two most popular tools used by data scientists. They are both programming languages that are particularly strong in statistics and data analysis. They are adaptable and can solve a wide variety of business and data-related problems.\n\n2. **Hadoop**: This is a software framework designed to handle big data. It distributes computational tasks across multiple computers, making it an effective tool for dealing with large datasets.\n\n3. **Power BI, SAS, Qlik, and Tableau**: These are software tools designed for business intelligence visualizations. They allow data scientists to create interactive dashboards and reports that can help businesses make data-driven decisions.\n\nRemember, the best tool to use depends on the specific needs and goals of the project.'