In [1]:
# Load the python-dotenv IPython extension, then read the variables from a local
# .env file into the environment (presumably the OpenAI API key used by the
# OpenAI clients below -- confirm against your .env).
%load_ext dotenv
%dotenv

In [61]:
from langchain_core.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [13]:
# Embedding model used to vectorise text for the vector store.
# NOTE(review): presumably this must be the same model the persisted store in
# ./vector_db was built with -- confirm before changing it.
embedding = OpenAIEmbeddings(
    model='text-embedding-ada-002',
)

In [15]:
# Open the Chroma store persisted under ./vector_db, using `embedding` as its
# embedding function.
vector_store = Chroma(
    persist_directory='./vector_db',
    embedding_function=embedding,
)

In [17]:
# Prompt text sent to the model. The {question} and {context} placeholders are
# filled in when the prompt is formatted; the closing instruction asks the model
# to append a "Resources:" line naming the source lecture(s).
TEMPLATE = """
Answer the following question:
{question}

To answer the question, use only the following context:
{context}

At the end of the response, specify the name of the lecture this context is taken from in the format:
Resources: *Lecture Title*
where *Lecture Title* should be substituted with the title of all resource lectures.
"""

# Build the prompt object from the template string above.
prompt_template = PromptTemplate.from_template(TEMPLATE)

In [19]:
# Expose the vector store as a retriever that uses the 'mmr' search type.
#   k           -- number of documents requested per query
#   lambda_mult -- MMR relevance/diversity trade-off (see the LangChain
#                  as_retriever docs for the exact scale)
retriever = vector_store.as_retriever(
    search_type='mmr',
    search_kwargs={
        'k': 3,
        'lambda_mult': 0.7,
    },
)

In [23]:
# Chat model that writes the final answer. temperature=0 together with a fixed
# seed is presumably meant to keep answers repeatable between runs; the
# completion is capped at 250 tokens.
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0,
    seed=10,
    max_completion_tokens=250,
)

In [63]:
# Output parser placed at the end of the chain so that invoking the chain
# yields a plain string (see the cell output of chain.invoke below).
str_parser = StrOutputParser()

In [25]:
# Example user question that is passed to the chain below.
question = 'what software do data scientists use?'

In [65]:
# Retrieval-augmented chain, read top to bottom:
#   1. the input question is fanned out to two prompt variables -- 'context'
#      comes from the retriever, 'question' is passed through unchanged;
#   2. the prompt template is filled with both values;
#   3. the chat model generates the answer;
#   4. the parser turns the model output into a plain string.
chain = (
    {'context': retriever, 'question': RunnablePassthrough()}
    | prompt_template
    | llm
    | str_parser
)

In [67]:
# Run the chain on the question; as the last expression in the cell, the
# returned answer string is shown as the cell output.
chain.invoke(question)

'Data scientists commonly use R and Python as their primary programming languages due to their adaptability and ability to manipulate data effectively. These languages are integrated within various data science software platforms and are suitable for a wide range of mathematical and statistical computations. Additionally, software frameworks like Hadoop are utilized to manage big data by distributing computational tasks across multiple computers. For business intelligence visualizations, tools such as Power BI, SaS, Qlik, and Tableau are also popular among data scientists.\n\nResources: *Data Analytics Course*'