In [1]:
# Generation: Generating a Response

In [2]:
%load_ext dotenv
%dotenv

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings 
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings 
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

In [4]:
import os
api_key = os.getenv("GEMINI_API_KEY")

embedding = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=api_key
)


vectorstore = Chroma(persist_directory = "./intro-to-ds-lectures", 
                     embedding_function = embedding
                  )

In [5]:
len(vectorstore.get()['documents'])

41

In [6]:
retriever = vectorstore.as_retriever(search_type = 'mmr', 
                                     search_kwargs = {'k':3, 
                                                      'lambda_mult':0.7})

In [7]:
TEMPLATE = '''
Answer the following question:
{question}

To answer the question, use only the following context:
{context}

At the end of the response, specify the name of the lecture this context is taken from in the format:
Resources: *Lecture Title*
where *Lecture Title* should be substituted with the title of all resource lectures.
'''

prompt_template = PromptTemplate.from_template(TEMPLATE)

In [8]:
chat = ChatGoogleGenerativeAI(
    google_api_key = os.getenv("GEMINI_API_KEY"),  
    model = "gemini-2.0-flash",
    temperature = 0,
    max_output_tokens = 100,
    model_kwargs = {"seed": 365}
)

In [9]:
question = "What software do data scientists use?"

In [10]:
chain = ({'context': retriever, 
         'question': RunnablePassthrough()} 
         | prompt_template 
         | chat 
         | StrOutputParser())

In [11]:
chain.invoke(question)

'Data scientists use programming and software tools. Hadoop is a software framework used to address the complexity of big data. Power BI, SaS, Qlik, and Tableau are examples of software designed for business intelligence visualizations.\n\nResources: *Programming Languages & Software Employed in Data Science - All the Tools You Need, Programming Languages & Software Employed in Data Science - All the Tools You Need*'

In [12]:
print('Data scientists use a variety of software tools. R and Python are the two most popular tools as they can manipulate data and are integrated within multiple data and data science software platforms. They are adaptable and can solve a wide range of business and data-related problems. Hadoop is a software framework designed to handle the complexity and computational intensity of big data by distributing computational tasks on multiple computers. Additionally, Power BI, SaS, Qlik, and Tableau are top-notch examples of software designed for business intelligence visualizations.\n\nResources: Programming Languages & Software Employed in Data Science - All the Tools You Need')

Data scientists use a variety of software tools. R and Python are the two most popular tools as they can manipulate data and are integrated within multiple data and data science software platforms. They are adaptable and can solve a wide range of business and data-related problems. Hadoop is a software framework designed to handle the complexity and computational intensity of big data by distributing computational tasks on multiple computers. Additionally, Power BI, SaS, Qlik, and Tableau are top-notch examples of software designed for business intelligence visualizations.

Resources: Programming Languages & Software Employed in Data Science - All the Tools You Need
