In [1]:
sports_news_text = {"title": "Sports Section", "text": "The New York Giants won their game against the Dallas Cowboys with a score of 27-24. The game was held at MetLife Stadium and was attended by over 70,000 fans. Key players included quarterback Daniel Jones, who threw for 300 yards and 2 touchdowns, and wide receiver Sterling Shepard, who had 8 receptions for 120 yards."}

In [2]:
finance_news_text = {"title": "Finance Section", "text": "The stock market saw significant gains today, with the S&P 500 rising by 1.5% and the Dow Jones Industrial Average increasing by 1.2%. Tech stocks led the rally, with companies like Apple and Microsoft reporting strong earnings. Investors are optimistic about the economic recovery and are looking forward to upcoming Federal Reserve meetings."}

In [3]:
import google.generativeai as genai 
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv()

# Access environment variables
GOOGLE_API_KEY = os.getenv("API_KEY")

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
genai.configure(api_key=GOOGLE_API_KEY)

In [5]:
for model in genai.list_models():
    print(model)

Model(name='models/embedding-gecko-001',
      base_model_id='',
      version='001',
      display_name='Embedding Gecko',
      description='Obtain a distributed representation of a text.',
      input_token_limit=1024,
      output_token_limit=1,
      supported_generation_methods=['embedText', 'countTextTokens'],
      temperature=None,
      max_temperature=None,
      top_p=None,
      top_k=None)
Model(name='models/gemini-1.5-pro-latest',
      base_model_id='',
      version='001',
      display_name='Gemini 1.5 Pro Latest',
      description=('Alias that points to the most recent production (non-experimental) release '
                   'of Gemini 1.5 Pro, our mid-size multimodal model that supports up to 2 '
                   'million tokens.'),
      input_token_limit=2000000,
      output_token_limit=8192,
      supported_generation_methods=['generateContent', 'countTokens'],
      temperature=1.0,
      max_temperature=2.0,
      top_p=0.95,
      top_k=40)
Model(name='m

In [6]:
sports_embedding_vector = genai.embed_content(model="models/embedding-001", content=sports_news_text['text'], task_type="retrieval_document")

In [7]:
finance_embedding_vector = genai.embed_content(model="models/embedding-001", content=finance_news_text['text'], task_type="retrieval_document")

In [8]:
type(sports_embedding_vector)

dict

In [9]:
sports_embedding_vector['embedding'][:5]  # Display the first 5 dimensions of the embedding vector

[-0.0012966293, -0.02887677, -0.03934953, 0.008775748, 0.05155461]

In [10]:
len(sports_embedding_vector['embedding'])

768

In [11]:
def embed_text(text):
    response = genai.embed_content(
        model="models/embedding-001",
        content=text,
        task_type="retrieval_document"
    )
    return response['embedding']

In [12]:
import pandas as pd

In [13]:
documents = [finance_news_text, sports_news_text]

In [14]:
documents

[{'title': 'Finance Section',
  'text': 'The stock market saw significant gains today, with the S&P 500 rising by 1.5% and the Dow Jones Industrial Average increasing by 1.2%. Tech stocks led the rally, with companies like Apple and Microsoft reporting strong earnings. Investors are optimistic about the economic recovery and are looking forward to upcoming Federal Reserve meetings.'},
 {'title': 'Sports Section',
  'text': 'The New York Giants won their game against the Dallas Cowboys with a score of 27-24. The game was held at MetLife Stadium and was attended by over 70,000 fans. Key players included quarterback Daniel Jones, who threw for 300 yards and 2 touchdowns, and wide receiver Sterling Shepard, who had 8 receptions for 120 yards.'}]

In [15]:
df = pd.DataFrame(documents)

In [16]:
df

Unnamed: 0,title,text
0,Finance Section,"The stock market saw significant gains today, ..."
1,Sports Section,The New York Giants won their game against the...


In [17]:
df.columns = ['Title', 'Text']

In [18]:
df

Unnamed: 0,Title,Text
0,Finance Section,"The stock market saw significant gains today, ..."
1,Sports Section,The New York Giants won their game against the...


In [19]:
df['Embedding'] = df['Text'].apply(embed_text)

In [20]:
df

Unnamed: 0,Title,Text,Embedding
0,Finance Section,"The stock market saw significant gains today, ...","[0.006075789, 0.028479917, -0.03414916, 0.0293..."
1,Sports Section,The New York Giants won their game against the...,"[-0.0012966293, -0.02887677, -0.03934953, 0.00..."


In [22]:
import numpy as np

In [25]:
def query_similarity_score(query, vector):
    query_embedding = embed_text(query)
    return np.dot(query_embedding, vector) 

In [26]:
query = "How do investors feel about upcoming Federal Reserve meetings?"

In [27]:
df['Similarity_Score'] = df['Embedding'].apply(lambda x: query_similarity_score(query, x))

In [28]:
df 

Unnamed: 0,Title,Text,Embedding,Similarity_Score
0,Finance Section,"The stock market saw significant gains today, ...","[0.006075789, 0.028479917, -0.03414916, 0.0293...",0.825059
1,Sports Section,The New York Giants won their game against the...,"[-0.0012966293, -0.02887677, -0.03934953, 0.00...",0.639542


In [34]:
df.sort_values(by='Similarity_Score', ascending=False)[['Title', 'Text']].iloc[0]

Title                                      Finance Section
Text     The stock market saw significant gains today, ...
Name: 0, dtype: object

In [35]:
def most_similar_document(query):
    df['Similarity_Score'] = df['Embedding'].apply(lambda x: query_similarity_score(query, x))
    title = df.sort_values(by='Similarity_Score', ascending=False)[['Title', 'Text']].iloc[0]['Title']
    text = df.sort_values(by='Similarity_Score', ascending=False)[['Title', 'Text']].iloc[0]['Text']
    return title, text

In [36]:
most_similar_document("What was the outcome of the football game between the Giants and Cowboys?")

('Sports Section',
 'The New York Giants won their game against the Dallas Cowboys with a score of 27-24. The game was held at MetLife Stadium and was attended by over 70,000 fans. Key players included quarterback Daniel Jones, who threw for 300 yards and 2 touchdowns, and wide receiver Sterling Shepard, who had 8 receptions for 120 yards.')

In [37]:
text, title = most_similar_document("What was the outcome of the football game between the Giants and Cowboys?")
print(f"Title: {title}\nText: {text}")

Title: The New York Giants won their game against the Dallas Cowboys with a score of 27-24. The game was held at MetLife Stadium and was attended by over 70,000 fans. Key players included quarterback Daniel Jones, who threw for 300 yards and 2 touchdowns, and wide receiver Sterling Shepard, who had 8 receptions for 120 yards.
Text: Sports Section


In [53]:
def rag(query):
    title, text = most_similar_document(query)
    response = genai.GenerativeModel('gemini-2.5-pro')
    prompt=f"Based on the following document title and text, answer the question.\n\nDocument Title: {title}\nDocument Text: {text}\n\nQuestion: {query}\n\nAnswer:",
    response = response.generate_content(prompt)
    return response.text

In [54]:
print(rag("What was the outcome of the football game between the Giants and Cowboys?"))

The New York Giants won the game against the Dallas Cowboys with a score of 27-24.
