##### Copyright 2024 Google LLC.

In [1]:
!pip install -U -q google-generativeai


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import textwrap
import numpy as np
import pandas as pd

import google.generativeai as genai



from IPython.display import Markdown

In [2]:
# Or use `os.getenv('API_KEY')` to fetch an environment variable.

import google.generativeai as genai

# Set your API key manually
GOOGLE_API_KEY = "AIzaSyDEWge2PzWvKDCLHkPFhD4xdsvB7GPSqss"
genai.configure(api_key=GOOGLE_API_KEY)



Key Point: Next, you will choose a model. Any embedding model will work for this tutorial, but for real applications it's important to choose a specific model and stick with it. The outputs of different models are not compatible with each other.

**Note**: At this time, the Gemini API is [only available in certain regions](https://ai.google.dev/gemini-api/docs/available-regions).

In [3]:
for m in genai.list_models():
  if 'embedContent' in m.supported_generation_methods:
    print(m.name)

models/embedding-001
models/text-embedding-004


In [6]:
model = 'models/embedding-001'
embedding = genai.embed_content(model=model,
                                content="jvhiyhighjihiyhgkiuh",
                                task_type="retrieval_document",
                                title="title")

print(embedding)

{'embedding': [0.022285031, 0.006432155, -0.051558062, -0.035611223, 0.05828589, 0.025274409, 0.03630779, -0.0007751998, -0.014323384, 0.051769305, -0.01888757, -0.0109815085, -0.042532813, 0.014663985, 0.00230979, -0.013360447, 0.02633334, -0.00234763, 0.028458158, -0.0071547227, -0.016558176, 0.010666258, -0.022199009, -0.040127497, 0.017608069, -0.024883412, 0.013067426, -0.03489601, -0.032509234, -0.013983085, -0.012554802, 0.0021771942, -0.061531052, 0.00518471, 0.022047464, -0.06869267, 0.0021179556, -0.007148818, 0.021522304, 0.018251022, 0.023489838, -0.0069533726, -0.033541355, 0.011370304, 0.01797465, 0.016786432, -0.034509867, 0.028804654, 0.012110709, -0.060457814, 0.026449282, 0.027039526, 0.0889816, -0.025439387, -0.025371859, -0.017466009, 0.05213288, 0.019027468, -0.04672284, 0.039657682, -0.022967376, -0.0034840596, 0.035442337, -0.014047184, -0.0419975, -0.046768572, -0.030109633, 0.022686193, 0.06764761, -0.00010029963, -0.04032826, -0.057085663, 0.026000096, -0.0182

Organize the contents of the dictionary into a dataframe for better visualization.

Get the embeddings for each of these bodies of text. Add this information to the dataframe.

In [4]:
import json

In [5]:
import json

# Load JSON file
with open('data.json', 'r') as f:
    data = json.load(f)  # Load JSON list

text_data=dict()
# Traverse and print each document as a JSON string
i=1
for doc in data:
    json_str = json.dumps(doc)  # Convert dictionary to JSON string
    text_data[i]=json_str[1:-1]
    print(text_data[i])
    i+=1



"Ages": 25, "Gender": "Male", "Height": 180, "Weight": 80, "Activity Level": "Moderately Active", "Dietary Preference": "Omnivore", "Daily Calorie Target": 2000, "Protein": 120, "Sugar": 125.0, "Sodium": 24.0, "Calories": 2020, "Carbohydrates": 250, "Fiber": 30.0, "Fat": 60, "Breakfast Suggestion": "Oatmeal with berries and nuts", "Breakfast Calories": 320.0, "Breakfast Protein": 15.0, "Breakfast Carbohydrates": 48.0, "Breakfast Fats": 12.0, "Lunch Suggestion": "Grilled chicken salad with mixed greens", "Lunch Calories": 250.0, "Lunch Protein": 26.0, "Lunch Carbohydrates": 26.0, "Dinner Suggestion": "Salmon with roasted vegetables", "Dinner Calories": 2020.0, "Dinner Protein.1": 60.0, "Dinner Carbohydrates.1": 250.0, "Dinner Fats": 60.0, "Snack Suggestion": "Greek yogurt with fruit", "Snacks Calories": 150, "Snacks Protein": 10, "Snacks Carbohydrates": 20, "Snacks Fats": 5, "Disease": "Weight Gain", "Lunch Fats": 12
"Ages": 32, "Gender": "Female", "Height": 165, "Weight": 65, "Activity

In [6]:
df = pd.DataFrame.from_dict(text_data, orient='index')

In [7]:
df.columns = ['Text']
df

Unnamed: 0,Text
1,"""Ages"": 25, ""Gender"": ""Male"", ""Height"": 180, ""..."
2,"""Ages"": 32, ""Gender"": ""Female"", ""Height"": 165,..."
3,"""Ages"": 48, ""Gender"": ""Male"", ""Height"": 175, ""..."
4,"""Ages"": 55, ""Gender"": ""Female"", ""Height"": 160,..."
5,"""Ages"": 62, ""Gender"": ""Male"", ""Height"": 170, ""..."
...,...
1694,"""Ages"": 53, ""Gender"": ""Female"", ""Height"": 182,..."
1695,"""Ages"": 38, ""Gender"": ""Male"", ""Height"": 150, ""..."
1696,"""Ages"": 57, ""Gender"": ""Male"", ""Height"": 165, ""..."
1697,"""Ages"": 40, ""Gender"": ""Male"", ""Height"": 166, ""..."


In [8]:
df

Unnamed: 0,Text
1,"""Ages"": 25, ""Gender"": ""Male"", ""Height"": 180, ""..."
2,"""Ages"": 32, ""Gender"": ""Female"", ""Height"": 165,..."
3,"""Ages"": 48, ""Gender"": ""Male"", ""Height"": 175, ""..."
4,"""Ages"": 55, ""Gender"": ""Female"", ""Height"": 160,..."
5,"""Ages"": 62, ""Gender"": ""Male"", ""Height"": 170, ""..."
...,...
1694,"""Ages"": 53, ""Gender"": ""Female"", ""Height"": 182,..."
1695,"""Ages"": 38, ""Gender"": ""Male"", ""Height"": 150, ""..."
1696,"""Ages"": 57, ""Gender"": ""Male"", ""Height"": 165, ""..."
1697,"""Ages"": 40, ""Gender"": ""Male"", ""Height"": 166, ""..."


In [9]:
df['Title'] = df.index.astype(str)

In [10]:
df

Unnamed: 0,Text,Title
1,"""Ages"": 25, ""Gender"": ""Male"", ""Height"": 180, ""...",1
2,"""Ages"": 32, ""Gender"": ""Female"", ""Height"": 165,...",2
3,"""Ages"": 48, ""Gender"": ""Male"", ""Height"": 175, ""...",3
4,"""Ages"": 55, ""Gender"": ""Female"", ""Height"": 160,...",4
5,"""Ages"": 62, ""Gender"": ""Male"", ""Height"": 170, ""...",5
...,...,...
1694,"""Ages"": 53, ""Gender"": ""Female"", ""Height"": 182,...",1694
1695,"""Ages"": 38, ""Gender"": ""Male"", ""Height"": 150, ""...",1695
1696,"""Ages"": 57, ""Gender"": ""Male"", ""Height"": 165, ""...",1696
1697,"""Ages"": 40, ""Gender"": ""Male"", ""Height"": 166, ""...",1697


In [12]:
# Get the embeddings of each text and add to an embeddings column in the dataframe

model = 'models/embedding-001'
def embed_fn(title, text):
  return genai.embed_content(model=model,
                             content=text,
                             task_type="retrieval_document",
                             title=title)["embedding"]


df['Embeddings'] = df.apply(lambda row: embed_fn(row['Title'], row['Text']), axis=1)
df

Unnamed: 0,Text,Title,Embeddings
1,"""Ages"": 25, ""Gender"": ""Male"", ""Height"": 180, ""...",1,"[0.0030266906, -0.008476619, -0.044675007, -0...."
2,"""Ages"": 32, ""Gender"": ""Female"", ""Height"": 165,...",2,"[0.009257302, 0.0007417661, -0.038306423, -0.0..."
3,"""Ages"": 48, ""Gender"": ""Male"", ""Height"": 175, ""...",3,"[0.008448843, 0.0029076363, -0.038388327, -0.0..."
4,"""Ages"": 55, ""Gender"": ""Female"", ""Height"": 160,...",4,"[0.013579629, -0.0016232682, -0.043363273, -0...."
5,"""Ages"": 62, ""Gender"": ""Male"", ""Height"": 170, ""...",5,"[0.0037556323, 0.015628109, -0.0420066, -0.032..."
...,...,...,...
1694,"""Ages"": 53, ""Gender"": ""Female"", ""Height"": 182,...",1694,"[0.012902935, 0.004273881, -0.0486263, -0.0408..."
1695,"""Ages"": 38, ""Gender"": ""Male"", ""Height"": 150, ""...",1695,"[0.019779664, 0.0104915835, -0.04268414, -0.01..."
1696,"""Ages"": 57, ""Gender"": ""Male"", ""Height"": 165, ""...",1696,"[0.0169689, 0.012871087, -0.04702588, -0.02512..."
1697,"""Ages"": 40, ""Gender"": ""Male"", ""Height"": 166, ""...",1697,"[0.014956241, 0.0091882795, -0.043393888, -0.0..."


In [13]:
df.to_csv("embedding.csv")

In [14]:
df.head()

Unnamed: 0,Text,Title,Embeddings
1,"""Ages"": 25, ""Gender"": ""Male"", ""Height"": 180, ""...",1,"[0.0030266906, -0.008476619, -0.044675007, -0...."
2,"""Ages"": 32, ""Gender"": ""Female"", ""Height"": 165,...",2,"[0.009257302, 0.0007417661, -0.038306423, -0.0..."
3,"""Ages"": 48, ""Gender"": ""Male"", ""Height"": 175, ""...",3,"[0.008448843, 0.0029076363, -0.038388327, -0.0..."
4,"""Ages"": 55, ""Gender"": ""Female"", ""Height"": 160,...",4,"[0.013579629, -0.0016232682, -0.043363273, -0...."
5,"""Ages"": 62, ""Gender"": ""Male"", ""Height"": 170, ""...",5,"[0.0037556323, 0.015628109, -0.0420066, -0.032..."


Use the `find_best_passage` function to calculate the dot products, and then sort the dataframe from the largest to smallest dot product value to retrieve the relevant passage out of the database.

In [16]:
def find_best_passage(query, dataframe):
  """
  Compute the distances between the query and each document in the dataframe
  using the dot product.
  """
  query_embedding = genai.embed_content(model=model,
                                        content=query,
                                        task_type="retrieval_query")
  dot_products = np.dot(np.stack(dataframe['Embeddings']), query_embedding["embedding"])
  idx = np.argmax(dot_products)
  return dataframe.iloc[idx]['Text'] # Return text from index with max value

In [28]:


def find_best_passage2(query, dataframe):
    """
    Compute the cosine similarity between the query and each document in the dataframe.
    Returns the most similar text.
    """
    # Generate query embedding
    query_embedding = genai.embed_content(
        model=model,
        content=query,
        task_type="retrieval_query"
    )["embedding"]
    
    # Convert list of embeddings to numpy array
    doc_embeddings = np.stack(dataframe['Embeddings'])  # Shape: (n_docs, embedding_dim)
    
    # Normalize embeddings (L2 normalization)
    doc_embeddings_norm = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
    query_embedding_norm = query_embedding / np.linalg.norm(query_embedding)  # Normalize query
    
    # Compute cosine similarity
    cosine_similarities = np.dot(doc_embeddings_norm, query_embedding_norm)
    
    # Get the index of the highest similarity
    idx = np.argmax(cosine_similarities)
    
    return dataframe.iloc[idx]['Text']  # Return text from the best-matching passage


In [104]:

import random

def find_best_passage3(query, dataframe, top_k=3, noise_factor=0.01):
    """
    Compute the cosine similarity between the query and each document in the dataframe,
    with a bit of randomness for more accurate and diverse results.
    """
    model1 = genai.GenerativeModel('gemini-1.5-pro-latest')
    a1 = model1.generate_content(f'''Convert the following user query into a structured JSON format with specified fields:

User Query: "{query}"

The output should include:
- **Personal Information**: Age, Gender, Height, Weight, Activity Level, Dietary Preference.
- **Nutrition Targets**: Daily Calorie Target, Protein, Sugar, Sodium, Carbohydrates, Fiber, Fat.
- **Health Condition**: Disease or dietary focus.

Ensure the format matches the example structure and do not enter any commnets etc andmentioin 0 if any daat is not devicable:
"
  "Ages": 25,
  "Gender": "Male",
  "Height": 180,
  "Weight": 80,
  "Activity Level": "Moderately Active",
  "Dietary Preference": "Omnivore",
  "Daily Calorie Target": 2000,
  "Protein": 120,
  "Sugar": 125.0,
  "Sodium": 24.0,
  "Calories": 2020,
  "Carbohydrates": 250,
  "Fiber": 30.0,
  "Fat": 60,
  "Disease": "Weight Gain""
'''
)
    model = 'models/embedding-001'
    # Generate query embedding
    print(a1.text)
    query_embedding = genai.embed_content(
        model=model,
        content=a1.text,
        task_type="retrieval_query"
    )["embedding"]
    
    # Convert list of embeddings to NumPy array
    doc_embeddings = np.stack(dataframe['Embeddings'])  # Shape: (n_docs, embedding_dim)
    
    # Normalize embeddings (L2 normalization)
    doc_embeddings_norm = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)
    query_embedding_norm = query_embedding / np.linalg.norm(query_embedding)  # Normalize query
    
    # Compute cosine similarity
    cosine_similarities = np.dot(doc_embeddings_norm, query_embedding_norm)
    
    # Introduce slight noise for randomness
    cosine_similarities += np.random.uniform(-noise_factor, noise_factor, size=cosine_similarities.shape)
    
    # Get top-k indices sorted by similarity
    top_k_indices = np.argsort(cosine_similarities)[-top_k:]  # Get top-k highest similarity scores
    
    # Randomly choose one from the top-k
    chosen_idx = random.choice(top_k_indices)
    
    return dataframe.iloc[chosen_idx]['Text']  # Return text from the selected index


View the most relevant document from the database:

In [31]:
passage = find_best_passage('"Ages": 12, "Gender": "Female", "Height": 175, "Weight": 95, "Activity Level": "Very Active", "Dietary Preference": "Omnivore"', df)
passage

'"Ages": 18, "Gender": "Female", "Height": 198, "Weight": 91, "Activity Level": "Extremely Active", "Dietary Preference": "Omnivore", "Daily Calorie Target": 3103, "Protein": 232, "Sugar": 155.0, "Sodium": 46.4, "Calories": 3095, "Carbohydrates": 310, "Fiber": 37.2, "Fat": 103, "Breakfast Suggestion": "Tofu scramble with veggies", "Breakfast Calories": 312.0, "Breakfast Protein": 29.0, "Breakfast Carbohydrates": 75.0, "Breakfast Fats": 28.0, "Lunch Suggestion": "Lentil soup with whole wheat bread", "Lunch Calories": 328.0, "Lunch Protein": 23.0, "Lunch Carbohydrates": 39.0, "Dinner Suggestion": "Vegetable stir-fry with brown rice", "Dinner Calories": 250.0, "Dinner Protein.1": 28.0, "Dinner Carbohydrates.1": 32.0, "Dinner Fats": 15.0, "Snack Suggestion": "Banana with peanut butter", "Snacks Calories": 210, "Snacks Protein": 5, "Snacks Carbohydrates": 30, "Snacks Fats": 9, "Disease": "Weight Gain", "Lunch Fats": 8'

In [30]:
passage = find_best_passage2('"Ages": 12, "Gender": "Female", "Height": 175, "Weight": 95, "Activity Level": "Very Active", "Dietary Preference": "Omnivore"', df)
passage

'"Ages": 18, "Gender": "Female", "Height": 198, "Weight": 91, "Activity Level": "Extremely Active", "Dietary Preference": "Omnivore", "Daily Calorie Target": 3103, "Protein": 232, "Sugar": 155.0, "Sodium": 46.4, "Calories": 3095, "Carbohydrates": 310, "Fiber": 37.2, "Fat": 103, "Breakfast Suggestion": "Tofu scramble with veggies", "Breakfast Calories": 312.0, "Breakfast Protein": 29.0, "Breakfast Carbohydrates": 75.0, "Breakfast Fats": 28.0, "Lunch Suggestion": "Lentil soup with whole wheat bread", "Lunch Calories": 328.0, "Lunch Protein": 23.0, "Lunch Carbohydrates": 39.0, "Dinner Suggestion": "Vegetable stir-fry with brown rice", "Dinner Calories": 250.0, "Dinner Protein.1": 28.0, "Dinner Carbohydrates.1": 32.0, "Dinner Fats": 15.0, "Snack Suggestion": "Banana with peanut butter", "Snacks Calories": 210, "Snacks Protein": 5, "Snacks Carbohydrates": 30, "Snacks Fats": 9, "Disease": "Weight Gain", "Lunch Fats": 8'

In [132]:
passage = find_best_passage3('diet for 60 years man', df)
passage

```json
{
  "Ages": 60,
  "Gender": "Male",
  "Height": 0,
  "Weight": 0,
  "Activity Level": "0",
  "Dietary Preference": "0",
  "Daily Calorie Target": 0,
  "Protein": 0,
  "Sugar": 0,
  "Sodium": 0,
  "Calories":0,
  "Carbohydrates": 0,
  "Fiber": 0,
  "Fat": 0,
  "Disease": "0"
}
```



'"Ages": 75, "Gender": "Male", "Height": 197, "Weight": 112, "Activity Level": "Moderately Active", "Dietary Preference": "Omnivore", "Daily Calorie Target": 2738, "Protein": 101, "Sugar": 79.5, "Sodium": 20.2, "Calories": 1337, "Carbohydrates": 159, "Fiber": 19.08, "Fat": 33, "Breakfast Suggestion": "Egg and spinach wrap", "Breakfast Calories": 166.0, "Breakfast Protein": 16.41, "Breakfast Carbohydrates": 53.0, "Breakfast Fats": 13.05, "Lunch Suggestion": "Avocado and chickpea salad", "Lunch Calories": 388.0, "Lunch Protein": 19.8, "Lunch Carbohydrates": 12.1, "Dinner Suggestion": "Tofu and quinoa bowl", "Dinner Calories": 540.0, "Dinner Protein.1": 48.0, "Dinner Carbohydrates.1": 14.0, "Dinner Fats": 33.0, "Snack Suggestion": "Protein bar", "Snacks Calories": 250, "Snacks Protein": 8, "Snacks Carbohydrates": 30, "Snacks Fats": 12, "Disease": "Weight Gain, Hypertension, Heart Disease", "Lunch Fats": 9'

## Question and Answering Application

Let's try to use the text generation API to create a Q & A system. Input your own custom data below to create a simple question and answering example. You will still use the dot product as a metric of similarity.

In [128]:
def make_prompt(query, relevant_passage):
  escaped = relevant_passage.replace("'", "").replace('"', "").replace("\n", " ")
  prompt = textwrap.dedent("""You are a helpful and informative bot that answers questions using text from the reference passage included below. \
  However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
  strike a friendly and converstional tone. \
  just use this passage as an reference and generate dite plan and ignore the disease part depend more on passage for diet suggistions and dont mention any comments
  use diete plan used in passage only and dont explain just specift the dite plan
  QUESTION: '{query}'
  PASSAGE: '{relevant_passage}'

    ANSWER:
  """).format(query=query, relevant_passage=escaped)

  return prompt

In [133]:
prompt = make_prompt('diet for 60 years man', passage)
print(prompt)

You are a helpful and informative bot that answers questions using text from the reference passage included below.   However, you are talking to a non-technical audience, so be sure to break down complicated concepts and   strike a friendly and converstional tone.   just use this passage as an reference and generate dite plan and ignore the disease part depend more on passage for diet suggistions and dont mention any comments
  use diete plan used in passage only and dont explain just specift the dite plan
  QUESTION: 'diet for 60 years man'
  PASSAGE: 'Ages: 75, Gender: Male, Height: 197, Weight: 112, Activity Level: Moderately Active, Dietary Preference: Omnivore, Daily Calorie Target: 2738, Protein: 101, Sugar: 79.5, Sodium: 20.2, Calories: 1337, Carbohydrates: 159, Fiber: 19.08, Fat: 33, Breakfast Suggestion: Egg and spinach wrap, Breakfast Calories: 166.0, Breakfast Protein: 16.41, Breakfast Carbohydrates: 53.0, Breakfast Fats: 13.05, Lunch Suggestion: Avocado and chickpea salad, 

Choose one of the Gemini content generation models in order to find the answer to your query.

In [113]:
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experimental


In [134]:
model1 = genai.GenerativeModel('gemini-1.5-pro-latest')
answer = model1.generate_content(prompt)

In [135]:
Markdown(answer.text)

Hey there!  For a 60-year-old man, aiming for a daily calorie target around 2738, here's a sample meal plan:

*   **Breakfast:** Egg and spinach wrap
*   **Lunch:** Avocado and chickpea salad
*   **Dinner:** Tofu and quinoa bowl
*   **Snack:** Protein bar 


## Next steps

To learn how to use other services in the Gemini API, see the [Python quickstart](https://ai.google.dev/tutorials/python_quickstart).

To learn more about how you can use embeddings, see these  other tutorials:

 * [Anomaly Detection with Embeddings](https://ai.google.dev/gemini-api/tutorials/anomaly_detection)
 * [Clustering with Embeddings](https://ai.google.dev/gemini-api/tutorials/clustering_with_embeddings)
 * [Training a Text Classifier with Embeddings](https://ai.google.dev/gemini-api/tutorials/text_classifier_embeddings)