In [38]:
from google import genai
from google.genai.types import HttpOptions

from IPython.display import HTML, Markdown, display
from google.api_core import retry
import os
# Import environment variables from env.json
import json

# Load environment variables from env.json
with open('../env.json', 'r') as f:
    env_vars = json.load(f)
# Set environment variables from the loaded file
os.environ["GOOGLE_CLOUD_PROJECT"] = env_vars["google_cloud_project"]
os.environ["GOOGLE_CLOUD_LOCATION"] = env_vars["google_cloud_location"]
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = env_vars["google_genai_use_vertexai"]
# Set the fine-tuned model ID as an environment variable
os.environ["FINE_TUNED_MODEL_ID"] = env_vars["fine_tuned_v1_model_id"]
os.environ["GOOGLE_API_KEY"] = env_vars["google_api_keys"][1]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]


is_retriable = lambda e: (isinstance(e, genai.errors.APIError) and e.code in {429, 503})

genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable)(genai.models.Models.generate_content)

client = genai.Client(api_key=GOOGLE_API_KEY)

In [39]:
import google.generativeai as genai

genai.configure(api_key=GOOGLE_API_KEY)

# List available models
print("Available models:")
for m in genai.list_models():
    print(m.name)

# Use a specific model
model = genai.GenerativeModel('gemini-2.0-flash-lite-001')  # base untuned model

# Generate content
response = model.generate_content("How can I be a better team leader?")
print(response.text)


Available models:
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-04-17
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models

KeyboardInterrupt: 

In [None]:
# The genai module doesn't have a Client attribute, so we'll use the GenerativeModel directly
# No need to create a client instance

def generate_response(prompt):
    inputs = f'''You are a prompt engineering expert that transforms simple 
    prompts into more effective versions. Analyze the input prompt and create
      an improved version that includes specific details, context, desired output format, 
      and any relevant constraints. Make the prompt clear, specific, and designed to 
      generate high-quality responses.
      
      Input Prompt: {prompt}

      Respond ONLY with the text of the improved prompt, without any explanations, 
      introductions, or additional commentary.
      '''

    response = model.generate_content(contents=inputs)
    return response.text

print(generate_response('Explain AI to me like im a little kid'))

Imagine you're talking to a 7-year-old. Explain what Artificial Intelligence (AI) is, using simple words and examples they can easily understand. Focus on how AI helps computers do smart things, like playing games or recognizing pictures. Break down the explanation into three short paragraphs. In the first paragraph, define AI in a general way. In the second, give an example of AI in action. In the third, explain a simple limitation of AI. End your response with a single sentence summarizing what AI can do.



In [8]:
import pandas as pd

# Load the ShareGPT dataset from CSV
try:
    # Define the path to the CSV file
    csv_path = "../data/Prompt_Training_2.0/seperated_test_data.csv"
    
    # Load the CSV file into a pandas DataFrame
    df_sharegpt = pd.read_csv(csv_path)
    
    # Display basic information about the dataset
    print(f"Dataset loaded successfully with {len(df_sharegpt)} rows")
    print("\nDataset columns:")
    for col in df_sharegpt.columns:
        print(f"- {col}")
    
    # Display the first few rows of the dataset
    print("\nFirst 5 rows of the dataset:")
    display(df_sharegpt.head())
    
except FileNotFoundError:
    print(f"Error: The file at '../data/Prompt_Training_2.0/seperated_test_data.csv' was not found.")
except Exception as e:
    print(f"An error occurred while loading the dataset: {str(e)}")
    


Dataset loaded successfully with 962 rows

Dataset columns:
- original_prompt
- context
- instruction
- has_context
- conversation_id

First 5 rows of the dataset:


Unnamed: 0,original_prompt,context,instruction,has_context,conversation_id
0,One-pot vegetarian pasta recipes for busy nights,,One-pot vegetarian pasta recipes for busy nights,False,93453
1,We have the following blog content... what is ...,"The content:\n\nConsumers want more choices, b...",We have the following blog content... what is ...,True,65263
2,how o sort element using merge sort technique ...,,how o sort element using merge sort technique ...,False,99000
3,"make a javascript class ""GraphicLayer"" which i...","One of those properties will be center point, ...","make a javascript class ""GraphicLayer"" which i...",True,96296
4,Please outline the steps to build an automated...,,Please outline the steps to build an automated...,False,38806


In [9]:
# Apply the generate_response function to each original_prompt in the dataframe
# This will create a new column 'base_response' with the improved prompts
print("Generating improved prompts for each original prompt...")

# Import tqdm for progress bar
from tqdm import tqdm

# Define a function to safely apply generate_response
def safe_generate_response(prompt):
    try:
        return generate_response(prompt)
    except Exception as e:
        print(f"Error processing prompt: {str(e)[:100]}...")
        return "Error generating response"

# Create directory if it doesn't exist
import os
os.makedirs("../data/BaseModelResponses", exist_ok=True)

# Initialize counter for saving
counter = [0]
total_rows = len(df_sharegpt)

# Define a function to process each row and save every 100 rows
def process_and_save(prompt):
    response = safe_generate_response(prompt)
    return response

# Apply the function to each row in the dataframe with tqdm progress bar
tqdm.pandas(desc="Processing prompts")
df_sharegpt['base_response'] = df_sharegpt['original_prompt'].progress_apply(process_and_save)

# Save the final complete dataset
df_sharegpt.to_csv("../data/BaseModelResponses/improved_prompts.csv", index=False)
print(f"Completed generating {total_rows} improved prompts")
print(f"Final results saved to ../data/BaseModelResponses/improved_prompts.csv")

print("\nSample of results:")
display(df_sharegpt[['original_prompt', 'base_response']].head(3))


Generating improved prompts for each original prompt...


Processing prompts: 100%|██████████| 962/962 [37:52<00:00,  2.36s/it]  

Completed generating 962 improved prompts
Final results saved to ../data/BaseModelResponses/improved_prompts.csv

Sample of results:





Unnamed: 0,original_prompt,base_response
0,One-pot vegetarian pasta recipes for busy nights,Create a list of five (5) unique one-pot veget...
1,We have the following blog content... what is ...,Analyze the provided blog content about choosi...
2,how o sort element using merge sort technique ...,Write a Java program that implements the Merge...


In [10]:
import os
from google import genai
from google.genai.types import HttpOptions



# Access the model ID from environment variables
fine_tuned_model_id = os.environ["FINE_TUNED_MODEL_ID"]


# Initialize the GenAI client for Vertex AI
client = genai.Client(http_options=HttpOptions(api_version="v1"))

# Define your prompt
prompt = "How can I be a better team leader?"

# Call your fine-tuned model
response = client.models.generate_content(
    model=fine_tuned_model_id,
    contents=prompt,
)

# Print the response
print(response.text)


You are a seasoned leadership consultant. Provide a comprehensive guide on how to improve team leadership skills. Break down the guide into the following sections:

1.  **Understanding the Fundamentals:** Define team leadership, discuss its importance, and outline the core responsibilities of a team leader.
2.  **Building Effective Communication:** Detail strategies for clear, concise, and empathetic communication within a team. Include tips on active listening, providing constructive feedback, and handling difficult conversations.
3.  **Motivating and Engaging Team Members:** Explain different motivational techniques, such as recognition, rewards, and opportunities for growth. Discuss how to create a positive and engaging work environment.
4.  **Delegation and Empowerment:** Describe how to delegate tasks effectively and empower team members to take ownership and responsibility. Include strategies for monitoring progress and providing support.
5.  **Conflict Resolution:** Outline step

In [11]:
#works with no context, but you can do with context too
def generate_response(prompt):
    inputs = f'''You are a prompt engineering expert that transforms simple 
    prompts into more effective versions. Analyze the input prompt and create
      an improved version that includes specific details, context, desired output format, 
      and any relevant constraints. Make the prompt clear, specific, and designed to 
      generate high-quality responses.
      
      Input Prompt: {prompt}

      Respond ONLY with the text of the improved prompt, without any explanations, 
      introductions, or additional commentary.
      '''

    return client.models.generate_content(
        model=fine_tuned_model_id,
        contents=inputs).text

In [13]:
import pandas as pd

# Load the ShareGPT dataset from CSV
try:
    # Define the path to the CSV file
    csv_path = "../data/Prompt_Training_2.0/seperated_test_data.csv"
    
    # Load the CSV file into a pandas DataFrame
    df_sharegpt = pd.read_csv(csv_path)
    
    # Display basic information about the dataset
    print(f"Dataset loaded successfully with {len(df_sharegpt)} rows")
    print("\nDataset columns:")
    for col in df_sharegpt.columns:
        print(f"- {col}")
    
    # Display the first few rows of the dataset
    print("\nFirst 5 rows of the dataset:")
    display(df_sharegpt.head())
    
except FileNotFoundError:
    print(f"Error: The file at '../data/Prompt_Training_2.0/seperated_test_data.csv' was not found.")
except Exception as e:
    print(f"An error occurred while loading the dataset: {str(e)}")
    

Dataset loaded successfully with 962 rows

Dataset columns:
- original_prompt
- context
- instruction
- has_context
- conversation_id

First 5 rows of the dataset:


Unnamed: 0,original_prompt,context,instruction,has_context,conversation_id
0,One-pot vegetarian pasta recipes for busy nights,,One-pot vegetarian pasta recipes for busy nights,False,93453
1,We have the following blog content... what is ...,"The content:\n\nConsumers want more choices, b...",We have the following blog content... what is ...,True,65263
2,how o sort element using merge sort technique ...,,how o sort element using merge sort technique ...,False,99000
3,"make a javascript class ""GraphicLayer"" which i...","One of those properties will be center point, ...","make a javascript class ""GraphicLayer"" which i...",True,96296
4,Please outline the steps to build an automated...,,Please outline the steps to build an automated...,False,38806


In [14]:
# Apply the generate_response function to each original_prompt in the dataframe
# This will create a new column 'base_response' with the improved prompts
print("Generating improved prompts for each original prompt...")

# Import tqdm for progress bar
from tqdm import tqdm

# Define a function to safely apply generate_response
def safe_generate_response(prompt):
    try:
        return generate_response(prompt)
    except Exception as e:
        print(f"Error processing prompt: {str(e)[:100]}...")
        return "Error generating response"

# Create directory if it doesn't exist
import os
os.makedirs("../data/FineTunedResponses", exist_ok=True)

# Initialize counter for saving
counter = [0]
total_rows = len(df_sharegpt)

# Define a function to process each row and save every 100 rows
def process_and_save(prompt):
    response = safe_generate_response(prompt)
    counter[0] += 1
    
    return response

# Apply the function to each row in the dataframe with tqdm progress bar
tqdm.pandas(desc="Processing prompts")
df_sharegpt['fine_tuned_response'] = df_sharegpt['original_prompt'].progress_apply(process_and_save)

# Save the final complete dataset
df_sharegpt.to_csv("../data/FineTunedResponses/all_responses.csv", index=False)
print(f"Completed generating {total_rows} improved prompts")
print(f"Final results saved to ../data/FineTunedResponses/all_responses.csv")

print("\nSample of results:")
display(df_sharegpt[['original_prompt', 'fine_tuned_response']].head(3))


Generating improved prompts for each original prompt...


Processing prompts: 100%|██████████| 962/962 [26:43<00:00,  1.67s/it]

Completed generating 962 improved prompts
Final results saved to ../data/FineTunedResponses/all_responses.csv

Sample of results:





Unnamed: 0,original_prompt,fine_tuned_response
0,One-pot vegetarian pasta recipes for busy nights,I am looking for one-pot vegetarian pasta reci...
1,We have the following blog content... what is ...,Here's an example of how to analyze blog conte...
2,how o sort element using merge sort technique ...,I need a Java function implementation for the ...


In [15]:
print(df_sharegpt['original_prompt'].iloc[0])

print(generate_response(df_sharegpt['original_prompt'].iloc[0]))

One-pot vegetarian pasta recipes for busy nights
Here are a few examples of well-crafted one-pot pasta recipes:

Example 1: One-Pot Creamy Tomato Pasta
Ingredients: 1 pound pasta, 1 can diced tomatoes, 1 cup vegetable broth, 1/2 cup heavy cream, 1/4 cup chopped basil, salt, pepper
Instructions: In a large pot, combine pasta, tomatoes, and broth. Bring to a boil and cook until pasta is al dente. Stir in heavy cream, basil, salt, and pepper. Simmer for a few minutes to thicken.

Example 2: One-Pot Lemon Garlic Pasta
Ingredients: 1 pound pasta, 1/2 cup olive oil, 4 cloves garlic, minced, 1 lemon, zested and juiced, 1 cup vegetable broth, salt, pepper
Instructions: Heat olive oil in a large pot. Sauté garlic until fragrant. Add pasta, lemon zest, and juice, and broth. Bring to a boil and cook until pasta is al dente. Season with salt and pepper.

Now, create a one-pot pasta recipe with the following ingredients:

Ingredients: 1 pound pasta, 1 jar pesto, 1 can cannellini beans, 1 cup vegeta

In [30]:
# Test the fine-tuned model
os.environ["FINE_TUNED_MODEL_ID"] = env_vars["fine_tuned_v2_model_id"]
import os
from google import genai
from google.genai.types import HttpOptions

# Initialize the GenAI client for Vertex AI
client = genai.Client(http_options=HttpOptions(api_version="v1"))

def generate_response(instruction, context):
    prompt_template = f"""You are a prompt engineering expert.
    Your task is to rewrite the instruction below using advanced prompt engineering techniques. If context is provided, use it as *background knowledge* to better understand the task — but do not include it in the final output.

    Guidelines:
    - Enhance the instruction to be clearer, more specific, and more effective
    - Use any prompting technique that best fits
    - Ground your rewrite in the provided context, if applicable
    - Do NOT copy or reference the context in your rewritten instruction

    Context:
    {context}

    Original Instruction:
    {instruction}

    Output ONLY the improved instruction without any additional text, titling, explanations, or acknowledgment."""

    return client.models.generate_content(
        model=os.environ["FINE_TUNED_MODEL_ID"],
        contents=prompt_template
    ).text


# Define a test prompt
instruction = "Write a function to calculate the Fibonacci sequence in Python"
context = "The Fibonacci sequence is a series of numbers where each number is the sum of the two preceding ones, usually starting with 0 and 1."

# Call the fine-tuned model
response = generate_response(instruction, context)

# Print the response
print("Original prompt:")
print(instruction + ' ' +context)
print("\nImproved prompt from fine-tuned model:")
print(response + ' ' + context)

Original prompt:
Write a function to calculate the Fibonacci sequence in Python The Fibonacci sequence is a series of numbers where each number is the sum of the two preceding ones, usually starting with 0 and 1.

Improved prompt from fine-tuned model:
Generate a Python function named `fibonacci(n)` that computes and returns the n-th Fibonacci number. Ensure the function addresses standard edge cases:
1.  If `n` is less than 0, return `ValueError: Input must be a non-negative integer.`.
2.  If `n` is exactly 0, return 0.
3.  If `n` is exactly 1, return 1.
4.  If `n` is greater than 1, employ dynamic programming (DP) for efficient calculation. The Fibonacci sequence is a series of numbers where each number is the sum of the two preceding ones, usually starting with 0 and 1.


In [31]:
import pandas as pd

# Load the ShareGPT dataset from CSV
try:
    # Define the path to the CSV file
    csv_path = "../data/Prompt_Training_2.0/seperated_test_data.csv"
    
    # Load the CSV file into a pandas DataFrame
    df_sharegpt = pd.read_csv(csv_path)
    
    # Display basic information about the dataset
    print(f"Dataset loaded successfully with {len(df_sharegpt)} rows")
    print("\nDataset columns:")
    for col in df_sharegpt.columns:
        print(f"- {col}")
    
    # Display the first few rows of the dataset
    print("\nFirst 5 rows of the dataset:")
    display(df_sharegpt.head())
    
except FileNotFoundError:
    print(f"Error: The file at '../data/Prompt_Training_2.0/seperated_test_data.csv' was not found.")
except Exception as e:
    print(f"An error occurred while loading the dataset: {str(e)}")

Dataset loaded successfully with 962 rows

Dataset columns:
- original_prompt
- context
- instruction
- has_context
- conversation_id

First 5 rows of the dataset:


Unnamed: 0,original_prompt,context,instruction,has_context,conversation_id
0,One-pot vegetarian pasta recipes for busy nights,,One-pot vegetarian pasta recipes for busy nights,False,93453
1,We have the following blog content... what is ...,"The content:\n\nConsumers want more choices, b...",We have the following blog content... what is ...,True,65263
2,how o sort element using merge sort technique ...,,how o sort element using merge sort technique ...,False,99000
3,"make a javascript class ""GraphicLayer"" which i...","One of those properties will be center point, ...","make a javascript class ""GraphicLayer"" which i...",True,96296
4,Please outline the steps to build an automated...,,Please outline the steps to build an automated...,False,38806


In [32]:
# Apply the generate_response function to each original_prompt in the dataframe
# This will create a new column 'base_response' with the improved prompts
print("Generating improved prompts for each original prompt...")

# Import tqdm for progress bar
from tqdm import tqdm

# Define a function to safely apply generate_response
def safe_generate_response(instruction, context):
    try:
        return generate_response(instruction, context)
    except Exception as e:
        print(f"Error processing prompt: {str(e)[:100]}...")
        return "Error generating response"

# Create directory if it doesn't exist
import os
os.makedirs("../data/FineTunedV2Responses", exist_ok=True)

# Initialize counter for saving
counter = [0]
total_rows = len(df_sharegpt)

# Define a function to process each row and save every 100 rows
def process_and_save(row):
    instruction = row['instruction']
    context = row['context']
    response = safe_generate_response(instruction, context)
    counter[0] += 1
    
    return response

# Apply the function to each row in the dataframe with tqdm progress bar
tqdm.pandas(desc="Processing prompts")
df_sharegpt['fine_tuned_instruction'] = df_sharegpt.progress_apply(process_and_save, axis=1)

# Save the final complete dataset
df_sharegpt.to_csv("../data/FineTunedV2Responses/improved_prompts.csv", index=False)
print(f"Completed generating {total_rows} improved prompts")
print(f"Final results saved to ../data/FineTunedV2Responses/improved_prompts.csv")

print("\nSample of results:")
display(df_sharegpt[['fine_tuned_instruction', 'instruction', 'context']].head(3))


Generating improved prompts for each original prompt...


Processing prompts: 100%|██████████| 962/962 [38:55<00:00,  2.43s/it]  

Completed generating 962 improved prompts
Final results saved to ../data/FineTunedV2Responses/improved_prompts.csv

Sample of results:





Unnamed: 0,fine_tuned_instruction,instruction,context
0,Generate a numbered list of 5-7 one-pot vegeta...,One-pot vegetarian pasta recipes for busy nights,
1,Analyze the provided text [insert or reference...,We have the following blog content... what is ...,"The content:\n\nConsumers want more choices, b..."
2,Generate a complete and runnable Java program ...,how o sort element using merge sort technique ...,


In [None]:
import google.generativeai as genai

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-2.0-flash')

def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Rename fine_tuned_response to fine_tuned_prompt in the all_prompts.csv file
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/FineTunedResponses/improved_prompts.csv")

# Rename the column
prompts_df = prompts_df.rename(columns={'fine_tuned_response': 'fine_tuned_prompt'})

# Create a new column to store the responses
prompts_df['response'] = None

# Process each prompt and get a response
print("\nProcessing fine-tuned prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the fine-tuned prompt
        response = get_response(prompts_df.loc[i, 'fine_tuned_prompt'])
        prompts_df.loc[i, 'response'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.5)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'response'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/FineTunedResponses/responses.csv", index=False)
print("\nResponses saved to ../data/FineTunedResponses/responses.csv")

# Display a few examples of the responses
print("\nSample responses to fine-tuned prompts:")
print(prompts_df[['fine_tuned_prompt', 'response']].head(3))





Available models:
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-fla

 14%|█▎        | 132/962 [16:38<1:58:39,  8.58s/it]

Error processing prompt 132: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 18%|█▊        | 175/962 [21:28<1:57:37,  8.97s/it]

Error processing prompt 175: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 44%|████▍     | 421/962 [54:39<57:03,  6.33s/it]  

Error processing prompt 421: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 47%|████▋     | 448/962 [58:42<1:28:35, 10.34s/it]

Error processing prompt 448: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 51%|█████▏    | 495/962 [1:04:43<40:21,  5.18s/it]  

Error processing prompt 495: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 58%|█████▊    | 554/962 [1:12:22<1:26:46, 12.76s/it]

Error processing prompt 554: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 66%|██████▋   | 638/962 [1:23:11<34:11,  6.33s/it]  

Error processing prompt 638: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 79%|███████▊  | 756/962 [1:38:09<24:47,  7.22s/it]  

Error processing prompt 756: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 80%|████████  | 773/962 [1:40:24<31:30, 10.00s/it]

Error processing prompt 773: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


100%|██████████| 962/962 [2:03:37<00:00,  7.71s/it]



Responses saved to ../data/FineTunedResponses/responses.csv

Sample responses to fine-tuned prompts:
                                   fine_tuned_prompt  \
0  I am looking for one-pot vegetarian pasta reci...   
1  Here's an example of how to analyze blog conte...   
2  I need a Java function implementation for the ...   

                                            response  
0  Okay, here are three one-pot vegetarian pasta ...  
1  Okay, I need the blog content to analyze! Plea...  
2  ```java\n// MergeSort.java\npublic class Merge...  


In [26]:

import google.generativeai as genai

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-2.0-flash')

def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Process the prompts from Flash2.0Responses
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/BaseModelResponses/improved_prompts.csv")

# Rename the columns to match our expected format
prompts_df = prompts_df.rename(columns={'base_response': 'base_prompt'})

# Create a new column to store the responses
prompts_df['response'] = None

# Process each prompt and get a response
print("\nProcessing BaseModelResponses prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the base prompt
        response = get_response(prompts_df.loc[i, 'base_prompt'])
        prompts_df.loc[i, 'response'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.1)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'response'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/BaseModelResponses/responses.csv", index=False)
print("\nResponses saved to ../data/BaseModelResponses/responses.csv")

# Display a few examples of the responses
print("\nSample responses to Base Model prompts:")
print(prompts_df[['base_prompt', 'response']].head(3))






Processing BaseModelResponses prompts:


 55%|█████▌    | 531/962 [1:03:39<26:34,  3.70s/it]  

Error processing prompt 531: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 81%|████████  | 775/962 [1:36:23<41:31, 13.32s/it]  

Error processing prompt 775: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


100%|██████████| 962/962 [2:01:34<00:00,  7.58s/it]



Responses saved to ../data/BaseModelResponses/responses.csv

Sample responses to Base Model prompts:
                                         base_prompt  \
0  Create a list of five (5) unique one-pot veget...   
1  Analyze the provided blog content about choosi...   
2  Write a Java program that implements the Merge...   

                                            response  
0  ***\n\n**1. Lemon Garlic Spinach Pasta**\n\nTh...  
1  Here's a breakdown of the analysis:\n\n**1. Pr...  
2  ```java\n// Java program to implement Merge So...  


In [37]:
import google.generativeai as genai

genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-2.0-flash')

def get_response(instruction, context=None):
    if context is not None and not pd.isna(context):
        prompt = f"{instruction}\n{context}"
    else:
        prompt = instruction
    response = model.generate_content(prompt)
    return response.text


# Load the prompts from the CSV file
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/FineTunedV2Responses/improved_prompts.csv")

# Create a new column to store the responses
prompts_df['response'] = None

# Define a function to apply to each row
def process_row(row):
    try:
        # Get the response using both instruction and context
        response = get_response(row['fine_tuned_instruction'], row['context'])
        time.sleep(0.5)  # Add delay to avoid rate limiting
        return response
    except Exception as e:
        print(f"Error processing prompt: {e}")
        time.sleep(2)  # Wait longer if there's an error
        return f"Error: {str(e)}"

# Process each prompt and get a response using apply
print("\nProcessing fine-tuned prompts:")
tqdm.pandas()
prompts_df['response'] = prompts_df.progress_apply(process_row, axis=1)

# Save the results to a new CSV file
prompts_df.to_csv("../data/FineTunedV2Responses/responses.csv", index=False)
print("\nResponses saved to ../data/FineTunedV2Responses/responses.csv")

# Display a few examples of the responses
print("\nSample responses to fine-tuned prompts:")
print(prompts_df[['fine_tuned_instruction', 'response']].head(3))






Processing fine-tuned prompts:


  0%|          | 0/962 [00:00<?, ?it/s]

 77%|███████▋  | 744/962 [1:39:20<25:37,  7.05s/it]  

Error processing prompt: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 81%|████████  | 776/962 [1:43:57<28:10,  9.09s/it]

Error processing prompt: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


100%|██████████| 962/962 [2:10:30<00:00,  8.14s/it]



Responses saved to ../data/FineTunedV2Responses/responses.csv

Sample responses to fine-tuned prompts:
                              fine_tuned_instruction  \
0  Generate a numbered list of 5-7 one-pot vegeta...   
1  Analyze the provided text [insert or reference...   
2  Generate a complete and runnable Java program ...   

                                            response  
0  Here are 6 one-pot vegetarian pasta recipes, d...  
1  1.  **Estimated Searcher Profile & Goal:** An ...  
2  ```java\nimport java.util.Arrays;\n\npublic cl...  


In [34]:
model = genai.GenerativeModel('gemini-2.0-flash')
def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Process the prompts from ShareGPT
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/Prompt_Training_2.0/seperated_test_data.csv")

# Create a new column to store the responses
prompts_df['response'] = None

# Process each prompt and get a response
print("\nProcessing ShareGPT prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the prompt
        response = get_response(prompts_df.loc[i, 'original_prompt'])
        prompts_df.loc[i, 'response'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.5)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'response'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/NoModelResponses/responses.csv", index=False)
print("\nResponses saved to ../data/NoModelResponses/responses.csv")

# Display a few examples of the responses
print("\nSample responses to ShareGPT prompts:")
print(prompts_df[['original_prompt', 'response']].head(3))

AttributeError: module 'google.genai' has no attribute 'GenerativeModel'