In [9]:
from google import genai
from google.genai import types

from IPython.display import HTML, Markdown, display
from google.api_core import retry
import os
# Import environment variables from env.json
import json

# Load environment variables from env.json
with open('../env.json', 'r') as f:
    env_vars = json.load(f)

# Set environment variables from the loaded file
os.environ["GOOGLE_CLOUD_PROJECT"] = env_vars["google_cloud_project"]
os.environ["GOOGLE_CLOUD_LOCATION"] = env_vars["google_cloud_location"]
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = env_vars["google_genai_use_vertexai"]
# Set the fine-tuned model ID as an environment variable
os.environ["FINE_TUNED_MODEL_ID"] = env_vars["fine_tuned_model_id"]
os.environ["GOOGLE_API_KEY"] = env_vars["google_api_key"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]


is_retriable = lambda e: (isinstance(e, genai.errors.APIError) and e.code in {429, 503})

genai.models.Models.generate_content = retry.Retry(
    predicate=is_retriable)(genai.models.Models.generate_content)

client = genai.Client(api_key=GOOGLE_API_KEY)

In [13]:
import google.generativeai as genai

genai.configure(api_key=GOOGLE_API_KEY)

# List available models
print("Available models:")
for m in genai.list_models():
    print(m.name)

# Use a specific model
model = genai.GenerativeModel('gemini-2.0-flash')  # or 'gemini-pro-vision' for images

# Generate content
response = model.generate_content("How can I be a better team leader?")
print(response.text)


Available models:
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-fla

In [29]:
client = genai.Client(api_key=GOOGLE_API_KEY)

def generate_response(prompt):
    inputs = f'''You are a prompt engineering expert that transforms simple 
    prompts into more effective versions. Analyze the input prompt and create
      an improved version that includes specific details, context, desired output format, 
      and any relevant constraints. Make the prompt clear, specific, and designed to 
      generate high-quality responses.
      
      Input Prompt: {prompt}

      Respond ONLY with the text of the improved prompt, without any explanations, 
      introductions, or additional commentary.
      '''

    return client.models.generate_content(
        model=model,
        contents=inputs).text

print(generate_response('Explain AI to me like im a little kid'))

Imagine you're talking to a 7-year-old child. Explain Artificial Intelligence (AI) in simple terms, using analogies and examples a child can easily understand. Break down complex concepts into bite-sized pieces. Explain what AI can do, provide three kid-friendly examples of AI in action (e.g., a robot that plays games, a smart toy, or a voice assistant). Avoid technical jargon. Your response should be no more than 200 words and formatted as a numbered list.



In [32]:
import pandas as pd

# Load the ShareGPT dataset from CSV
try:
    # Define the path to the CSV file
    csv_path = "../data/ShareGPT/separated_prompts_clean.csv"
    
    # Load the CSV file into a pandas DataFrame
    df_sharegpt = pd.read_csv(csv_path)
    
    # Display basic information about the dataset
    print(f"Dataset loaded successfully with {len(df_sharegpt)} rows")
    print("\nDataset columns:")
    for col in df_sharegpt.columns:
        print(f"- {col}")
    
    # Display the first few rows of the dataset
    print("\nFirst 5 rows of the dataset:")
    display(df_sharegpt.head())

    df_sharegpt.drop(columns=['context', 'prompt'], inplace=True)
    
except FileNotFoundError:
    print(f"Error: The file at '../data/ShareGPT/separated_prompts_clean.csv' was not found.")
except Exception as e:
    print(f"An error occurred while loading the dataset: {str(e)}")
    


Dataset loaded successfully with 1000 rows

Dataset columns:
- original_id
- original_prompt
- context
- prompt

First 5 rows of the dataset:


Unnamed: 0,original_id,original_prompt,context,prompt
0,140731,One-pot vegetarian pasta recipes for busy nights,,One-pot vegetarian pasta recipes for busy nights
1,121053,We have the following blog content... what is ...,,We have the following blog content... what is ...
2,37805,how o sort element using merge sort technique ...,,how o sort element using merge sort technique ...
3,116016,"make a javascript class ""GraphicLayer"" which i...",,"make a javascript class ""GraphicLayer"" which i..."
4,132819,!Please outline the steps to build an automate...,,Please outline the steps to build an automated...


In [34]:
# Apply the generate_response function to each original_prompt in the dataframe
# This will create a new column 'base_response' with the improved prompts
print("Generating improved prompts for each original prompt...")

# Import tqdm for progress bar
from tqdm import tqdm

# Define a function to safely apply generate_response
def safe_generate_response(prompt):
    try:
        return generate_response(prompt)
    except Exception as e:
        print(f"Error processing prompt: {str(e)[:100]}...")
        return "Error generating response"

# Create directory if it doesn't exist
import os
os.makedirs("../data/Flash2.0Responses", exist_ok=True)

# Initialize counter for saving
counter = [0]
total_rows = len(df_sharegpt)

# Define a function to process each row and save every 100 rows
def process_and_save(prompt):
    response = safe_generate_response(prompt)
    counter[0] += 1
    
    # Save every 100 rows
    if counter[0] % 100 == 0:
        batch_num = counter[0] // 100
        print(f"Processed {counter[0]} rows. Saving batch {batch_num}...")
        df_sharegpt.to_csv(f"../data/Flash2.0Responses/responses_batch_{batch_num}.csv", index=False)
    
    return response

# Apply the function to each row in the dataframe with tqdm progress bar
tqdm.pandas(desc="Processing prompts")
df_sharegpt['base_response'] = df_sharegpt['original_prompt'].progress_apply(process_and_save)

# Save the final complete dataset
df_sharegpt.to_csv("../data/Flash2.0Responses/all_responses.csv", index=False)
print(f"Completed generating {total_rows} improved prompts")
print(f"Final results saved to ../data/Flash2.0Responses/all_responses.csv")

print("\nSample of results:")
display(df_sharegpt[['original_prompt', 'base_response']].head(3))


Generating improved prompts for each original prompt...


Processing prompts:  10%|█         | 101/1000 [03:43<35:57,  2.40s/it] 

Processed 100 rows. Saving batch 1...


Processing prompts:  20%|██        | 201/1000 [07:47<22:11,  1.67s/it]  

Processed 200 rows. Saving batch 2...


Processing prompts:  30%|███       | 301/1000 [11:23<23:35,  2.02s/it]

Processed 300 rows. Saving batch 3...


Processing prompts:  40%|████      | 401/1000 [15:21<22:50,  2.29s/it]

Processed 400 rows. Saving batch 4...


Processing prompts:  50%|█████     | 501/1000 [18:59<19:46,  2.38s/it]

Processed 500 rows. Saving batch 5...


Processing prompts:  60%|██████    | 601/1000 [22:28<12:49,  1.93s/it]

Processed 600 rows. Saving batch 6...


Processing prompts:  70%|███████   | 701/1000 [26:08<06:39,  1.34s/it]

Processed 700 rows. Saving batch 7...


Processing prompts:  80%|████████  | 801/1000 [30:12<07:08,  2.15s/it]

Processed 800 rows. Saving batch 8...


Processing prompts:  90%|█████████ | 901/1000 [34:05<04:50,  2.93s/it]

Processed 900 rows. Saving batch 9...


Processing prompts: 100%|██████████| 1000/1000 [38:01<00:00,  2.28s/it]

Processed 1000 rows. Saving batch 10...
Completed generating 1000 improved prompts
Final results saved to ../data/Flash2.0Responses/all_responses.csv

Sample of results:





Unnamed: 0,original_prompt,base_response
0,One-pot vegetarian pasta recipes for busy nights,Write five unique one-pot vegetarian pasta rec...
1,We have the following blog content... what is ...,Analyze the provided blog content from GigSala...
2,how o sort element using merge sort technique ...,Write a Java program that implements the Merge...


In [None]:
import os
from google import genai
from google.genai.types import HttpOptions



# Access the model ID from environment variables
fine_tuned_model_id = os.environ["FINE_TUNED_MODEL_ID"]


# Initialize the GenAI client for Vertex AI
client = genai.Client(http_options=HttpOptions(api_version="v1"))

# Define your prompt
prompt = "How can I be a better team leader?"

# Call your fine-tuned model
response = client.models.generate_content(
    model=fine_tuned_model_id,
    contents=prompt,
)

# Print the response
print(response.text)


Here are some common challenges faced by team leaders and potential solutions. First, consider the challenge of low team morale. What are three specific strategies a team leader could implement to improve morale? Then, consider the challenge of poor communication within the team. What are three concrete steps a team leader could take to enhance communication effectiveness? Finally, consider the challenge of conflict resolution. What are three techniques a team leader could utilize to effectively resolve conflicts within the team? Explain your reasoning for each strategy and technique.


In [3]:
#works with no context, but you can do with context too
def generate_response(prompt):
    inputs = f'''You are a prompt engineering expert that transforms simple 
    prompts into more effective versions. Analyze the input prompt and create
      an improved version that includes specific details, context, desired output format, 
      and any relevant constraints. Make the prompt clear, specific, and designed to 
      generate high-quality responses.
      
      Input Prompt: {prompt}

      Respond ONLY with the text of the improved prompt, without any explanations, 
      introductions, or additional commentary.
      '''

    return client.models.generate_content(
        model=fine_tuned_model_id,
        contents=inputs).text

In [4]:
import pandas as pd

# Load the ShareGPT dataset from CSV
try:
    # Define the path to the CSV file
    csv_path = "../data/ShareGPT/separated_prompts_clean.csv"
    
    # Load the CSV file into a pandas DataFrame
    df_sharegpt = pd.read_csv(csv_path)
    
    # Display basic information about the dataset
    print(f"Dataset loaded successfully with {len(df_sharegpt)} rows")
    print("\nDataset columns:")
    for col in df_sharegpt.columns:
        print(f"- {col}")
    
    # Display the first few rows of the dataset
    print("\nFirst 5 rows of the dataset:")
    display(df_sharegpt.head())

    df_sharegpt.drop(columns=['context', 'prompt'], inplace=True)
    
except FileNotFoundError:
    print(f"Error: The file at '../data/ShareGPT/separated_prompts_clean.csv' was not found.")
except Exception as e:
    print(f"An error occurred while loading the dataset: {str(e)}")
    

Dataset loaded successfully with 1000 rows

Dataset columns:
- original_id
- original_prompt
- context
- prompt

First 5 rows of the dataset:


Unnamed: 0,original_id,original_prompt,context,prompt
0,140731,One-pot vegetarian pasta recipes for busy nights,,One-pot vegetarian pasta recipes for busy nights
1,121053,We have the following blog content... what is ...,,We have the following blog content... what is ...
2,37805,how o sort element using merge sort technique ...,,how o sort element using merge sort technique ...
3,116016,"make a javascript class ""GraphicLayer"" which i...",,"make a javascript class ""GraphicLayer"" which i..."
4,132819,!Please outline the steps to build an automate...,,Please outline the steps to build an automated...


In [7]:
# Apply the generate_response function to each original_prompt in the dataframe
# This will create a new column 'base_response' with the improved prompts
print("Generating improved prompts for each original prompt...")

# Import tqdm for progress bar
from tqdm import tqdm

# Define a function to safely apply generate_response
def safe_generate_response(prompt):
    try:
        return generate_response(prompt)
    except Exception as e:
        print(f"Error processing prompt: {str(e)[:100]}...")
        return "Error generating response"

# Create directory if it doesn't exist
import os
os.makedirs("../data/FineTunedResponses", exist_ok=True)

# Initialize counter for saving
counter = [0]
total_rows = len(df_sharegpt)

# Define a function to process each row and save every 100 rows
def process_and_save(prompt):
    response = safe_generate_response(prompt)
    counter[0] += 1
    
    # Save every 100 rows
    if counter[0] % 100 == 0:
        batch_num = counter[0] // 100
        print(f"Processed {counter[0]} rows. Saving batch {batch_num}...")
        df_sharegpt.to_csv(f"../data/FineTunedResponses/responses_batch_{batch_num}.csv", index=False)
    
    return response

# Apply the function to each row in the dataframe with tqdm progress bar
tqdm.pandas(desc="Processing prompts")
df_sharegpt['fine_tuned_response'] = df_sharegpt['original_prompt'].progress_apply(process_and_save)

# Save the final complete dataset
df_sharegpt.to_csv("../data/FineTunedResponses/all_responses.csv", index=False)
print(f"Completed generating {total_rows} improved prompts")
print(f"Final results saved to ../data/FineTunedResponses/all_responses.csv")

print("\nSample of results:")
display(df_sharegpt[['original_prompt', 'fine_tuned_response']].head(3))


Generating improved prompts for each original prompt...


Processing prompts:  10%|█         | 101/1000 [03:31<38:16,  2.55s/it]

Processed 100 rows. Saving batch 1...


Processing prompts:  20%|██        | 201/1000 [06:10<22:23,  1.68s/it]

Processed 200 rows. Saving batch 2...


Processing prompts:  30%|███       | 301/1000 [08:42<17:58,  1.54s/it]

Processed 300 rows. Saving batch 3...


Processing prompts:  40%|████      | 401/1000 [11:31<19:23,  1.94s/it]

Processed 400 rows. Saving batch 4...


Processing prompts:  50%|█████     | 501/1000 [14:01<12:17,  1.48s/it]

Processed 500 rows. Saving batch 5...


Processing prompts:  60%|██████    | 601/1000 [16:46<10:35,  1.59s/it]

Processed 600 rows. Saving batch 6...


Processing prompts:  70%|███████   | 701/1000 [19:11<08:09,  1.64s/it]

Processed 700 rows. Saving batch 7...


Processing prompts:  80%|████████  | 801/1000 [21:56<04:41,  1.41s/it]

Processed 800 rows. Saving batch 8...


Processing prompts:  90%|█████████ | 901/1000 [24:29<03:01,  1.83s/it]

Processed 900 rows. Saving batch 9...


Processing prompts: 100%|██████████| 1000/1000 [27:00<00:00,  1.62s/it]

Processed 1000 rows. Saving batch 10...
Completed generating 1000 improved prompts
Final results saved to ../data/FineTunedResponses/all_responses.csv

Sample of results:





Unnamed: 0,original_prompt,fine_tuned_response
0,One-pot vegetarian pasta recipes for busy nights,Here are two examples of one-pot pasta recipes...
1,We have the following blog content... what is ...,Here is an example of blog content and the cor...
2,how o sort element using merge sort technique ...,You are a Java programming assistant. I need y...


In [6]:
print(df_sharegpt['original_prompt'].iloc[0])

print(generate_response(df_sharegpt['original_prompt'].iloc[0]))

One-pot vegetarian pasta recipes for busy nights
Here are a few examples of one-pot pasta recipes and the structure I want you to follow:

Recipe 1: Creamy Tomato Basil Pasta
Ingredients: Pasta, canned tomatoes, basil, cream, garlic
Instructions: Cook pasta according to package directions. Add tomatoes, basil, and garlic. Simmer for 10 minutes. Stir in cream and serve.

Recipe 2: One-Pot Lemon Herb Pasta
Ingredients: Pasta, vegetable broth, lemon, herbs, Parmesan cheese
Instructions: Cook pasta in vegetable broth. Add lemon juice, herbs, and Parmesan cheese. Simmer until pasta is cooked and broth is absorbed.

Now, write a one-pot vegetarian pasta recipe using ingredients like zucchini, cherry tomatoes, and pesto. Include ingredients and step-by-step instructions. Use a clear and concise writing style.


In [9]:
# Display random examples from the dataset
import random

# Set a random seed for reproducibility
random.seed(42)

# Select 5 random indices from the dataframe
random_indices = random.sample(range(len(df_sharegpt)), 5)

print("\nRandom examples of prompts and their fine-tuned responses:")
for idx in random_indices:
    print(f"\n--- Example {idx} ---")
    print(f"Original Prompt:\n{df_sharegpt['original_prompt'].iloc[idx]}")
    print(f"\nFine-tuned Response:\n{df_sharegpt['fine_tuned_response'].iloc[idx]}")
    print("-" * 80)

# Alternative display using pandas
print("\nRandom examples as a dataframe:")
display(df_sharegpt.loc[random_indices, ['original_prompt', 'fine_tuned_response']])



Random examples of prompts and their fine-tuned responses:

--- Example 654 ---
Original Prompt:
write code for a user script to set up a aws ec2 instance to create a web server running a react application with a python and django backend

Fine-tuned Response:
Here's an example of how an EC2 instance is set up for a web server:

Prompt: Write a bash script to launch an EC2 instance with the following configurations:
- AMI: amazonlinuxlatest
- Instance Type: t2.micro
- Security Group: Allow HTTP and SSH access.
- Install Nginx.
- Deploy a sample 'Hello World' HTML page in the document root.

Now, write a bash script to launch an EC2 instance with the following configurations:
- AMI: amazonlinuxlatest
- Instance Type: t2.micro
- Security Group: Allow HTTP (port 80), HTTPS (port 443), and SSH (port 22) access.
- Install Docker.
- Pull a pre-built Docker image of a Django backend (replace 'your_django_image:latest') and run it as a container.
- Pull a pre-built Docker image of a React fro

Unnamed: 0,original_prompt,fine_tuned_response
654,write code for a user script to set up a aws e...,Here's an example of how an EC2 instance is se...
114,You are a very good copy editor. Please take t...,You are a highly skilled and detail-oriented c...
25,1 / 1Is it possible to run Citrix Optimizer vi...,You are an expert Citrix administrator. I need...
759,can you give me a few name for an auction hous...,I want you to rewrite the following prompt to ...
281,"Please write a reply to this email, saying tha...",You are an expert in web accessibility. Develo...


In [10]:
# Load the fine-tuned responses
print("Loading fine-tuned responses...")
df_fine_tuned = pd.read_csv("../data/FineTunedResponses/all_responses.csv")

# Load the Flash 2.0 responses
print("Loading Flash 2.0 responses...")
df_flash = pd.read_csv("../data/Flash2.0Responses/all_responses.csv")

# Join the datasets on original_id
print("Joining datasets on original_id...")
df_combined = pd.merge(
    df_fine_tuned, 
    df_flash,
    on="original_id",
    suffixes=("_fine_tuned", "_flash")
)

# Display information about the combined dataset
print(f"Combined dataset shape: {df_combined.shape}")
print("\nSample of combined results:")
display(df_combined[['original_id', 'original_prompt_fine_tuned', 'fine_tuned_response', 'base_response']].head(5))

# Save the combined dataset
df_combined.to_csv("../data/combined_responses.csv", index=False)
print("Combined dataset saved to ../data/combined_responses.csv")


Loading fine-tuned responses...
Loading Flash 2.0 responses...
Joining datasets on original_id...
Combined dataset shape: (1000, 5)

Sample of combined results:


Unnamed: 0,original_id,original_prompt_fine_tuned,fine_tuned_response,base_response
0,140731,One-pot vegetarian pasta recipes for busy nights,Here are two examples of one-pot pasta recipes...,Write five unique one-pot vegetarian pasta rec...
1,121053,We have the following blog content... what is ...,Here is an example of blog content and the cor...,Analyze the provided blog content from GigSala...
2,37805,how o sort element using merge sort technique ...,You are a Java programming assistant. I need y...,Write a Java program that implements the Merge...
3,116016,"make a javascript class ""GraphicLayer"" which i...",Here are some examples of JavaScript classes f...,"```\nCreate a JavaScript class named ""GraphicL..."
4,132819,!Please outline the steps to build an automate...,Here's an example of a good automated chat bot...,"Develop a detailed, step-by-step plan for crea..."


Combined dataset saved to ../data/combined_responses.csv


In [13]:
# Display some readable examples from the combined dataset
print("\nReadable examples from the combined dataset:")
print("=" * 100)

# Select different examples to display in a more readable format
# Choose examples with interesting contrasts between fine-tuned and base responses
sample_indices = [116016, 132819, 654]  # Different examples from the dataset

for i, idx in enumerate(sample_indices):
    row_idx = df_combined.index[df_combined['original_id'] == idx][0] if idx in df_combined['original_id'].values else i
    print(f"\n\n--- Example {i+1} ---")
    print(f"Original Prompt:\n{df_combined['original_prompt_fine_tuned'].iloc[row_idx]}")
    print("\n" + "-" * 50)
    print(f"Fine-tuned Model Response:\n{df_combined['fine_tuned_response'].iloc[row_idx]}")
    print("\n" + "-" * 50)
    print(f"Base Model Response:\n{df_combined['base_response'].iloc[row_idx]}")
    print("\n" + "=" * 100)

# Get 5 random examples for additional variety
random_indices = random.sample(range(len(df_combined)), 5)
print("\n\nAdditional random examples:")
print("=" * 100)

for i, idx in enumerate(random_indices):
    print(f"\n\n--- Random Example {i+1} ---")
    print(f"Original Prompt:\n{df_combined['original_prompt_fine_tuned'].iloc[idx]}")
    print("\n" + "-" * 50)
    print(f"Fine-tuned Model Response:\n{df_combined['fine_tuned_response'].iloc[idx]}")
    print("\n" + "-" * 50)
    print(f"Base Model Response:\n{df_combined['base_response'].iloc[idx]}")
    print("\n" + "=" * 100)



Readable examples from the combined dataset:


--- Example 1 ---
Original Prompt:
make a javascript class "GraphicLayer" which is supplied a parent element, an image element or the url of an image. If the second argument is a url, it will create the image element. the third argument will be called properties, and contain information about the placement of the image in the parent element. One of those properties will be center point, and its x and y will be between zero and one and will be relative to the parent element. (x:.5 and y: .5 will mean the child image is centered on the parent. x:0 and y:0 will center the element above the upper left corner of the parent) Another property will be size, which will also be relative to the parent element.The default center point will be x: 0.5 and y: 0.5, which means the image will be centered on the parent element, with 1 being 100% of the size of the parent, in either width or height, while preserving the aspect ratio.   these dimensions shou

In [14]:
model = genai.GenerativeModel('gemini-2.0-flash')
def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Rename fine_tuned_response to fine_tuned_prompt in the all_prompts.csv file
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/FineTunedResponses/all_prompts.csv")

# Rename the column
prompts_df = prompts_df.rename(columns={'fine_tuned_response': 'fine_tuned_prompt'})

# Create a new column to store the responses
prompts_df['response_to_fine_tuned'] = None

# Process each prompt and get a response
print("\nProcessing fine-tuned prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the fine-tuned prompt
        response = get_response(prompts_df.loc[i, 'fine_tuned_prompt'])
        prompts_df.loc[i, 'response_to_fine_tuned'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.5)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'response_to_fine_tuned'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/FineTunedResponses/responses_to_fine_tuned.csv", index=False)
print("\nResponses saved to ../data/FineTunedResponses/responses_to_fine_tuned.csv")

# Display a few examples of the responses
print("\nSample responses to fine-tuned prompts:")
print(prompts_df[['fine_tuned_prompt', 'response_to_fine_tuned']].head(3))






Processing fine-tuned prompts:


  0%|          | 0/1000 [00:00<?, ?it/s]

  2%|▏         | 16/1000 [01:22<55:37,  3.39s/it]  

Error processing prompt 16: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


  4%|▍         | 38/1000 [04:30<2:08:14,  8.00s/it]

Error processing prompt 38: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 35%|███▍      | 349/1000 [42:08<1:05:17,  6.02s/it]

Error processing prompt 349: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 40%|███▉      | 397/1000 [48:05<1:33:50,  9.34s/it]

Error processing prompt 397: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 41%|████▏     | 413/1000 [49:31<58:29,  5.98s/it]  

Error processing prompt 413: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 48%|████▊     | 480/1000 [58:59<1:44:21, 12.04s/it]

Error processing prompt 480: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 52%|█████▏    | 517/1000 [1:04:00<1:04:32,  8.02s/it]

Error processing prompt 517: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 80%|████████  | 804/1000 [1:42:28<36:53, 11.30s/it]  

Error processing prompt 804: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


 87%|████████▋ | 872/1000 [1:50:37<21:18,  9.98s/it]

Error processing prompt 872: Could not create `Blob`, expected `Blob`, `dict` or an `Image` type(`PIL.Image.Image` or `IPython.display.Image`).
Got a: <class 'float'>
Value: nan


100%|██████████| 1000/1000 [2:07:21<00:00,  7.64s/it]



Responses saved to ../data/FineTunedResponses/responses_to_fine_tuned.csv

Sample responses to fine-tuned prompts:
                                   fine_tuned_prompt  \
0  Here are two examples of one-pot pasta recipes...   
1  Here is an example of blog content and the cor...   
2  You are a Java programming assistant. I need y...   

                              response_to_fine_tuned  
0  Okay, let's analyze the strengths and weakness...  
1  Okay, let's break down the likely user intent ...  
2  ```java\npublic class MergeSort {\n\n    publi...  


In [15]:
model = genai.GenerativeModel('gemini-2.0-flash')
def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Process the prompts from Flash2.0Responses
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/Flash2.0Responses/all_prompts.csv")

# Rename the columns to match our expected format
prompts_df = prompts_df.rename(columns={'base_response': 'base_prompt'})

# Create a new column to store the responses
prompts_df['response_to_base'] = None

# Process each prompt and get a response
print("\nProcessing Flash2.0 prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the base prompt
        response = get_response(prompts_df.loc[i, 'original_prompt'])
        prompts_df.loc[i, 'response_to_base'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.5)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'response_to_base'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/Flash2.0Responses/responses_to_base.csv", index=False)
print("\nResponses saved to ../data/Flash2.0Responses/responses_to_base.csv")

# Display a few examples of the responses
print("\nSample responses to Flash2.0 prompts:")
print(prompts_df[['original_prompt', 'response_to_base']].head(3))






Processing Flash2.0 prompts:


  2%|▏         | 16/1000 [01:26<46:44,  2.85s/it]  

Error processing prompt 16: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 33%|███▎      | 331/1000 [39:14<1:08:08,  6.11s/it]

Error processing prompt 331: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 70%|██████▉   | 696/1000 [1:24:37<25:44,  5.08s/it]  

Error processing prompt 696: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 94%|█████████▍| 943/1000 [1:54:23<09:26,  9.94s/it]  

Error processing prompt 943: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


100%|██████████| 1000/1000 [2:01:53<00:00,  7.31s/it]



Responses saved to ../data/Flash2.0Responses/responses_to_base.csv

Sample responses to Flash2.0 prompts:
                                     original_prompt  \
0   One-pot vegetarian pasta recipes for busy nights   
1  We have the following blog content... what is ...   
2  how o sort element using merge sort technique ...   

                                    response_to_base  
0  Okay, here are some delicious and easy one-pot...  
1  Okay, after analyzing the blog content, here's...  
2  ```java\npublic class MergeSort {\n\n    publi...  


In [16]:
model = genai.GenerativeModel('gemini-2.0-flash')
def get_response(prompt):
    response = model.generate_content(prompt)
    return response.text


# Process the prompts from ShareGPT
import pandas as pd
import time
from tqdm import tqdm

# Load the prompts from the CSV file
prompts_df = pd.read_csv("../data/ShareGPT/separated_prompts_clean.csv")

# Create a new column to store the responses
prompts_df['og_response'] = None

# Process each prompt and get a response
print("\nProcessing ShareGPT prompts:")
for i in tqdm(range(len(prompts_df))):
    try:
        # Get the response for the prompt
        response = get_response(prompts_df.loc[i, 'original_prompt'])
        prompts_df.loc[i, 'og_response'] = response
        
        # Add a small delay to avoid rate limiting
        time.sleep(0.5)
    except Exception as e:
        print(f"Error processing prompt {i}: {e}")
        prompts_df.loc[i, 'og_response'] = f"Error: {str(e)}"
        
        # Wait a bit longer if there's an error
        time.sleep(2)

# Save the results to a new CSV file
prompts_df.to_csv("../data/ShareGPT/og_responses.csv", index=False)
print("\nResponses saved to ../data/ShareGPT/og_responses.csv")

# Display a few examples of the responses
print("\nSample responses to ShareGPT prompts:")
print(prompts_df[['original_prompt', 'og_response']].head(3))


Processing ShareGPT prompts:


 40%|███▉      | 399/1000 [45:54<47:03,  4.70s/it]  

Error processing prompt 399: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


 99%|█████████▉| 988/1000 [1:57:00<01:39,  8.28s/it]  

Error processing prompt 988: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](https://ai.google.dev/api/generate-content#finishreason) is 4. Meaning that the model was reciting from copyrighted material.


100%|██████████| 1000/1000 [1:59:03<00:00,  7.14s/it]



Responses saved to ../data/ShareGPT/og_responses.csv

Sample responses to ShareGPT prompts:
                                     original_prompt  \
0   One-pot vegetarian pasta recipes for busy nights   
1  We have the following blog content... what is ...   
2  how o sort element using merge sort technique ...   

                                         og_response  
0  Okay, here are some one-pot vegetarian pasta r...  
1  Okay, let's break down the likely user intent ...  
2  ```java\npublic class MergeSort {\n\n    publi...  
