In [11]:
import google.generativeai as genai
import pandas as pd
import json
import time
import os

# Configure the Gemini API from the environment.
# Never hard-code the key in source: any key that has been committed or shared
# in a notebook should be treated as leaked and rotated.
_api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "No Gemini API key found: set the GOOGLE_API_KEY (or GEMINI_API_KEY) "
        "environment variable before running this script."
    )
genai.configure(api_key=_api_key)

# Locations of the source dataset, the final CSV, and the resumable checkpoint
input_file = "original/main_test.csv"
output_file = "modified/main_test.csv"
temp_json_file = "modified/temp_results.jsonl"

# Instruction text that is placed at the top of every request sent to the model
with open("prompt.txt", "r") as prompt_file:
    base_prompt = prompt_file.read()

# Read the dataset and turn every row into a dict keyed by column name
df = pd.read_csv(input_file)
data = df.to_dict("records")

# Questions that already have a checkpoint entry and must not be re-sent
processed_questions = set()

# Replay the JSONL checkpoint (one JSON object per line) to resume a prior run
if os.path.exists(temp_json_file):
    with open(temp_json_file, "r") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue  # Blank lines carry no checkpoint data
            try:
                processed_questions.add(json.loads(line)["question"])
            except (json.JSONDecodeError, KeyError, TypeError) as exc:
                # A truncated write, a non-object line, or a record without a
                # "question" field must not abort the resume; report and skip.
                print(f"Ignoring unusable checkpoint line {line_number}: {exc!r}")

# Define function to extract final numerical value
def extract_value(entry):
    """Ask the Gemini model for the final value of one record.

    The record is serialised to JSON, appended to the module-level
    ``base_prompt`` and sent to the ``gemini-2.0-flash`` model. The first JSON
    object found in the reply is parsed and its ``"value"`` field is returned.

    Args:
        entry: A JSON-serialisable record (the caller passes one CSV row as a
            dict).

    Returns:
        The ``"value"`` field of the model's JSON reply, or ``None`` when the
        reply has no usable text, contains no JSON object, or the object has
        no ``"value"`` key.
    """
    prompt = f"""
    {base_prompt}
    
    **Input JSON:**
    {json.dumps(entry, indent=2)}
    
    **Output JSON:** (Ensure the response is valid JSON)
    """

    model = genai.GenerativeModel("gemini-2.0-flash")

    try:
        response = model.generate_content(prompt)
        # NOTE: per the SDK docs, `.text` raises ValueError when the response
        # carries no text part (e.g. a blocked candidate); treat as "no value".
        response_text = response.text.strip()

        json_start = response_text.find("{")
        if json_start == -1:
            return None  # The reply contains no JSON object at all

        # Decode the first complete JSON object and ignore whatever follows it
        # (closing code fences, commentary), which may itself contain braces.
        response_json, _ = json.JSONDecoder().raw_decode(response_text, json_start)
        value = response_json.get("value")
    except (ValueError, AttributeError, TypeError):
        # json.JSONDecodeError is a ValueError subclass, so malformed JSON
        # lands here as well.
        return None

    print(value)
    return value

# Process data row by row, checkpointing each result so a run can be resumed
new_results = []
total_rows = len(data)

# Opening the checkpoint or writing the CSV fails if their folders are missing
for _target in (temp_json_file, output_file):
    _folder = os.path.dirname(_target)
    if _folder:
        os.makedirs(_folder, exist_ok=True)

try:
    with open(temp_json_file, "a") as temp_f:  # Append so earlier progress is kept
        for i, entry in enumerate(data):
            if entry["question"] in processed_questions:
                print(f"Skipping {i+1}/{total_rows}: Already processed -> {entry['question'][:50]}...")
                continue  # Skip if already processed

            print(f"Processing {i+1}/{total_rows}: {entry['question'][:50]}...")
            entry["value"] = extract_value(entry)

            # Save progress to the JSONL file and push it to disk right away,
            # so a crash cannot lose rows that were already paid for
            temp_f.write(json.dumps(entry) + "\n")
            temp_f.flush()

            # Store new result for CSV writing
            new_results.append(entry)

            time.sleep(1)  # Delay to avoid hitting API rate limits
finally:
    # Runs on normal completion AND on interruption (e.g. KeyboardInterrupt).
    # Rows written to the checkpoint are skipped on the next run, so they must
    # reach the CSV now or they would never be exported.
    if new_results:
        new_df = pd.DataFrame(new_results)

        # Merge with previously exported rows (if the file exists, append)
        if os.path.exists(output_file):
            existing_df = pd.read_csv(output_file)
            final_df = pd.concat([existing_df, new_df], ignore_index=True)
        else:
            final_df = new_df

        # Save updated CSV
        final_df.to_csv(output_file, index=False)
        print(f"\n✅ Updated CSV saved as {output_file}")
    else:
        print(f"\nNo new rows processed; {output_file} was not modified")


Skipping 1/1319: Already processed -> Janet’s ducks lay 16 eggs per day. She eats three ...
Skipping 2/1319: Already processed -> A robe takes 2 bolts of blue fiber and half that m...
Skipping 3/1319: Already processed -> Josh decides to try flipping a house.  He buys a h...
Processing 4/1319: James decides to run 3 sprints 3 times a week.  He...
540
Processing 5/1319: Every day, Wendi feeds each of her chickens three ...
20
Processing 6/1319: Kylar went to the store to buy glasses for his new...
64
Processing 7/1319: Toulouse has twice as many sheep as Charleston. Ch...
260
Processing 8/1319: Carla is downloading a 200 GB file. Normally she c...
160
Processing 9/1319: John drives for 3 hours at a speed of 60 mph and t...
45
Processing 10/1319: Eliza's rate per hour for the first 40 hours she w...
460
Processing 11/1319: A new program had 60 downloads in the first month....
366
Processing 12/1319: Toula went to the bakery and bought various types ...
694
Processing 13/1319: Carlos is p

KeyboardInterrupt: 