In [1]:
import pandas as pd

# Read the IPC sections table, decoding as UTF-8 and treating '"' as the
# quote character.
df = pd.read_csv(
    "ipc_sections.csv",
    encoding="utf-8",
    quotechar='"',
)

In [2]:
# Strip the leading "IPC_" prefix from every section label, force the column
# to plain strings, then order the rows by that label and renumber the index.
df = (
    df.assign(
        Section=df['Section'].str.replace(r'^IPC_', '', regex=True).astype(str)
    )
    .sort_values(by='Section')
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Description,Offense,Punishment,Section
0,Description of IPC Section 121A\nAccording to ...,Conspiring to commit certain offences against ...,Imprisonment for Life or 10 Years + Fine,121A
1,Description of IPC Section 122\nAccording to s...,"Collecting arms, etc., with The intention of w...",Imprisonment for Life or 10 Years + Fine,122
2,Description of IPC Section 122\nAccording to s...,"Collecting arms, etc., with The intention of w...",Imprisonment for Life or 10 Years + Fine,122
3,Description of IPC Section 123\nAccording to s...,Concealing with intent to facilitate a design ...,10 Years + Fine,123
4,Description of IPC Section 124\nAccording to s...,"Assaulting President, Governor, etc., with int...",7 Years + Fine,124


In [3]:
# Preview the first five rows of the cleaned, sorted frame.
df.head(n=5)

Unnamed: 0,Description,Offense,Punishment,Section
0,Description of IPC Section 121A\nAccording to ...,Conspiring to commit certain offences against ...,Imprisonment for Life or 10 Years + Fine,121A
1,Description of IPC Section 122\nAccording to s...,"Collecting arms, etc., with The intention of w...",Imprisonment for Life or 10 Years + Fine,122
2,Description of IPC Section 122\nAccording to s...,"Collecting arms, etc., with The intention of w...",Imprisonment for Life or 10 Years + Fine,122
3,Description of IPC Section 123\nAccording to s...,Concealing with intent to facilitate a design ...,10 Years + Fine,123
4,Description of IPC Section 124\nAccording to s...,"Assaulting President, Governor, etc., with int...",7 Years + Fine,124


In [4]:
import pandas as pd
import os
import json
import re
import google.generativeai as genai
from dotenv import load_dotenv

# Run python-dotenv's loader, then read the API key from the process
# environment and hand it to the Gemini client.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Shared Gemini model handle; temperature is set to 0 in the generation config.
model = genai.GenerativeModel(
    generation_config={"temperature": 0},
    model_name="gemini-2.0-flash",
)

# Clean and extract JSON from model response
def extract_json_from_response(response_text):
    """Return the JSON-array portion of a model reply.

    Markdown code-fence markers (optionally tagged ``json``) are removed and
    the text is stripped. If the remainder contains a ``[ ... ]`` span, the
    widest such span (first ``[`` to last ``]``) is returned; otherwise the
    stripped text is returned unchanged.
    """
    without_fences = re.sub(
        r"\s*```$",
        "",
        re.sub(r"```(?:json)?\s*", "", response_text),
    ).strip()

    array_match = re.search(r"\[.*\]", without_fences, re.DOTALL)
    if array_match is None:
        return without_fences
    return array_match.group(0)

# Build the prompt for one batch of IPC sections
def create_prompt(sub_df, num_scenarios=20):
    """Build the scenario-generation prompt for a batch of IPC sections.

    Args:
        sub_df: DataFrame slice with ``Section``, ``Description`` and
            ``Offense`` columns; one prompt entry is produced per row.
        num_scenarios: Number of scenarios the model is asked to generate.
            Defaults to 20.

    Returns:
        The prompt text. Its header states the actual number of rows in
        ``sub_df`` (the last batch of a run can be shorter than the others),
        and it ends with the JSON format the reply must follow.
    """
    entries = [
        f"Section {row['Section']}:\n"
        f"Description: {row['Description']}\n"
        f"Offense: {row['Offense']}\n"
        for _, row in sub_df.iterrows()
    ]

    # Report the real batch size instead of a fixed "10".
    section_count = len(entries)
    noun = "section" if section_count == 1 else "sections"
    header = f"Below are details of {section_count} IPC {noun}:\n\n"

    task = (
        f"\n\nBased on the above sections, generate {num_scenarios} unique legal scenarios. "
        "Each scenario should describe a real-world situation and list two or more relevant IPC section numbers.\n"
        "Return the response in **raw JSON** format, without backticks or explanations. Format:\n"
    )
    # Plain (non-f) string: the braces of the JSON example must stay literal.
    example = '[\n  {\n    "scenario": "Some event",\n    "ipc_sections": ["140", "141"]\n  }, ...\n]'

    return header + "\n".join(entries) + task + example

# Function to get response from Gemini and parse it
def generate_scenarios(prompt):
    """Send ``prompt`` to the Gemini model and return the parsed scenarios.

    A fresh chat is started on the module-level ``model``, the prompt is sent
    as a single message, and the reply text is cleaned with
    ``extract_json_from_response`` before being parsed as JSON.

    Args:
        prompt: Prompt text to send to the model.

    Returns:
        A list of scenario dicts. An empty list is returned when the reply is
        empty, has no readable text, is not valid JSON, or is not a JSON
        array. Non-dict items inside the array are dropped.
    """
    chat = model.start_chat()
    response = chat.send_message(prompt)
    if not response:
        return []

    try:
        # Per the google-generativeai docs, the `.text` accessor raises
        # ValueError when the reply carries no text part (e.g. a blocked
        # response); treat that as "no scenarios" instead of aborting the run.
        raw_text = response.text
    except ValueError as exc:
        print(f"⚠️ Response contained no text: {exc}")
        return []

    try:
        parsed = json.loads(extract_json_from_response(raw_text))
    except json.JSONDecodeError:
        print("⚠️ JSON decode error!")
        return []

    # Callers extend a list with the result, so anything other than a JSON
    # array (e.g. a single object) would silently corrupt the collection.
    if not isinstance(parsed, list):
        print(f"⚠️ Expected a JSON array of scenarios, got {type(parsed).__name__}.")
        return []

    # Keep only object entries; bare strings or numbers are not scenarios.
    return [item for item in parsed if isinstance(item, dict)]

# Walk the DataFrame in consecutive slices of `batch_size` rows, ask the model
# for scenarios for each slice, and accumulate everything in `all_scenarios`.
batch_size = 10
all_scenarios = []

batch_starts = range(0, len(df), batch_size)
for batch_number, start in enumerate(batch_starts, start=1):
    chunk = df.iloc[start:start + batch_size]
    last_row = start + len(chunk) - 1
    print(f"⏳ Processing batch {batch_number} ({start} to {last_row})...")

    prompt = create_prompt(chunk)
    scenarios = generate_scenarios(prompt)

    if not scenarios:
        print("❌ No scenarios returned for this batch.")
        continue

    all_scenarios.extend(scenarios)
    print(f"✅ Got {len(scenarios)} scenarios.")

# Write the combined result as pretty-printed, non-ASCII-escaped JSON.
output_path = "combined_ipc_scenarios.json"
with open(output_path, "w", encoding="utf-8") as out_file:
    json.dump(all_scenarios, out_file, indent=2, ensure_ascii=False)

print(f"\n🎉 All done! Total scenarios collected: {len(all_scenarios)}")
print(f"📁 Saved to: {output_path}")

⏳ Processing batch 1 (0 to 9)...
✅ Got 20 scenarios.
⏳ Processing batch 2 (10 to 19)...
✅ Got 20 scenarios.
⏳ Processing batch 3 (20 to 29)...
✅ Got 20 scenarios.
⏳ Processing batch 4 (30 to 39)...
✅ Got 20 scenarios.
⏳ Processing batch 5 (40 to 49)...
✅ Got 20 scenarios.
⏳ Processing batch 6 (50 to 59)...
✅ Got 20 scenarios.
⏳ Processing batch 7 (60 to 69)...
✅ Got 20 scenarios.
⏳ Processing batch 8 (70 to 79)...
✅ Got 20 scenarios.
⏳ Processing batch 9 (80 to 89)...
✅ Got 20 scenarios.
⏳ Processing batch 10 (90 to 99)...
✅ Got 20 scenarios.
⏳ Processing batch 11 (100 to 109)...
✅ Got 20 scenarios.
⏳ Processing batch 12 (110 to 119)...
✅ Got 20 scenarios.
⏳ Processing batch 13 (120 to 129)...
✅ Got 20 scenarios.
⏳ Processing batch 14 (130 to 139)...
✅ Got 20 scenarios.
⏳ Processing batch 15 (140 to 149)...
✅ Got 20 scenarios.
⏳ Processing batch 16 (150 to 159)...
✅ Got 20 scenarios.
⏳ Processing batch 17 (160 to 169)...
✅ Got 20 scenarios.
⏳ Processing batch 18 (170 to 179)...
✅ Got 2

In [None]:
import json
import pandas as pd
import csv  # Needed for quoting options

# Load the generated scenarios from the JSON file
with open("combined_ipc_scenarios.json", "r", encoding="utf-8") as f:
    data = json.load(f)


def _format_sections(sections):
    """Render one record's IPC sections as a comma-separated string.

    The records are model-generated, so the value is not guaranteed to be a
    list of strings: items are converted with ``str`` before joining, a bare
    string is kept as-is (joining it would split it into characters), and a
    missing value (``None``/NaN) becomes an empty string.
    """
    if sections is None:
        return ""
    if isinstance(sections, float) and sections != sections:  # NaN check
        return ""
    if isinstance(sections, str):
        return sections
    if isinstance(sections, (list, tuple)):
        return ", ".join(str(section) for section in sections)
    return str(sections)


# Convert to DataFrame.
# NOTE(review): this rebinds `df`, which earlier cells use for the IPC
# sections table; re-running those cells after this one needs a fresh load.
df = pd.DataFrame(data)

# Create a 1-based 'id' column as the first column
df.insert(0, "id", range(1, len(df) + 1))

# Join each section list into a comma-separated string
df["ipc_sections"] = df["ipc_sections"].apply(_format_sections)

# Rename 'ipc_sections' to 'sections' ('scenario' keeps its name)
df = df.rename(columns={"ipc_sections": "sections"})

# Save to CSV; QUOTE_ALL quotes every field, including the numeric id
df.to_csv("ipc_scenarios.csv", index=False, quoting=csv.QUOTE_ALL)

print("✅ CSV with quoted text saved as ipc_scenarios.csv")


✅ CSV with quoted text saved as ipc_scenarios.csv
