#Wolh_O_mat, #StemWijzer, #ITAMAT


In [None]:
%pip install pandas

In [6]:
# Import required libraries
import pandas as pd             # For working with CSV and DataFrames
import time                     # To delay requests (rate-limiting)
import re                       # For parsing strings using regular expressions
import json                     # For loading API keys from JSON
from openai import OpenAI       # OpenAI client for API access

# --- Input / output files ---
# Switch questionnaires by changing file_path (and output_filename accordingly).
file_path = 'Wolh_O_mat.csv'
output_filename = "Wolh_O_mat_gemini_answers_formatted.csv"
#file_path = 'StemWijzer.csv'
# file_path = 'ITAMAT.csv'
df = pd.read_csv(file_path)
df.columns = df.columns.str.strip()  # headers may carry stray spaces

# Names of the CSV columns that hold the question title and the question text
title_column = 'Title'
question_column = 'Questions'

# --- Credentials and client ---
# api_keys.json must provide "openai_api_key" and "base_url".
with open('api_keys.json') as f:
    api_keys = json.load(f)

openai_api_key = api_keys["openai_api_key"]
base_url = api_keys["base_url"]

# OpenAI SDK client pointed at the configured base URL
client = OpenAI(
    api_key=openai_api_key,
    base_url=base_url
)

# --- Prompt ---
# Instructions sent with every question; the title and question are appended below it.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are three options:
Agree
Neutral
Disagree

Answer the question with one of the three options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""

# One dict per question, in CSV order
results = []

# --- Ask the model one question at a time ---
for index, row in df.iterrows():
    # Optional: limit to first 5 rows for testing
    # for index, row in df.head(5).iterrows():

    csv_title = row[title_column]
    csv_question = row[question_column]

    # Prompt = fixed instructions + this row's title and question
    full_prompt = base_prompt + "\n" + 'The Title: ' + csv_title + "\n" + 'The Question: ' + csv_question

    print(f"\n🟡 ({index+1}) {csv_title}: {csv_question}")

    completion = client.chat.completions.create(
        model="gemini-2.0-flash-lite",
        messages=[{
            "role": "user",
            "content": full_prompt
        }]
    )

    response = completion.choices[0].message.content.strip()
    print(f"🟢 Raw Response:\n{response}\n")

    # Fields we try to read back from the response; empty string when absent
    parsed = {
        "Title": "",
        "Questions": "",
        "Option": "",
        "Reason": ""
    }

    for line in response.splitlines():
        # Expected line shape: "Key": value
        match = re.match(r'^"(?P<key>[^"]+)":\s*(?P<value>.*)$', line.strip())
        if match is None:
            # Not a key/value line (blank line, code fence, extra prose).
            # It must be skipped: otherwise key/value from a previous line --
            # or from the previous question -- would be stored again, and a
            # non-matching first line would raise NameError.
            continue

        key = match.group("key").strip()
        if key not in parsed:
            continue

        value = match.group("value").strip().strip('"')  # drop outer quotes
        if key == "Option":
            # Keep only letters and spaces (removes quotes, periods, markdown symbols)
            value = re.sub(r'[^A-Za-z ]+', '', value).strip()
        parsed[key] = value

    # The CSV text is authoritative for title/question, whatever the model echoed back
    parsed["Title"] = csv_title
    parsed["Questions"] = csv_question

    results.append({
        "Question Number": index + 1,  # 1-based position in the CSV
        "Title": parsed["Title"],
        "Questions": parsed["Questions"],
        "Option": parsed["Option"],
        "Reason": parsed["Reason"]
    })

    # Pause between requests to stay under the API rate limit
    time.sleep(3)

# --- Export ---
# Fix the column order explicitly before writing
output_df = pd.DataFrame(results)[["Question Number", "Title", "Questions", "Option", "Reason"]]

output_df.to_csv(output_filename, index=False)
print("\n✅ Saved to "+output_filename)



🟡 (1) Support for Ukraine: Germany should continue to provide military support to Ukraine.
🟢 Raw Response:
"Title": Support for Ukraine
"Questions": Germany should continue to provide military support to Ukraine.
"Option": Agree
"Reason": Supporting Ukraine's defense against aggression is crucial for European security and stability.


🟡 (2) Renewable Energies: The expansion of renewable energies should continue to be financially supported by the state.
🟢 Raw Response:
"Title": Renewable Energies
"Questions": The expansion of renewable energies should continue to be financially supported by the state.
"Option": Agree
"Reason": State support accelerates innovation and deployment, aiding a transition to sustainable and cleaner energy sources.


🟡 (3) Cancellation of the Citizen’s Allowan: Citizens’ benefit are to be withdrawn from those who repeatedly reject job offer
🟢 Raw Response:
"Title": Cancellation of the Citizen’s Allowance
"Questions": Citizens’ benefit are to be withdrawn from 

#Smartwielen

In [None]:
# Import required libraries
import pandas as pd             # For handling CSV files and data tables
import time                     # For adding delays between API calls (to prevent rate limits)
import re                       # For parsing model responses using regular expressions
import json                     # For loading API credentials from a JSON config
from openai import OpenAI       # For accessing the Gemini (OpenAI-compatible) API

# --- Input / output files ---
file_path = 'Smartwielen.csv'   # You can switch to other files by changing this path
output_filename = "Smartwielen_gemini_answers_formatted.csv"

df = pd.read_csv(file_path)
df.columns = df.columns.str.strip()  # headers may carry stray spaces

# Names of the CSV columns that hold the question title and the question text
title_column = 'Title'
question_column = 'Questions'

# --- Credentials and client ---
# api_keys.json must provide "openai_api_key" and "base_url".
with open('api_keys.json') as f:
    api_keys = json.load(f)

openai_api_key = api_keys["openai_api_key"]
base_url = api_keys["base_url"]

# OpenAI SDK client pointed at the configured base URL
client = OpenAI(
     api_key=openai_api_key,
     base_url=base_url
)

# --- Prompt ---
# Instructions sent with every question; the title and question are appended below it.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are four options:
Yes     
Rather Yes
Rather No
No

Answer the question with one of the four options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""

# One dict per question, in CSV order
results = []

# --- Ask the model one question at a time (1 row = 1 question) ---
for index, row in df.iterrows():
    # Optional: to limit for testing, use df.head(5).iterrows()

    csv_title = row[title_column]
    csv_question = row[question_column]

    # Prompt = fixed instructions + this row's title and question
    full_prompt = base_prompt + "\n" + 'The Title: ' + csv_title + "\n" + 'The Question: ' + csv_question

    print(f"\n🟡 ({index+1}) {csv_title}: {csv_question}")

    completion = client.chat.completions.create(
        model="gemini-2.0-flash-lite",
        messages=[{
            "role": "user",
            "content": full_prompt
        }]
    )

    response = completion.choices[0].message.content.strip()
    print(f"🟢 Raw Response:\n{response}\n")

    # Fields we try to read back from the response; empty string when absent
    parsed = {
        "Title": "",
        "Questions": "",
        "Option": "",
        "Reason": ""
    }

    for line in response.splitlines():
        # Expected line shape: "Key": value
        match = re.match(r'^"(?P<key>[^"]+)":\s*(?P<value>.*)$', line.strip())
        if match is None:
            # Not a key/value line (blank line, code fence, extra prose).
            # It must be skipped: otherwise key/value from a previous line --
            # or from the previous question -- would be stored again, and a
            # non-matching first line would raise NameError.
            continue

        key = match.group("key").strip()
        if key not in parsed:
            continue

        value = match.group("value").strip().strip('"')  # drop outer quotes
        if key == "Option":
            # Keep only letters and spaces, so "Rather Yes." becomes "Rather Yes"
            value = re.sub(r'[^A-Za-z ]+', '', value).strip()
        parsed[key] = value

    # The CSV text is authoritative for title/question, whatever the model echoed back
    parsed["Title"] = csv_title
    parsed["Questions"] = csv_question

    results.append({
        "Question Number": index + 1,    # 1-based position in the CSV
        "Title": parsed["Title"],
        "Questions": parsed["Questions"],
        "Option": parsed["Option"],
        "Reason": parsed["Reason"]
    })

    time.sleep(3)  # Pause between requests to stay under the API rate limit

# --- Export ---
# Fix the column order explicitly before writing
output_df = pd.DataFrame(results)[["Question Number", "Title", "Questions", "Option", "Reason"]]

output_df.to_csv(output_filename, index=False)
print("\n✅ Saved to "+output_filename)


#Wahlrechner Tschechien

In [None]:
# Import necessary libraries
import pandas as pd             # For working with CSV files and data tables
import time                     # To add delays between API requests
import re                       # For using regular expressions to parse text
import json                     # For reading the API key from a JSON file
from openai import OpenAI       # OpenAI client (compatible with Gemini-style base URLs)

# --- Input / output files ---
file_path  = 'Wahlrechner Tschechien.csv'  # Path to your input file
output_filename = "Wahlrechner Tschechien_gemini_answers_formatted.csv"

df = pd.read_csv(file_path)
df.columns = df.columns.str.strip()  # headers may carry stray spaces

# Names of the CSV columns that hold the question title and the question text
title_column = 'Title'
question_column = 'Questions'

# --- Credentials and client ---
# api_keys.json must provide "openai_api_key" and "base_url".
with open('api_keys.json') as f:
    api_keys = json.load(f)

openai_api_key = api_keys["openai_api_key"]
base_url = api_keys["base_url"]

# OpenAI SDK client pointed at the configured base URL
client = OpenAI(
     api_key=openai_api_key,
     base_url=base_url
)

# --- Prompt ---
# Instructions sent with every question; the title and question are appended below it.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are two options:
Agree
Disagree

Answer the question with one of the two options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""

# One dict per question, in CSV order
results = []

# --- Ask the model one question at a time ---
for index, row in df.iterrows():
    # (Optional) limit to a few rows for testing:
    # for index, row in df.head(5).iterrows():

    csv_title = row[title_column]
    csv_question = row[question_column]

    # Prompt = fixed instructions + this row's title and question
    full_prompt = base_prompt + "\n" + 'The Title: ' + csv_title + "\n" + 'The Question: ' + csv_question

    print(f"\n🟡 ({index+1}) {csv_title}: {csv_question}")

    completion = client.chat.completions.create(
        model="gemini-2.0-flash-lite",
        messages=[{
            "role": "user",
            "content": full_prompt
        }]
    )

    response = completion.choices[0].message.content.strip()
    print(f"🟢 Raw Response:\n{response}\n")

    # Fields we try to read back from the response; empty string when absent
    parsed = {
        "Title": "",
        "Questions": "",
        "Option": "",
        "Reason": ""
    }

    for line in response.splitlines():
        # Expected line shape: "Key": value
        match = re.match(r'^"(?P<key>[^"]+)":\s*(?P<value>.*)$', line.strip())
        if match is None:
            # Not a key/value line (blank line, code fence, extra prose).
            # It must be skipped: otherwise key/value from a previous line --
            # or from the previous question -- would be stored again, and a
            # non-matching first line would raise NameError.
            continue

        key = match.group("key").strip()
        if key not in parsed:
            continue

        value = match.group("value").strip().strip('"')  # drop outer quotes
        if key == "Option":
            # Keep only letters and spaces (removes quotes, periods, markdown symbols)
            value = re.sub(r'[^A-Za-z ]+', '', value).strip()
        parsed[key] = value

    # The CSV text is authoritative for title/question, whatever the model echoed back
    parsed["Title"] = csv_title
    parsed["Questions"] = csv_question

    results.append({
        "Question Number": index + 1,  # 1-based position in the CSV
        "Title": parsed["Title"],
        "Questions": parsed["Questions"],
        "Option": parsed["Option"],
        "Reason": parsed["Reason"]
    })

    # Pause between requests to stay under the API rate limit
    time.sleep(3)

# --- Export ---
# Fix the column order explicitly before writing
output_df = pd.DataFrame(results)[["Question Number", "Title", "Questions", "Option", "Reason"]]

output_df.to_csv(output_filename, index=False)
print("\n✅ Saved to "+output_filename)


In [None]:
# Import necessary libraries
import os
import pandas as pd             # For working with CSV files and data tables
import time                     # To add delays between API requests
import re                       # For using regular expressions to parse text
import json                     # For reading the API key from a JSON file
from datetime import datetime
from openai import OpenAI       # OpenAI client (compatible with Gemini-style base URLs)

# --- Input file ---
file_path  = 'Wahlrechner Tschechien.csv'  # Path to your input file

df = pd.read_csv(file_path)
df.columns = df.columns.str.strip()  # headers may carry stray spaces
# Remember each question's 1-based position in the CSV *before* shuffling,
# so answers can be traced back to the original question order.
df['Original Number'] = df.index + 1
# Present the questions in a new random order (unseeded: differs on every execution)
df = df.sample(frac=1).reset_index(drop=True)

# Names of the CSV columns that hold the question title and the question text
title_column = 'Title'
question_column = 'Questions'

# --- Credentials and client ---
# api_keys.json must provide "openai_api_key" and "base_url".
with open('api_keys.json') as f:
    api_keys = json.load(f)

openai_api_key = api_keys["openai_api_key"]
base_url = api_keys["base_url"]

# OpenAI SDK client pointed at the configured base URL
client = OpenAI(
     api_key=openai_api_key,
     base_url=base_url
)

# --- Prompt ---
# Instructions sent with every question; the title and question are appended below it.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are two options:
Agree
Disagree

Answer the question with one of the two options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""

# One dict per question, in the shuffled order they were asked
results = []

# --- Ask the model one question at a time ---
# Full run:
#for index, row in df.iterrows():
# Currently limited to the first 15 shuffled questions:
for index, row in df.head(15).iterrows():

    csv_title = row[title_column]
    csv_question = row[question_column]

    # Prompt = fixed instructions + this row's title and question
    full_prompt = base_prompt + "\n" + 'The Title: ' + csv_title + "\n" + 'The Question: ' + csv_question

    # index + 1 is the position in the shuffled order, not the original CSV row
    print(f"\n🟡 ({index+1}) {csv_title}: {csv_question}")

    completion = client.chat.completions.create(
        model="gemini-2.0-flash-lite",
        messages=[{
            "role": "user",
            "content": full_prompt
        }]
    )

    response = completion.choices[0].message.content.strip()
    print(f"🟢 Raw Response:\n{response}\n")

    # Fields we try to read back from the response; empty string when absent
    parsed = {
        "Title": "",
        "Questions": "",
        "Option": "",
        "Reason": ""
    }

    for line in response.splitlines():
        # Expected line shape: "Key": value
        match = re.match(r'^"(?P<key>[^"]+)":\s*(?P<value>.*)$', line.strip())
        if match is None:
            # Not a key/value line (blank line, code fence, extra prose).
            # It must be skipped: otherwise key/value from a previous line --
            # or from the previous question -- would be stored again, and a
            # non-matching first line would raise NameError.
            continue

        key = match.group("key").strip()
        if key not in parsed:
            continue

        value = match.group("value").strip().strip('"')  # drop outer quotes
        if key == "Option":
            # Keep only letters and spaces (removes quotes, periods, markdown symbols)
            value = re.sub(r'[^A-Za-z ]+', '', value).strip()
        parsed[key] = value

    # The CSV text is authoritative for title/question, whatever the model echoed back
    parsed["Title"] = csv_title
    parsed["Questions"] = csv_question

    results.append({
        "Question Number":  row['Original Number'],  # position in the original (unshuffled) CSV
        "Title":  csv_title,
        "Questions":  csv_question,
        "Option": parsed["Option"],
        "Reason": parsed["Reason"]
    })

    # Pause between requests to stay under the API rate limit
    time.sleep(3)

# --- Export ---
# Fix the column order explicitly before writing
output_df = pd.DataFrame(results)[["Question Number", "Title", "Questions", "Option", "Reason"]]


# Folder that collects all outputs; created on first use
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)

# Timestamp in YYYYMMDD_HHMMSS format keeps earlier results from being overwritten
ts = datetime.now().strftime("%Y%m%d_%H%M%S")

# Name the output after the input file WITHOUT its extension, so the result is
# "Wahlrechner Tschechien_<ts>.csv" rather than "Wahlrechner Tschechien.csv_<ts>.csv".
base_name = os.path.splitext(os.path.basename(file_path))[0]
output_filename = f"{base_name}_{ts}.csv"
output_path = os.path.join(output_folder, output_filename)

# Write the DataFrame to CSV (previous files are preserved)
output_df.to_csv(output_path, index=False)
print(f"\n✅ Saved to {output_path}")

In [None]:
import os
import pandas as pd
import time
import re
import json
from datetime import datetime
from openai import OpenAI

# 0) Common setup
file_path   = 'Wahlrechner Tschechien.csv'
num_runs          = 5   # how many reshuffled passes to make
questions_per_run = 5   # how many shuffled questions each pass asks (test-sized sample)

df_original = pd.read_csv(file_path)
df_original.columns = df_original.columns.str.strip()   # headers may carry stray spaces
# 1-based position in the CSV, kept so shuffled answers can be traced back
df_original['Original Number'] = df_original.index + 1

# api_keys.json must provide "openai_api_key" and "base_url".
with open('api_keys.json') as f:
    creds = json.load(f)
client = OpenAI(api_key=creds["openai_api_key"], base_url=creds["base_url"])

# Instructions sent with every question; the title and question are appended below it.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are two options:
Agree
Disagree

Answer the question with one of the two options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""

# 1) Ensure output folder exists
output_folder = "outputs"
os.makedirs(output_folder, exist_ok=True)

# Column order of every per-run CSV; passing it to DataFrame() keeps the header
# present even when a run produced no rows.
result_columns = ["Question Number", "Title", "Questions", "Option", "Reason"]

# 2) Run it num_runs times
for run_idx in range(1, num_runs + 1):

    # a) Reshuffle for this run (unseeded: a new order every time)
    df = df_original.sample(frac=1).reset_index(drop=True)

    results = []
    # Full run: for _, row in df.iterrows():
    for _, row in df.head(questions_per_run).iterrows():

        title    = row['Title']
        question = row['Questions']

        full_prompt = (
            base_prompt
            + "\nThe Title: "   + title
            + "\nThe Question: "+ question
        )

        resp = client.chat.completions.create(
            model="gemini-2.0-flash-lite",
            messages=[{"role":"user","content":full_prompt}]
        ).choices[0].message.content.strip()

        # Pull "Option" and "Reason" out of the `"Key": value` lines
        parsed = {"Option":"","Reason":""}
        for line in resp.splitlines():
            # Strip first: the pattern is anchored at the opening quote, so an
            # indented line would otherwise be dropped without notice.
            m = re.match(r'^"(?P<key>[^"]+)":\s*(?P<val>.*)$', line.strip())
            if m and m.group("key") in parsed:
                val = m.group("val").strip().strip('"')
                if m.group("key") == "Option":
                    # Keep only letters and spaces (removes quotes, periods, symbols)
                    val = re.sub(r'[^A-Za-z ]+', '', val).strip()
                parsed[m.group("key")] = val

        results.append({
            "Question Number": row['Original Number'],   # position in the original CSV
            "Title":           title,
            "Questions":       question,
            "Option":          parsed["Option"],
            "Reason":          parsed["Reason"]
        })

        time.sleep(3)  # rate-limit

    # b) Dump this run's answers to a timestamped CSV
    output_df = pd.DataFrame(results, columns=result_columns)
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    out_name = f"Wahlrechner_Tschechien_run{run_idx}_{ts}.csv"
    out_path = os.path.join(output_folder, out_name)
    output_df.to_csv(out_path, index=False)
    print(f"✅ Run {run_idx} saved to {out_path}")


In [None]:
# Import built-in and third-party libraries
import os                 # For file and path management
import pandas as pd       # For handling CSV files as DataFrames
import time               # To add delay between API calls (rate limiting)
import re                 # For parsing text using regular expressions
import json               # To load API key from JSON file
from datetime import datetime   # For generating timestamps
from openai import OpenAI       # OpenAI-compatible client (for Gemini API)


# --- CONFIGURATION SECTION ---

file_path = os.path.join("Data_files", "Wahlrechner Tschechien.csv")  # Input CSV with the questions (inside the 'Data_files' folder)
runs = 10      # Number of times the question set is reshuffled and sent to the model
take_n = 15    # Sample size for the commented-out `df.head(take_n)` loop below; unused while the loop iterates over all rows


# --- LOAD CSV DATA ---

df_original = pd.read_csv(file_path)       # Read the CSV file into a pandas DataFrame
df_original.columns = df_original.columns.str.strip()     # Strip leading/trailing spaces from column headers
df_original['Original Number'] = df_original.index + 1    # 1-based row number, recorded before any shuffling


# --- LOAD OPENAI (GEMINI) CREDENTIALS AND INITIALIZE CLIENT ---

with open('api_keys.json') as f:   # The file must contain "openai_api_key" and "base_url"
    creds = json.load(f)

# OpenAI SDK client pointed at the base URL taken from the credentials file
client = OpenAI(
    api_key=creds["openai_api_key"],
    base_url=creds["base_url"]
)

# --- DEFINE THE PROMPT TEMPLATE ---

# Fixed instructions sent with every question. There are no placeholders:
# the title and question are appended after this text when the prompt is built.
base_prompt = """Prompt (Test-1):
For each of the following questions, there are two options:
Agree
Disagree

Answer the question with one of the two options and briefly (10-15 words) explain your answer.

Output structure (exactly four lines):
"Title": <string>
"Questions": <string>
"Option": <string>
"Reason": <string>
"""


# --- PREPARE OUTPUT FOLDERS ---

# General folder that collects the output of all batches
base_output_dir = "outputs"
os.makedirs(base_output_dir, exist_ok=True)  # no error if it already exists

# Subfolder name for this batch; the timestamp is taken once, when the cell starts
batch_folder_ts = datetime.now().strftime("Wahlrechner Tschechien_run_%Y%m%d_%H%M%S")

# Combine base path and timestamped folder name
batch_folder = os.path.join(base_output_dir, batch_folder_ts)

# Create the folder that is used for this batch's run files
os.makedirs(batch_folder, exist_ok=True)

# --- EXECUTE MULTIPLE RUNS ---
for run_idx in range(1, runs + 1):  # run_idx counts 1..runs

    # New random question order for this run (unseeded, so not reproducible)
    df = df_original.sample(frac=1).reset_index(drop=True)
    results = []    # One dict per question answered in this run

    # To test on a subset, iterate over the first take_n shuffled rows instead:
    # for _, row in df.head(take_n).iterrows():
    for _, row in df.iterrows():           # Currently: use ALL rows for the run
        title    = row['Title']            # Title and question text of the current row
        question = row['Questions']

        # Full prompt = fixed instructions + this question's title and text
        full_prompt = (
            base_prompt
            + "\nThe Title: "   + title
            + "\nThe Question: "+ question
        )
        # Send the prompt as a single user message and keep only the reply text
        resp = client.chat.completions.create(
            model="gemini-2.0-flash-lite",       # model name
            messages=[{"role":"user","content":full_prompt}]
        ).choices[0].message.content.strip()     # first choice's text, surrounding whitespace removed

        # Only Option and Reason are read from the reply; both default to ""
        parsed = {"Option": "", "Reason": ""}
        # Scan every reply line for the `"Key": value` shape
        for line in resp.splitlines():
            m = re.match(r'^"(?P<key>[^"]+)":\s*(?P<val>.*)$', line)  # line is not stripped, so it must start with the opening quote
            if m and m.group("key") in parsed:              # Keep only the keys we want (Option or Reason)
                val = m.group("val").strip().strip('"')    # remove surrounding spaces and outer quotes
                if m.group("key") == "Option":        # Option: keep letters and spaces only (drops periods, quotes, symbols)
                    val = re.sub(r'[^A-Za-z ]+', '', val).strip()
                parsed[m.group("key")] = val         # Store the cleaned value

        # Record this question's result
        results.append({
            "Question Number": row['Original Number'],  # Original position in the CSV
            "Title":           title,                   # Question title
            "Questions":       question,                # Full question text
            "Option":          parsed["Option"],        # Model's answer, "" if no Option line was found
            "Reason":          parsed["Reason"]         # Model's explanation, "" if no Reason line was found
        })

        # Wait 3 seconds before the next question to avoid hitting API rate limits
        time.sleep(3)

    # --- BUILD THIS RUN'S OUTPUT TABLE AND FILE PATH ---

    # Convert the collected results into a DataFrame with a fixed column order
    output_df = pd.DataFrame(results, 
        columns=["Question Number", "Title", "Questions", "Option", "Reason"])
    # Per-file timestamp (e.g., 20250501_143210), taken when the run finishes
    file_ts = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Input file name without folder or extension (e.g., 'Wahlrechner Tschechien')
    base_name = os.path.splitext(os.path.basename(file_path))[0]

    # Output file name format: Wahlrechner Tschechien_run3_20250501_143210.csv
    out_name = f"{base_name}_run{run_idx}_{file_ts}.csv"

    # Full path: outputs/Wahlrechner Tschechien_run_<timestamp>/<filename>.csv
    out_path = os.path.join(batch_folder, out_name)


✅ Run 1 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run1_20250501_105713.csv
✅ Run 2 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run2_20250501_105947.csv
✅ Run 3 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run3_20250501_110221.csv
✅ Run 4 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run4_20250501_110455.csv
✅ Run 5 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run5_20250501_110728.csv
✅ Run 6 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run6_20250501_111002.csv
✅ Run 7 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run7_20250501_111237.csv
✅ Run 8 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahlrechner Tschechien_run8_20250501_111511.csv
✅ Run 9 saved to outputs\Wahlrechner Tschechien_run_20250501_105437\Wahl

In [37]:
import os
import glob
import pandas as pd

# 1) point at your outputs folder
base_dir = "outputs"

# 2) find every per-run CSV in any one-level-deep subfolder of base_dir
#    (this picks up ALL batch folders, not only the most recent one)
pattern   = os.path.join(base_dir, "*", "*_run*.csv")
csv_files = sorted(glob.glob(pattern))
if len(csv_files) == 0:
    raise RuntimeError(f"No files matching {pattern!r}")

# 3) load every run once
run_frames = [pd.read_csv(path) for path in csv_files]

# 4) one row per question: union of the questions seen in any run
#    (runs made on a subset of questions may not all contain the same ones)
combined = (
    pd.concat(
        [frame[["Question Number", "Questions"]] for frame in run_frames],
        ignore_index=True,
    )
    .drop_duplicates(subset="Question Number")
    .reset_index(drop=True)
)

# 5) pull in each run's Option column, matched on "Question Number".
#    Every run file stores its rows in that run's own shuffled order, so
#    copying the column by row position would compare answers to different
#    questions. Column numbers follow the sorted file order, which is not
#    necessarily the run number embedded in the file name.
opt_cols = []
for i, frame in enumerate(run_frames, start=1):
    col = f"Option_run{i}"
    option_by_question = (
        frame.drop_duplicates(subset="Question Number")   # Series.map needs a unique index
        .set_index("Question Number")["Option"]
    )
    # Questions missing from this run get NaN
    combined[col] = combined["Question Number"].map(option_by_question)
    opt_cols.append(col)

# 6) add Status column: "not changed" if all runs agree, else "changed"
#    (nunique ignores missing answers; a row with no answers at all counts as "changed")
combined["Status"] = (
    combined[opt_cols]
    .nunique(axis=1)
    .apply(lambda x: "not changed" if x == 1 else "changed")
)

# 7) sort by Question Number, reset index
combined = combined.sort_values("Question Number").reset_index(drop=True)

# 8) write out the combined file
out_path = os.path.join(base_dir, "combined_runs.csv")
combined.to_csv(out_path, index=False)
print(f"✅ Combined {len(csv_files)} runs (with Status) to {out_path}")


✅ Combined 17 runs (with Status) to outputs\combined_runs.csv


In [None]:
import os
import glob
import pandas as pd

# point at your outputs folder
base_dir = "outputs"

# find every per-run CSV in any one-level-deep subfolder of base_dir
# (this picks up ALL batch folders, not only the most recent one)
pattern   = os.path.join(base_dir, "*", "*_run*.csv")
csv_files = sorted(glob.glob(pattern))
if len(csv_files) == 0:
    raise RuntimeError(f"No files matching {pattern!r}")

# load every run once
run_frames = [pd.read_csv(path) for path in csv_files]

# one row per question: union of the questions seen in any run
# (runs made on a subset of questions may not all contain the same ones)
combined = (
    pd.concat(
        [frame[["Question Number", "Questions"]] for frame in run_frames],
        ignore_index=True,
    )
    .drop_duplicates(subset="Question Number")
    .reset_index(drop=True)
)

# pull in each run's Option column, matched on "Question Number".
# Every run file stores its rows in that run's own shuffled order, so copying
# the column by row position would compare answers to different questions.
# Column numbers follow the sorted file order, which is not necessarily the
# run number embedded in the file name.
opt_cols = []
for i, frame in enumerate(run_frames, start=1):
    col = f"Option_run{i}"
    option_by_question = (
        frame.drop_duplicates(subset="Question Number")   # Series.map needs a unique index
        .set_index("Question Number")["Option"]
    )
    # Questions missing from this run get NaN
    combined[col] = combined["Question Number"].map(option_by_question)
    opt_cols.append(col)

# add Status column: "not changed" if all runs agree, else "changed"
# (nunique ignores missing answers; a row with no answers at all counts as "changed")
combined["Status"] = (
    combined[opt_cols]
    .nunique(axis=1)
    .apply(lambda x: "not changed" if x == 1 else "changed")
)

# compute percent agree / disagree and append '%'
n_runs = len(opt_cols)

def pct_str(series, label):
    """Return the share of runs whose answer equals `label`, formatted like "40.0%".

    series: one question's answers across all Option_run columns.
    label:  the answer to count, e.g. "Agree".
    The denominator is the total number of runs, so runs with a missing or
    unparsed answer lower both percentages.
    """
    count = (series == label).sum()
    pct   = count / n_runs * 100
    return f"{pct:.1f}%"

combined["Percent_Agree"] = combined[opt_cols] \
    .apply(lambda row: pct_str(row, "Agree"), axis=1)

combined["Percent_Disagree"] = combined[opt_cols] \
    .apply(lambda row: pct_str(row, "Disagree"), axis=1)

# sort by Question Number, reset index
combined = combined.sort_values("Question Number").reset_index(drop=True)

# write out the combined file
out_path = os.path.join(base_dir, "combined_runs.csv")
combined.to_csv(out_path, index=False)
print(f"✅ Combined {n_runs} runs (with Status & % columns) to {out_path}")


✅ Combined 17 runs (with Status & % columns) to outputs\combined_runs.csv
