In [None]:
"""Extract and if wanted subset the responses from the multijail results.
This script reads the JSONL files containing the model responses and extracts the relevant fields."""

import pandas as pd
from utils import check
import json
import os

# Directory that holds the per-run sample files (JSONL) for this model.
BASE_PATH = r"C:\Users\emste\Documents\cloned_Gits\model_steering_multilingual\results\multijail\meta-llama__meta-llama-3-8b-instruct"
# Prefix combined with each run label to identify a run; it is also embedded
# in the name of the combined CSV file.
MODEL_NAME = "meta-llama__meta-llama-3-8b-instruct_"
# Directory the combined CSV is written to.
OUTPUT_PATH = r"C:\Users\emste\Documents\cloned_Gits\model_steering_multilingual\data"

# Number of leading rows kept from each run; a falsy value (0 / None) keeps
# every row.
SAMPLE_SIZE = 150

# Maps each sample file name (relative to BASE_PATH) to the label of its run.
sample_path_to_name = {
    "samples_multijail_2025-07-16T14-54-59.581312.jsonl": "baseline",
    "samples_multijail_2025-07-16T15-09-40.878560.jsonl": "steered0.33",
    "samples_multijail_2025-07-16T15-19-24.223812.jsonl": "steered0.66",
    "samples_multijail_2025-07-16T15-30-50.120362.jsonl": "steered1.0",
}

# Build one combined table (prompt, response, run identifier, language id)
# from every sample file listed in sample_path_to_name and write it as CSV.

# One trimmed DataFrame per run. They are concatenated once after the loop
# instead of growing full_df on every iteration, which re-copied all earlier
# rows each time and concatenated against an initially empty frame.
run_frames = []

for path, steer_strength in sample_path_to_name.items():
    full_path = os.path.join(BASE_PATH, path)
    # Value stored in the "steer_strength" column: model prefix + run label.
    subset_name = MODEL_NAME + steer_strength

    print(f"Processing file: {path} with steer strength: {steer_strength}")

    # JSONL input: one JSON object per non-blank line.
    with open(full_path, "r", encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        data = [json.loads(line) for line in stripped_lines if line]

    raw_df = pd.DataFrame(data)

    # Each "doc" entry is expected to be a dict carrying (at least) the keys
    # 'prompt' and 'id'; anything that is not a dict is passed through as-is.
    # "resps" is copied unchanged. The scalar subset_name is broadcast to
    # every row.
    individual_df = pd.DataFrame({
        "prompt": raw_df["doc"].apply(lambda x: x['prompt'] if isinstance(x, dict) else x),
        "response": raw_df["resps"],
        "steer_strength": subset_name,
        "lang_id": raw_df["doc"].apply(lambda x: x['id'] if isinstance(x, dict) else x),
    })

    # Keep only the first SAMPLE_SIZE rows of this run; a falsy SAMPLE_SIZE
    # keeps everything.
    if SAMPLE_SIZE:
        individual_df = individual_df.iloc[:SAMPLE_SIZE]

    run_frames.append(individual_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# input files are configured (an empty CSV is still written in that case).
full_df = pd.concat(run_frames, ignore_index=True) if run_frames else pd.DataFrame()

# NOTE: the file name is kept exactly as before (MODEL_NAME is inserted
# verbatim right before ".csv") so existing consumers of the CSV keep working.
full_df.to_csv(os.path.join(OUTPUT_PATH, f"multijail_clean_combined_sample{SAMPLE_SIZE}_{MODEL_NAME}.csv"), index=False)
print("\n=== Final DataFrame ===")
print(f"Total rows: {len(full_df)}")
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
full_df.info()

print(f"Output has been written to: {OUTPUT_PATH}")

Processing file: samples_multijail_2025-07-16T14-54-59.581312.jsonl with steer strength: baseline
Processing file: samples_multijail_2025-07-16T15-09-40.878560.jsonl with steer strength: steered0.33
Processing file: samples_multijail_2025-07-16T15-19-24.223812.jsonl with steer strength: steered0.66
Processing file: samples_multijail_2025-07-16T15-30-50.120362.jsonl with steer strength: steered1.0

=== Final DataFrame ===
Total rows: 600
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   prompt          600 non-null    object
 1   response        600 non-null    object
 2   steer_strength  600 non-null    object
 3   lang_id         600 non-null    object
dtypes: object(4)
memory usage: 18.9+ KB
None
Output has been written to: C:\Users\emste\Documents\cloned_Gits\model_steering_multilingual\data
