In [None]:
import pandas as pd 
import json


# Load the wide table: one row per prompt, with the original wording and
# five rephrasings stored in separate columns.
df = pd.read_csv('./bai_rephrase.csv')

# Step 1: Reshape wide -> long so each wording gets its own row.
# id_vars are repeated on every output row; value_vars are the text columns
# that get stacked into 'prompt_text', with the originating column name
# recorded in 'rephrase_source'.
id_vars = ['prompt_id', 'symptom', 'severity']
value_vars = ['original_text'] + [f'rephrase_{n}' for n in range(1, 6)]

df_long = pd.melt(
    df,
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='rephrase_source',
    value_name='prompt_text',
)

# Step 2: Create new IDs with rephrase suffix
def create_new_id(row):
    """Return the output id for one melted row.

    Rows whose 'rephrase_source' is 'original_text' keep their 'prompt_id'
    unchanged. Any other row gets '_r<N>' appended to its 'prompt_id', where
    <N> is the second underscore-separated token of 'rephrase_source'
    (e.g. 'rephrase_1' -> '<prompt_id>_r1').
    """
    source = row['rephrase_source']
    if source != 'original_text':
        # 'rephrase_3'.split('_') -> ['rephrase', '3']; index 1 is the number.
        suffix = source.split('_')[1]
        return f"{row['prompt_id']}_r{suffix}"
    return row['prompt_id']

# Attach the derived id to every row (row-wise, hence axis=1).
df_long['id'] = df_long.apply(create_new_id, axis=1)


# Step 3: Create the JSON structure
def _row_to_record(row):
    """Shape one row of df_long into the nested dict written to the JSONL file."""
    return {
        "id": row['id'],
        "prompt_text": row['prompt_text'],
        "tags": {
            "symptom": row['symptom'],
            # Cast to a plain int so json.dumps can serialize the value.
            "severity": int(row['severity']),
            "disorder": "gad",
            "version": "v1",
            "rephrase_source": row['rephrase_source'],
        },
    }


records = [_row_to_record(row) for _, row in df_long.iterrows()]

# Step 4: Save to JSONL file (one JSON object per line)
with open('bai_rephrases.jsonl', 'w') as out_file:
    out_file.writelines(json.dumps(entry) + '\n' for entry in records)

# Preview: total count followed by the first three records, pretty-printed.
print(f"Created {len(records)} records")
print("\nFirst 3 records:")
for record in records[:3]:
    print(json.dumps(record, indent=2))

Created 504 records

First 3 records:
{
  "id": "bai_1_0",
  "prompt_text": "This month I haven't felt any numbness",
  "tags": {
    "symptom": "Numbness",
    "severity": 0,
    "disorder": "mdd",
    "version": "v1",
    "rephrase_source": "original_text"
  }
}
{
  "id": "bai_1_1",
  "prompt_text": "This month I've felt some mild numbness and tingling but it hasn't bothered me much",
  "tags": {
    "symptom": "Numbness",
    "severity": 1,
    "disorder": "mdd",
    "version": "v1",
    "rephrase_source": "original_text"
  }
}
{
  "id": "bai_1_2",
  "prompt_text": "This month I've felt some moderate numbness and tingling and it wasn't pleasant at times",
  "tags": {
    "symptom": "Numbness",
    "severity": 2,
    "disorder": "mdd",
    "version": "v1",
    "rephrase_source": "original_text"
  }
}
