In [5]:
import pandas as pd
import os
import json
import re

In [12]:
folder_path = "data"
dfs = []


def repair_json_array(raw):
    """Coerce a loosely formatted dump of JSON values into JSON array text.

    The repairs applied, in order:
      * surrounding whitespace is stripped;
      * commas dangling at the very end of the text are dropped;
      * a missing opening "[" and/or closing "]" is added;
      * commas sitting directly before the final "]" are removed, since
        JSON does not allow a trailing comma inside an array.

    Args:
        raw: Text read from one input file.

    Returns:
        The repaired text. It is not guaranteed to be valid JSON; the
        caller still has to parse it and handle ``json.JSONDecodeError``.
    """
    text = raw.strip()
    # Dangling separator after the last record: '{...}, {...},'
    text = text.rstrip(",")
    if not text.startswith("["):
        text = "[" + text
    if not text.endswith("]"):
        text += "]"
    # Separator left in front of the closing bracket: '[{...},\n]'
    return re.sub(r"(?:,\s*)+\]\Z", "]", text)


# A missing folder is reported like any other "nothing to load" situation
# instead of aborting the cell with a traceback.
if os.path.isdir(folder_path):
    filenames = sorted(os.listdir(folder_path))
else:
    print(f"❌ Folder not found: {folder_path}")
    filenames = []

for filename in filenames:
    if not filename.endswith(".txt"):
        continue

    file_path = os.path.join(folder_path, filename)
    print(f"Processing {filename}...")

    with open(file_path, "r", encoding="utf-8") as f:
        raw = f.read()

    try:
        data = json.loads(repair_json_array(raw))
        # Named differently from the combined `df` below so the per-file
        # frame never masquerades as the final result.
        file_df = pd.DataFrame(data)
        dfs.append(file_df)
        print(f"✅ Loaded {len(file_df)} rows from {filename}")
    except json.JSONDecodeError as e:
        # Best effort: report the unreadable file and keep going.
        print(f"❌ Failed to load {filename}: {e}")

# Combine all
if dfs:
    df = pd.concat(dfs, ignore_index=True)
    print(f"\n✅ Total combined records: {len(df)}")
else:
    print("\n❌ No valid files loaded.")


Processing actionable_gemini1.txt...
✅ Loaded 500 rows from actionable_gemini1.txt
Processing gpt_actionable_1.txt...
✅ Loaded 171 rows from gpt_actionable_1.txt
Processing gpt_actionable_2.txt...
✅ Loaded 373 rows from gpt_actionable_2.txt
Processing meaningless_1.txt...
✅ Loaded 95 rows from meaningless_1.txt
Processing meaningless_2.txt...
✅ Loaded 162 rows from meaningless_2.txt

✅ Total combined records: 1301


In [13]:
# Preview the first five rows of the combined frame.
df.head(5)

Unnamed: 0,text,label
0,Remind me to call Mom at 7 PM.,1
1,Set a reminder: pick up dry cleaning tomorrow.,1
2,Add 'dentist appointment' to my reminders for ...,1
3,Remind me about the team meeting in one hour.,1
4,Can you set a reminder to take out the trash t...,1


In [14]:
# Preview the last five rows of the combined frame.
df.tail(5)

Unnamed: 0,text,label
1296,Through the echoing silence.,0
1297,Colors of invisible shade.,0
1298,In reflective refracted thought.,0
1299,Motionless movements move outside.,0
1300,Unspoken answers question forth.,0


In [15]:
# Shuffle every row with a fixed seed, then renumber the index from zero.
# NOTE: `df` is reassigned from itself, so re-running this cell shuffles the
# already-shuffled frame again.
shuffled = df.sample(frac=1, random_state=42)
df = shuffled.reset_index(drop=True)
df.head()

Unnamed: 0,text,label
0,I need to call my sister later to check in.,0
1,Summarize the latest news.,1
2,Zip zap zoom.,0
3,What’s playing at the cinema?,1
4,Do you believe in magic?,0


In [16]:
# Row and column counts of the current `df`.
n_rows, n_cols = df.shape
(n_rows, n_cols)

(1301, 2)

In [18]:
# Write the frame to CSV, leaving the index out of the file.
df.to_csv(path_or_buf="data.csv", index=False)
