In [1]:
import pandas as pd
import ast
import json

# Load the data
# Read the worksheet named "Sheet1" from the source workbook into `df`.
# NOTE(review): the path looks like a Colab-mounted Google Drive location —
# confirm the drive is mounted before running this cell.
file_path = "/content/drive/MyDrive/sample_1111.xlsx"
df = pd.read_excel(file_path, sheet_name="Sheet1")

# ---------- Flatten Complaint Dictionary ----------
def parse_complaint(complaint_str):
    """Parse one ``Complaint`` cell into a dictionary.

    The cell text may be strict JSON, a Python ``dict`` literal, or a
    single-quoted JSON-like string. Parsers are tried from least to most
    lossy so that well-formed input is never rewritten before parsing:

    1. ``json.loads`` on the raw text.
    2. ``ast.literal_eval`` on the raw text (Python literals such as
       ``True``/``None`` and single-quoted strings).
    3. ``json.loads`` after swapping ``'`` for ``"`` — a last resort for
       single-quoted text that still uses JSON keywords such as ``null``.

    Args:
        complaint_str: The raw cell value. Anything that is not a ``str``
            (NaN, ``None``, numbers, ...) is treated as "no complaint".

    Returns:
        The parsed ``dict``, or ``{}`` when the value is missing, cannot be
        parsed, or parses to something other than a dictionary.
    """
    # Missing cells arrive as NaN/None, which are not strings, so this single
    # check covers both "missing" and "not text" without calling pd.isna
    # (which is ambiguous for list-like values).
    if not isinstance(complaint_str, str):
        return {}

    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    # Only parse failures are swallowed; KeyboardInterrupt and friends
    # propagate, unlike with a bare ``except``.
    parse_errors = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)

    for parse in parsers:
        try:
            parsed = parse(complaint_str)
        except parse_errors:
            continue
        if isinstance(parsed, dict):
            return parsed
    return {}

# Parse every cell of the "Complaint" column, then spread the parsed
# dictionaries out so that each key becomes its own column.
complaints = df["Complaint"].apply(parse_complaint)
complaints_df = pd.json_normalize(complaints)

# Prefix the flattened columns so they are recognisable as complaint fields.
complaints_df = complaints_df.add_prefix("Complaint_")

# ---------- Flatten Call Flags ----------
def parse_flags(flags_str):
    """Parse one ``Call Flags`` cell into a list of flag entries.

    The cell text may be strict JSON, a Python ``list`` literal, or a
    single-quoted JSON-like string. Parsers are tried from least to most
    lossy so that well-formed input is never rewritten before parsing:

    1. ``json.loads`` on the raw text.
    2. ``ast.literal_eval`` on the raw text (Python literals such as
       ``True``/``None`` and single-quoted strings).
    3. ``json.loads`` after swapping ``'`` for ``"`` — a last resort for
       single-quoted text that still uses JSON keywords such as ``null``.

    Args:
        flags_str: The raw cell value. Anything that is not a ``str``
            (NaN, ``None``, numbers, ...) is treated as "no flags".

    Returns:
        The parsed ``list``, or ``[]`` when the value is missing, cannot be
        parsed, or parses to something other than a list.
    """
    # Missing cells arrive as NaN/None, which are not strings, so this single
    # check covers both "missing" and "not text" without calling pd.isna
    # (which is ambiguous for list-like values).
    if not isinstance(flags_str, str):
        return []

    parsers = (
        json.loads,
        ast.literal_eval,
        lambda text: json.loads(text.replace("'", '"')),
    )
    # Only parse failures are swallowed; KeyboardInterrupt and friends
    # propagate, unlike with a bare ``except``.
    parse_errors = (ValueError, SyntaxError, TypeError, MemoryError, RecursionError)

    for parse in parsers:
        try:
            parsed = parse(flags_str)
        except parse_errors:
            continue
        if isinstance(parsed, list):
            return parsed
    return []

flags = df["Call Flags"].apply(parse_flags)


def _flag_titles(entries):
    """Return the distinct string titles found in one row's parsed flags.

    Entries that are not dicts, or whose "title" is missing or not a string,
    are ignored so that one malformed entry cannot break the whole run.
    """
    if not isinstance(entries, list):
        return set()
    return {
        entry["title"]
        for entry in entries
        if isinstance(entry, dict) and isinstance(entry.get("title"), str)
    }


# Extract each row's titles once; every indicator column below is then a
# cheap set-membership test instead of a rescan of the row's flag list.
row_titles = [_flag_titles(entries) for entries in flags]
all_flag_titles = set().union(*row_titles)

# Convert flags into indicator columns
flag_indicators = {}
# Sorted so the column order is the same on every run (set order is not).
for title in sorted(all_flag_titles):
    column = f"Flag_{title.replace(' ', '_')}"
    hits = [int(title in titles) for titles in row_titles]
    if column in flag_indicators:
        # Distinct titles can collapse to one column name (e.g. "A B" and
        # "A_B"); OR them together rather than letting the last one win.
        hits = [old | new for old, new in zip(flag_indicators[column], hits)]
    flag_indicators[column] = hits

for column, hits in flag_indicators.items():
    df[column] = hits

# ---------- Merge back ----------
# Append the flattened complaint columns to the right of the existing ones.
df = pd.concat([df, complaints_df], axis=1)

# Save the cleaned dataset for Power BI
output_path = "/content/drive/MyDrive/final_transformed_transcripts.xlsx"
df.to_excel(output_path, index=False)

# Report the path that was actually written; the previous message named
# "transformed_transcripts.xlsx", which is not the file saved above.
print(f"✅ Transformation complete. New file: {output_path}")


✅ Transformation complete. New file: transformed_transcripts.xlsx
