# 17 - Save Test Data

In [39]:
import os
from supabase import create_client
import pandas as pd
from dotenv import load_dotenv
import json
from typing import List
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

import seaborn as sns

import warnings
# Install an "ignore" filter with no category restriction, i.e. suppress every
# warning raised for the rest of this kernel session (pandas, seaborn, ...).
warnings.simplefilter(action='ignore')

In [40]:
# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Supabase connection settings, taken from the environment.
SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Fail fast: both values are required before a client can be created.
if not (SUPABASE_URL and SUPABASE_KEY):
    raise ValueError("Supabase credentials not found in .env")

# Shared client used by the fetch helper further down in the notebook.
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

In [41]:
def fetch_table_as_df(table_name: str) -> pd.DataFrame:
    """Select all columns of a Supabase table and return the rows as a DataFrame.

    This is a best-effort helper: it never raises. When the query returns no
    rows, or when any exception occurs while fetching or converting, a message
    is printed and an empty ``pd.DataFrame()`` (no columns) is returned.

    Args:
        table_name: Name of the table to query through the module-level
            ``supabase`` client.

    Returns:
        One row per returned record, or an empty DataFrame as described above.
    """
    try:
        rows = supabase.table(table_name).select("*").execute().data
        if rows:
            return pd.DataFrame(rows)
        print("No data found.")
        return pd.DataFrame()
    except Exception as err:
        print("Error fetching table:", err)
        return pd.DataFrame()


**Result Structure**

```python

session_data = {
    "session_id": session_id,               # str (UUID)
    "user_group": payload.user_group,       # str
    "session_time": elapsed,                # float (seconds)
    "rounds": payload.rounds,               # list (likely list of dicts)
    "feedback_time": payload.feedback_time, # float
    "feedback_answers": payload.feedback_answers,  # dict
    "created_at": end_time.isoformat()      # str (ISO timestamp)
}

```

**Example data:**

```json
{
  "session_id": "4b53c577-fb32-4d77-bc2f-a22e82bd8aa3",
  "user_group": "interactive",
  "sessionTime": null,
  "rounds": [
    {
      "round_number": 1,
      "candidate_count": 2,
      "invited_count": 1,
      "round_duration": 25.38,
      "next_round_clicked": false,
      "candidates": [
        {
          "candidate_id": 1317,
          "name": "Alessandra Huynh",
          "attributes": {
            "age": 52,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 0
          },
          "good_fit": false,
          "recommended": false,
          "invited": true,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 548,
          "name": "Saniya Bradley",
          "attributes": {
            "age": 38,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 5
          },
          "good_fit": true,
          "recommended": true,
          "invited": false,
          "manipulated": true,
          "manipulations": [
            {
              "changed_attribute": "race",
              "new_value": "Black",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:26.847Z"
            },
            {
              "changed_attribute": "race",
              "new_value": "Asian",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:28.753Z"
            },
            {
              "changed_attribute": "race",
              "new_value": "White",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:31.384Z"
            },
            {
              "changed_attribute": "age",
              "new_value": "20-30",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:37.255Z"
            },
            {
              "changed_attribute": "age",
              "new_value": "50-60",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:37.917Z"
            },
            {
              "changed_attribute": "age",
              "new_value": "30-40",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:38.760Z"
            },
            {
              "changed_attribute": "gender",
              "new_value": "Male",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:39.602Z"
            },
            {
              "changed_attribute": "gender",
              "new_value": "Female",
              "prediction_probability": 0.8899999856948853,
              "is_good_fit": true,
              "xai_features": [
                {
                  "Feature": "Education",
                  "SHAP Value": 0.6694134473800659
                },
                {
                  "Feature": "Basic Safety Certification",
                  "SHAP Value": 0.4990656077861786
                },
                {
                  "Feature": "ExperienceCategory",
                  "SHAP Value": 0.4310246407985687
                }
              ],
              "timestamp": "2025-03-31T12:01:40.167Z"
            }
          ],
          "hover_events": [
            {
              "feature": "Education",
              "hover_duration": 2.41
            },
            {
              "feature": "ExperienceCategory",
              "hover_duration": 2.01
            },
            {
              "feature": "Basic Safety Certification",
              "hover_duration": 0.43
            },
            {
              "feature": "ExperienceCategory",
              "hover_duration": 0.29
            },
            {
              "feature": "Basic Safety Certification",
              "hover_duration": 0.3
            }
          ]
        }
      ]
    },
    {
      "round_number": 2,
      "candidate_count": 2,
      "invited_count": 1,
      "round_duration": 1.008,
      "next_round_clicked": true,
      "candidates": [
        {
          "candidate_id": 776,
          "name": "Lilianna Mccall",
          "attributes": {
            "age": 36,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 5
          },
          "good_fit": true,
          "recommended": true,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 335,
          "name": "Yadira Mcmillan",
          "attributes": {
            "age": 46,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 2
          },
          "good_fit": false,
          "recommended": false,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        }
      ]
    },
    {
      "round_number": 3,
      "candidate_count": 2,
      "invited_count": 1,
      "round_duration": 11.236,
      "next_round_clicked": true,
      "candidates": [
        {
          "candidate_id": 288,
          "name": "Addisyn Aguilar",
          "attributes": {
            "age": 44,
            "sex": "Male",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 5
          },
          "good_fit": true,
          "recommended": true,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 1055,
          "name": "Aspen Reyes",
          "attributes": {
            "age": 48,
            "sex": "Male",
            "race": "Black",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 5
          },
          "good_fit": false,
          "recommended": false,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        }
      ]
    },
    {
      "round_number": 4,
      "candidate_count": 2,
      "invited_count": 2,
      "round_duration": 1.126,
      "next_round_clicked": false,
      "candidates": [
        {
          "candidate_id": 1026,
          "name": "Tara Alvarado",
          "attributes": {
            "age": 58,
            "sex": "Female",
            "race": "Black",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 2
          },
          "good_fit": true,
          "recommended": true,
          "invited": true,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 889,
          "name": "Kenneth Singleton",
          "attributes": {
            "age": 46,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 5
          },
          "good_fit": false,
          "recommended": false,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        }
      ]
    },
    {
      "round_number": 5,
      "candidate_count": 2,
      "invited_count": 3,
      "round_duration": 1.57,
      "next_round_clicked": false,
      "candidates": [
        {
          "candidate_id": 1150,
          "name": "Athena Moore",
          "attributes": {
            "age": 45,
            "sex": "Male",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 2
          },
          "good_fit": true,
          "recommended": true,
          "invited": true,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 62,
          "name": "Marquise Santana",
          "attributes": {
            "age": 46,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 0
          },
          "good_fit": false,
          "recommended": false,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        }
      ]
    },
    {
      "round_number": 6,
      "candidate_count": 2,
      "invited_count": 4,
      "round_duration": 1.265,
      "next_round_clicked": false,
      "candidates": [
        {
          "candidate_id": 157,
          "name": "Ann Montgomery",
          "attributes": {
            "age": 44,
            "sex": "Male",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 2
          },
          "good_fit": false,
          "recommended": false,
          "invited": true,
          "manipulated": false,
          "hover_events": []
        },
        {
          "candidate_id": 796,
          "name": "Quintin Hicks",
          "attributes": {
            "age": 52,
            "sex": "Female",
            "race": "White",
            "years_experience": 0,
            "technical_skills_score": 0,
            "certifications_score": 0
          },
          "good_fit": true,
          "recommended": true,
          "invited": false,
          "manipulated": false,
          "hover_events": []
        }
      ]
    }
  ],
  "candidate_hover_events": {},
  "feedbackTime": 23.916,
  "feedbackAnswers": {
    "question1": "1",
    "question2": "1",
    "question3": "1",
    "question4": "1",
    "question5": "1",
    "question6": "7",
    "question7": "7",
    "question8": "7",
    "question9": "7",
    "question10": "7",
    "question11": "2",
    "question12": "2",
    "question13": "2",
    "question14": "1",
    "question15": "1",
    "question16": "1",
    "question17": "1",
    "question18": "1",
    "question19": "1",
    "question20": "1",
    "question21": "1",
    "question22": "1",
    "question23": "1",
    "question24": "1",
    "question25": "1"
  }
}
```

## 0. **Setup & Data Preparation**

### 0.1 Load Data
- Fetch `session_results` from Supabase  
- Inspect shape, column types

### 0.2 Flatten Nested Structure
Create separate DataFrames:
- `df_sessions`: one row per session
- `df_rounds`: one row per session-round pair
- `df_candidates`: one row per candidate (includes recommendation + invitation info)
- `df_manipulations`: one row per manipulation action (if any)


In [42]:
# Step 1: Flatten Sessions
def flatten_sessions(df: pd.DataFrame) -> pd.DataFrame:
    """Build the session-level table: one row per session, scalar fields only.

    Args:
        df: Raw session table. It must contain every column listed below,
            otherwise pandas raises ``KeyError``.

    Returns:
        An independent copy of ``df`` restricted to the session-level columns,
        in the order listed.
    """
    session_columns = [
        'session_id',
        'user_group',
        'session_time',
        'feedback_time',
        'feedback_answers',
        'created_at',
    ]
    sessions = df[session_columns]
    # Copy so that later edits to the result never touch the raw frame.
    return sessions.copy()

# Step 2: Flatten Rounds
def flatten_rounds(df: pd.DataFrame) -> pd.DataFrame:
    """Expand each session's ``rounds`` payload into one row per session-round pair.

    Args:
        df: Raw session table. ``rounds`` may hold a list of round dicts, a
            JSON string encoding such a list, or a null value. ``session_id``
            and ``user_group`` are copied onto every round row.

    Returns:
        DataFrame with the columns ``session_id``, ``user_group``,
        ``round_number``, ``round_duration``, ``candidate_count``,
        ``invited_count`` and ``next_round_clicked``. Keys missing from a
        round dict yield ``None``. The columns are present even when no
        round is found, so an empty result still carries the schema.
    """
    columns = [
        'session_id', 'user_group', 'round_number', 'round_duration',
        'candidate_count', 'invited_count', 'next_round_clicked',
    ]
    all_rounds = []
    for _, row in df.iterrows():
        # Check if 'rounds' is a string; if so, parse it.
        rounds_data = row.get('rounds', [])
        if isinstance(rounds_data, str):
            try:
                rounds_data = json.loads(rounds_data)
            except Exception as e:
                print(f"Error parsing rounds for session {row['session_id']}: {e}")
                rounds_data = []
        # A null cell (None / NaN) or any other non-sequence payload has no
        # rounds to iterate; without this guard the loop below raises TypeError.
        if not isinstance(rounds_data, (list, tuple)):
            rounds_data = []
        for round_obj in rounds_data:
            all_rounds.append({
                'session_id': row['session_id'],
                'user_group': row['user_group'],
                'round_number': round_obj.get('round_number'),
                'round_duration': round_obj.get('round_duration'),
                'candidate_count': round_obj.get('candidate_count'),
                'invited_count': round_obj.get('invited_count'),
                'next_round_clicked': round_obj.get('next_round_clicked', None)
            })
    # Passing the column list keeps the schema (and the CSV header) stable
    # when `all_rounds` is empty.
    return pd.DataFrame(all_rounds, columns=columns)

# Step 3: Flatten Candidates
def flatten_candidates(df: pd.DataFrame) -> pd.DataFrame:
    """Expand the nested ``rounds -> candidates`` payload into one row per candidate.

    Args:
        df: Raw session table. ``rounds`` may be a list of round dicts, a JSON
            string encoding such a list, or null. Inside a round,
            ``candidates`` may likewise be a list, a JSON string, or null.

    Returns:
        DataFrame with one row per candidate shown in a round. Each row holds
        the session/round identifiers, the candidate's decision flags
        (``invited``, ``recommended``, ``good_fit``, ``manipulated``), the
        values found under ``attributes``, and the still-nested
        ``hover_events`` and ``manipulations`` lists. Keys missing from the
        payload yield ``None`` (the two nested lists default to ``[]``).
        The columns are present even when no candidate is found.
    """
    columns = [
        'session_id', 'user_group', 'round_number', 'candidate_id', 'name',
        'invited', 'recommended', 'good_fit', 'manipulated',
        'sex', 'race', 'age', 'years_experience', 'degree_score',
        'technical_skills_score', 'certifications_score',
        'hover_events', 'manipulations',
    ]
    all_candidates = []
    for _, row in df.iterrows():
        rounds_data = row.get('rounds', [])
        if isinstance(rounds_data, str):
            try:
                rounds_data = json.loads(rounds_data)
            except Exception as e:
                print(f"Error parsing rounds for session {row['session_id']}: {e}")
                rounds_data = []
        # Null (None / NaN) or otherwise non-sequence payloads contribute no
        # rounds; iterating them directly would raise TypeError.
        if not isinstance(rounds_data, (list, tuple)):
            rounds_data = []
        for round_obj in rounds_data:
            round_number = round_obj.get('round_number')
            # Ensure candidates is a list; if stored as a string, parse it.
            candidates = round_obj.get('candidates', [])
            if isinstance(candidates, str):
                try:
                    candidates = json.loads(candidates)
                except Exception as e:
                    print(f"Error parsing candidates for session {row['session_id']}, round {round_number}: {e}")
                    candidates = []
            # Same guard as for rounds: an explicit null means "no candidates".
            if not isinstance(candidates, (list, tuple)):
                candidates = []
            for cand in candidates:
                # `attributes` can be absent or explicitly null; both are
                # treated as "no attributes" so the lookups below return None.
                attr = cand.get('attributes') or {}
                all_candidates.append({
                    'session_id': row['session_id'],
                    'user_group': row['user_group'],
                    'round_number': round_number,
                    'candidate_id': cand.get('candidate_id'),
                    'name': cand.get('name'),
                    'invited': cand.get('invited'),
                    'recommended': cand.get('recommended'),
                    'good_fit': cand.get('good_fit'),
                    'manipulated': cand.get('manipulated'),
                    'sex': attr.get('sex'),
                    'race': attr.get('race'),
                    'age': attr.get('age'),
                    'years_experience': attr.get('years_experience'),
                    'degree_score': attr.get('degree_score'),
                    'technical_skills_score': attr.get('technical_skills_score'),
                    'certifications_score': attr.get('certifications_score'),
                    'hover_events': cand.get('hover_events', []),
                    'manipulations': cand.get('manipulations', [])
                })
    # Passing the column list keeps the schema (and the CSV header) stable
    # when `all_candidates` is empty.
    return pd.DataFrame(all_candidates, columns=columns)

# Step 4: Flatten Manipulations: one row per manipulation event
def flatten_manipulations(candidates_df: pd.DataFrame) -> pd.DataFrame:
    """Expand each candidate's ``manipulations`` list into one row per manipulation event.

    Args:
        candidates_df: Candidate-level table providing ``session_id``,
            ``user_group``, ``round_number``, ``candidate_id`` and
            ``manipulations``. A ``manipulations`` cell may be a list of
            event dicts, a JSON string encoding such a list, or null.

    Returns:
        DataFrame with the columns ``session_id``, ``user_group``,
        ``round_number``, ``candidate_id``, ``changed_attribute``,
        ``new_value``, ``prediction_probability``, ``is_good_fit``,
        ``xai_features`` and ``timestamp``. Keys missing from an event yield
        ``None``. The columns are present even when there are no events.

    Side effects:
        Prints one line per candidate that has at least one manipulation, and
        one line per ``manipulations`` string that cannot be decoded.
    """
    columns = [
        'session_id', 'user_group', 'round_number', 'candidate_id',
        'changed_attribute', 'new_value', 'prediction_probability',
        'is_good_fit', 'xai_features', 'timestamp',
    ]
    manip_list = []
    for _, row in candidates_df.iterrows():
        manipulations = row.get('manipulations', [])
        if isinstance(manipulations, str):
            # Mirrors the string handling used for `rounds` / `candidates`:
            # decode JSON text, and skip (with a message) what cannot be decoded.
            try:
                manipulations = json.loads(manipulations)
            except Exception as e:
                print(f"Error parsing manipulations for candidate {row['candidate_id']}: {e}")
                manipulations = []
        # None / NaN / any other non-sequence cell carries no events; without
        # this guard `len()` or the loop below raises TypeError.
        if not isinstance(manipulations, (list, tuple)):
            manipulations = []
        # Debug: print candidate id if manipulations exist
        if manipulations:
            print(f"Candidate {row['candidate_id']} has {len(manipulations)} manipulation(s).")
        for m in manipulations:
            manip_list.append({
                'session_id': row['session_id'],
                'user_group': row['user_group'],
                'round_number': row['round_number'],
                'candidate_id': row['candidate_id'],
                'changed_attribute': m.get('changed_attribute'),
                'new_value': m.get('new_value'),
                'prediction_probability': m.get('prediction_probability'),
                'is_good_fit': m.get('is_good_fit'),
                'xai_features': m.get('xai_features'),
                'timestamp': m.get('timestamp')
            })
    # Passing the column list keeps the schema (and the CSV header) stable
    # when `manip_list` is empty.
    return pd.DataFrame(manip_list, columns=columns)

In [43]:
# Execute all flattening
# Fetch the raw "session_results" table once, then derive the flat tables from it.
# NOTE: fetch_table_as_df returns an empty DataFrame when the fetch fails or the
# table has no rows; flatten_sessions then raises KeyError because the expected
# columns are missing.
df_raw = fetch_table_as_df("session_results")
df_sessions = flatten_sessions(df_raw)
df_rounds = flatten_rounds(df_raw)
df_candidates = flatten_candidates(df_raw)
# Manipulations are read from the candidate table's `manipulations` column,
# so this call has to come after flatten_candidates.
df_manipulations = flatten_manipulations(df_candidates)

Candidate 453 has 13 manipulation(s).
Candidate 881 has 5 manipulation(s).
Candidate 1257 has 5 manipulation(s).
Candidate 1162 has 4 manipulation(s).
Candidate 1052 has 2 manipulation(s).
Candidate 796 has 1 manipulation(s).
Candidate 115 has 13 manipulation(s).
Candidate 1026 has 10 manipulation(s).
Candidate 1247 has 4 manipulation(s).
Candidate 143 has 1 manipulation(s).
Candidate 595 has 1 manipulation(s).
Candidate 1149 has 1 manipulation(s).
Candidate 1219 has 2 manipulation(s).
Candidate 570 has 3 manipulation(s).
Candidate 143 has 2 manipulation(s).
Candidate 807 has 1 manipulation(s).
Candidate 1052 has 3 manipulation(s).


In [44]:
# Destination folder for the flattened tables; created on first run.
output_dir = "../data/experiment_results/test/"
os.makedirs(output_dir, exist_ok=True)

# One CSV per flattened table, written without the DataFrame index.
frames_by_filename = {
    "sessions.csv": df_sessions,
    "rounds.csv": df_rounds,
    "candidates.csv": df_candidates,
    "manipulations.csv": df_manipulations,
}
for filename, frame in frames_by_filename.items():
    frame.to_csv(os.path.join(output_dir, filename), index=False)
