In [1]:
# Directories scanned for the '*.json' result files of the baseline run and of the overfitting run.
# NOTE(review): these are absolute, machine-specific paths — adjust them before running elsewhere.
baseline_path = '/home/ubuntu/portiloop-training/portinight_baseline2'
overfitting_path = '/home/ubuntu/portiloop-training/portinight_overfitting'

In [14]:
import pandas as pd
import json
import os

def create_df_from_portinight_RMS(overfitting_path, baseline_path):
    """Build a long-format DataFrame with one row per individual RMS score.

    Every ``*.json`` file in both directories is expected to map a night name
    to a dict of experiments, each exposing
    ``['metrics']['detect_spindle_metrics']['rms_scores']`` (and, for the
    overfitting files, ``['config']['night_num']``).

    For each night that appears in both directories, the RMS scores of every
    overfitting experiment are emitted, followed by the RMS scores of the
    baseline configurations for the same night (tagged with the night number
    read from the overfitting file).

    Args:
        overfitting_path: Directory containing the overfitting JSON files.
        baseline_path: Directory containing the baseline JSON files.

    Returns:
        pandas.DataFrame with columns ``night``, ``night_num``, ``config`` and
        ``rms_score``; rows containing NaN are dropped. The frame is empty
        (no columns) when nothing matched.
    """
    configs = ['Baseline', 'AdaThresh'] #, 'config_2', 'config_3', 'config_4', 'config_5']
    experiment_names_baseline = ['Baseline', 'AdaThresh'] #, 'Train', 'Both', 'TrainAlpha', 'TrainFreeze']

    # Merge the nights of every baseline file into a single lookup.
    # Listings are sorted because os.listdir() returns entries in arbitrary order.
    baseline_data = {}
    for file_name in sorted(os.listdir(baseline_path)):
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(baseline_path, file_name), 'r') as file:
            baseline_data.update(json.load(file))

    rows = []
    for file_name in sorted(os.listdir(overfitting_path)):
        # Skip non-JSON entries entirely; otherwise the previously loaded file
        # would be processed a second time (or no file would be loaded at all).
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(overfitting_path, file_name), 'r') as file:
            json_data = json.load(file)

        for night, experiments in json_data.items():
            if night not in baseline_data:
                continue

            # Reset per night so a night without scores cannot inherit the
            # night number of the night processed before it.
            night_num = -1
            for experiment_name, experiment in experiments.items():
                rms_scores = experiment['metrics']['detect_spindle_metrics']['rms_scores']
                if not rms_scores:
                    continue
                night_num = experiment['config']['night_num']
                for rms_score in rms_scores:
                    rows.append({
                        'night': night,
                        'night_num': night_num,
                        'config': experiment_name,
                        'rms_score': rms_score,
                    })

            # No overfitting score for this night: there is no night number to
            # attach to the baseline rows, so the night is skipped.
            if night_num == -1:
                continue

            for config, label in zip(configs, experiment_names_baseline):
                for rms_score in baseline_data[night][config]['metrics']['detect_spindle_metrics']['rms_scores']:
                    rows.append({
                        'night': night,
                        'night_num': night_num,
                        'config': label,
                        'rms_score': rms_score,
                    })

    df = pd.DataFrame(rows)
    # Remove all NaNs from the dataframe
    df = df.dropna()

    return df

def _extract_portinight_metrics(experiment):
    """Return the summary metrics stored for one experiment of one night."""
    ola7gt = experiment['metrics']['detect_spindle_metrics_ola7gt']
    plain = experiment['metrics']['detect_spindle_metrics']
    return {
        'f1': ola7gt['f1'],
        'precision': ola7gt['precision'],
        'recall': ola7gt['recall'],
        'rms_avg': plain['avg_rms_score'],
        'f1base': plain['f1'],
        # Number of detections = true positives + false positives.
        'detect_spindles': ola7gt['tp'] + ola7gt['fp'],
    }


def create_df_from_portinight(overfitting_path, baseline_path):
    """Build a wide DataFrame with one row per night of the overfitting run.

    Each row holds the night name, ``num_spindles_sla7`` (tp + fn of the
    baseline 'Baseline' config), the night number, and one
    ``"<experiment> - <metric>"`` column per metric for both the baseline
    configurations and every experiment found in the overfitting file.
    Nights absent from the baseline data are skipped.

    Args:
        overfitting_path: Directory containing the overfitting JSON files.
        baseline_path: Directory containing the baseline JSON files.

    Returns:
        pandas.DataFrame with one row per matched night.
    """
    configs_baseline = ['Baseline', 'AdaThresh'] #, 'config_2', 'config_3', 'config_4', 'config_5']
    experiment_names_baseline = ['Baseline', 'AdaThresh'] #, 'Train', 'Both', 'TrainAlpha', 'TrainFreeze']

    # First, we must get a dictionary with all necessary values from the baseline
    baseline_data = {}
    # Listings are sorted because os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(baseline_path)):
        # Only JSON files are loaded; anything else must not trigger processing
        # of a stale (or undefined) json_data.
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(baseline_path, file_name), 'r') as file:
            json_data = json.load(file)

        for night, night_data in json_data.items():
            baseline_data[night] = {
                label: _extract_portinight_metrics(night_data[config])
                for config, label in zip(configs_baseline, experiment_names_baseline)
            }
            reference = night_data['Baseline']['metrics']['detect_spindle_metrics_ola7gt']
            # Number of ground-truth spindles = true positives + false negatives.
            baseline_data[night]['num_spindles_sla7'] = reference['tp'] + reference['fn']

    rows = []
    for file_name in sorted(os.listdir(overfitting_path)):
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(overfitting_path, file_name), 'r') as file:
            json_data = json.load(file)

        for night, experiments in json_data.items():
            if night not in baseline_data:
                continue

            # The night number is read from 'BothFreeze' when present; otherwise
            # from the first experiment available.
            # NOTE(review): assumes every experiment of a night stores the same
            # night_num — confirm against the data producer.
            night_source = experiments.get('BothFreeze')
            if night_source is None:
                night_source = next(iter(experiments.values()), None)
            if night_source is None:
                # Night without any experiment: nothing to report.
                continue

            new_row = {
                'night': night,
                'num_spindles_sla7': baseline_data[night]['num_spindles_sla7'],
                'night_num': night_source['config']['night_num'],
            }

            # Add baseline data to the row
            for label in experiment_names_baseline:
                for key, value in baseline_data[night][label].items():
                    new_row[f"{label} - {key}"] = value

            # Add overfitting data to the row
            for experiment_name, experiment in experiments.items():
                for key, value in _extract_portinight_metrics(experiment).items():
                    new_row[f"{experiment_name} - {key}"] = value

            rows.append(new_row)

    df = pd.DataFrame(rows)
    return df

def create_baselinedf_from_portinight(folder_path):
    """Build a wide DataFrame with one row per night from baseline JSON files.

    Every ``*.json`` file in ``folder_path`` is expected to map a night name to
    a dict holding the ``'config_0'`` and ``'config_1'`` entries, which are
    reported under the names 'Baseline' and 'AdaThresh' respectively.

    Args:
        folder_path: Directory containing the JSON files.

    Returns:
        pandas.DataFrame whose columns are, for each metric in turn
        (f1, precision, recall, rms_score, f1base, detect_spindles), one
        ``"<experiment> - <metric>"`` column per experiment, followed by
        ``num_spindles_sla7`` (tp + fn of config_0) and ``Night`` (the
        ``night_num`` stored in config_0's config).
    """
    configs = ['config_0', 'config_1'] #, 'config_2', 'config_3', 'config_4', 'config_5']
    experiment_names = ['Baseline', 'AdaThresh'] #, 'Train', 'Both', 'TrainAlpha', 'TrainFreeze']
    metric_names = ['f1', 'precision', 'recall', 'rms_score', 'f1base', 'detect_spindles']

    # Metric-major ordering: all experiments for f1, then all for precision, ...
    headers = [
        f"{experiment_name} - {metric_name}"
        for metric_name in metric_names
        for experiment_name in experiment_names
    ]
    headers += ['num_spindles_sla7', 'Night']

    data = []

    # Listings are sorted because os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(folder_path)):
        # Skip non-JSON entries entirely; otherwise the previously loaded file
        # would be appended a second time (or json_data would be undefined).
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(folder_path, file_name), 'r') as file:
            json_data = json.load(file)

        for night_data in json_data.values():
            ola7gt = [night_data[config]['metrics']['detect_spindle_metrics_ola7gt'] for config in configs]
            plain = [night_data[config]['metrics']['detect_spindle_metrics'] for config in configs]
            reference = night_data['config_0']
            reference_ola7gt = reference['metrics']['detect_spindle_metrics_ola7gt']

            subject_data = [metrics['f1'] for metrics in ola7gt]
            subject_data += [metrics['precision'] for metrics in ola7gt]
            subject_data += [metrics['recall'] for metrics in ola7gt]
            subject_data += [metrics['avg_rms_score'] for metrics in plain]
            subject_data += [metrics['f1'] for metrics in plain]
            # Detections = true positives + false positives.
            subject_data += [metrics['tp'] + metrics['fp'] for metrics in ola7gt]
            # Ground-truth spindles = true positives + false negatives.
            subject_data += [reference_ola7gt['tp'] + reference_ola7gt['fn']]
            subject_data += [reference['config']['night_num']]

            data.append(subject_data)

    df = pd.DataFrame(data, columns=headers)

    return df

In [15]:
# Build the per-night summary table and the per-score RMS table from the two result directories.
test_over = create_df_from_portinight(overfitting_path, baseline_path)
test_rms = create_df_from_portinight_RMS(overfitting_path, baseline_path)

In [19]:
# Write both tables as CSV into the current working directory, without the index column.
test_over.to_csv('portinight_overfitting.csv', index=False)
test_rms.to_csv('portinight_overfitting_rms.csv', index=False)

In [16]:
baseline_data = {}
configs_baseline = ['Baseline', 'AdaThresh'] #, 'config_2', 'config_3', 'config_4', 'config_5']
experiment_names_baseline = ['Baseline', 'AdaThresh'] #, 'Train', 'Both', 'TrainAlpha', 'TrainFreeze']

# For every night of every baseline file, record per configuration the tuple
# (number of rms_scores, tp + fp of the ola7gt metrics).
# The listing is sorted because os.listdir() returns entries in arbitrary order.
for file_name in sorted(os.listdir(baseline_path)):
    # Skip non-JSON entries entirely; otherwise the previously loaded file
    # would be re-processed (or json_data would be undefined on the first entry).
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(baseline_path, file_name)
    with open(file_path, 'r') as file:
        json_data = json.load(file)

    for night in json_data.keys():
        baseline_data[night] = {}
        for config, label in zip(configs_baseline, experiment_names_baseline):
            metrics = json_data[night][config]['metrics']
            # Detections = true positives + false positives.
            num_spindles_detected = metrics['detect_spindle_metrics_ola7gt']['tp'] + metrics['detect_spindle_metrics_ola7gt']['fp']
            baseline_data[night][label] = (len(metrics['detect_spindle_metrics']['rms_scores']), num_spindles_detected)

In [17]:
# For every overfitting file, find the night whose night_num is 0 and record, for
# the 'JustTrain' and 'BothFreeze' experiments (stored under the names 'Baseline'
# and 'AdaThresh'), the tuple (number of rms_scores, tp + fp of the ola7gt metrics).
first_nights = {}
# Names under which the overfitting experiments are compared with the baseline.
renamed_experiments = {'JustTrain': 'Baseline', 'BothFreeze': 'AdaThresh'}

# The listing is sorted because os.listdir() returns entries in arbitrary order.
for file_name in sorted(os.listdir(overfitting_path)):
    # Skip non-JSON entries entirely; otherwise the previously loaded file
    # would be re-processed (or json_data would be undefined on the first entry).
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(overfitting_path, file_name)
    with open(file_path, 'r') as file:
        json_data = json.load(file)

    # The night number is read from the first experiment listed for each night.
    night_0 = None
    for night, experiments in json_data.items():
        first_experiment = next(iter(experiments.values()), None)
        if first_experiment is not None and first_experiment['config']['night_num'] == 0:
            night_0 = night
            break
    if night_0 is None:
        # No night numbered 0 in this file: nothing to record (previously this
        # fell through to a KeyError on json_data[""]).
        continue

    first_nights[night_0] = {}
    for experiment_name, experiment in json_data[night_0].items():
        new_name = renamed_experiments.get(experiment_name)
        if new_name is None:
            continue
        metrics = experiment['metrics']
        # Detections = true positives + false positives.
        num_spindles_detected = metrics['detect_spindle_metrics_ola7gt']['tp'] + metrics['detect_spindle_metrics_ola7gt']['fp']
        first_nights[night_0][new_name] = (len(metrics['detect_spindle_metrics']['rms_scores']), num_spindles_detected)


In [18]:
# Night to compare between the overfitting run and the baseline run.
night2watch = 'PN_04_GB_Night4'

# Each value is a (number of rms_scores, tp + fp) tuple; for each configuration the
# overfitting-run value is printed first, then the baseline value.
print(first_nights[night2watch]['Baseline'])
print(baseline_data[night2watch]['Baseline'])

print(first_nights[night2watch]['AdaThresh'])
print(baseline_data[night2watch]['AdaThresh'])

(10931, 0)
(10931, 10931)
(3252, 0)
(3252, 3252)


In [37]:
# Build the baseline table. The single-directory builder is
# create_baselinedf_from_portinight; create_df_from_portinight needs two
# directories and raises TypeError when given one.
# NOTE(review): `interested_path` is not assigned in any cell visible here —
# define it (a directory of JSON files keyed by 'config_0'/'config_1') before running.
mydf = create_baselinedf_from_portinight(interested_path)

# Remove rows with 0.0 in the column for num spindles
mydf = mydf[(mydf['num_spindles_sla7'] != 0.0)]

# Remove rows with NaNs
mydf = mydf.dropna()

# Save to a CSV file
mydf.to_csv('portinight_baseline.csv', index=False, header=True)


In [30]:
# Display the table currently held in `mydf`.
mydf

Unnamed: 0,Baseline - f1,AdaThresh - f1,Baseline - precision,AdaThresh - precision,Baseline - recall,AdaThresh - recall,Baseline - rms_score,AdaThresh - rms_score,Baseline - f1base,AdaThresh - f1base,Baseline - detect_spindles,AdaThresh - detect_spindles,num_spindles_sla7,Night
0,0.0,0.0,0.0,0.0,0.0,0.0,1.10873,1.10873,0.0,0.0,1786,1786,11,0
1,0.268984,0.370523,0.159987,0.245499,0.84396,0.755034,1.973734,2.382583,0.250734,0.370796,3144,1833,596,0
2,0.180822,0.240838,0.1056,0.154362,0.628571,0.547619,2.006955,1.985524,0.0,0.0,1250,745,210,0
3,0.225725,0.328515,0.129773,0.206044,0.866091,0.809935,2.051673,2.357743,0.0,0.0,3090,1820,463,0
4,0.124333,0.135857,0.066577,0.073701,0.938389,0.867299,2.417044,2.540596,0.0,0.0,2974,2483,211,0
5,0.274523,0.359551,0.165189,0.232558,0.811881,0.792079,1.98415,2.178613,0.0,0.0,2482,1720,505,0
6,0.0,0.0,0.0,0.0,0.0,0.0,3.195437,2.156517,0.0,0.0,23,49,38,0
7,0.215501,0.177444,0.180952,0.13082,0.266355,0.275701,1.88023,1.72641,0.222222,0.154341,315,451,214,0
8,0.173746,0.224239,0.096249,0.133929,0.891803,0.688525,2.185811,2.139973,0.17881,0.222812,2826,1568,305,0
9,0.205382,0.237963,0.115559,0.148583,0.922222,0.597222,2.891181,2.75974,0.197523,0.247228,2873,1447,360,0


In [46]:
import pandas as pd
import json
import os

def create_df_from_portinight_rms(folder_path):
    """Collect every individual RMS score of each configuration, across all nights.

    Every ``*.json`` file in ``folder_path`` is expected to map a night name to
    a dict holding the ``'config_0'`` and ``'config_1'`` entries, each exposing
    ``['metrics']['detect_spindle_metrics']['rms_scores']``.

    Args:
        folder_path: Directory containing the JSON files.

    Returns:
        A list with one inner list per configuration (index 0 for 'config_0',
        index 1 for 'config_1'), each holding the concatenated RMS scores of
        all nights. Despite the function name, no DataFrame is built.
    """
    configs = ['config_0', 'config_1'] #, 'config_2', 'config_3', 'config_4', 'config_5']

    # One accumulator per configuration.
    data = [[] for _ in configs]

    # Listings are sorted because os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(folder_path)):
        # Skip non-JSON entries entirely; otherwise the scores of the previously
        # loaded file would be appended again (or json_data would be undefined).
        if not file_name.endswith('.json'):
            continue
        with open(os.path.join(folder_path, file_name), 'r') as file:
            json_data = json.load(file)

        for night_data in json_data.values():
            for index, config in enumerate(configs):
                data[index] += night_data[config]['metrics']['detect_spindle_metrics']['rms_scores']

    return data
    # return df

In [47]:
# NOTE(review): `interested_path` is not assigned in any cell visible here —
# it must be defined before this cell can run on a fresh kernel.
data = create_df_from_portinight_rms(interested_path)

In [51]:
# Number of RMS scores collected for the first configuration ('config_0').
len(data[0])

85242

In [61]:
# Create a pandas dataframe: one row per RMS score, labelled with its experiment
df1 = pd.DataFrame(data[0], columns=['rms_scores'])
df1['Experiment'] = 'Baseline'
df2 = pd.DataFrame(data[1], columns=['rms_scores'])
df2['Experiment'] = 'AdaThresh'

df = pd.concat([df1, df2])

# Keep only scores strictly between 0 and 100. NaN fails both comparisons, so
# NaN scores are removed by this filter as well.
df = df[(df['rms_scores'] < 100.0) & (df['rms_scores'] > 0.0)]
# dropna() returns a new frame; the result must be assigned to take effect.
df = df.dropna()


# Save to a CSV file
df.to_csv('portinight_baseline_rms.csv', index=False, header=True)


In [20]:
configs = ['AdaThresh'] #, 'config_2', 'config_3', 'config_4', 'config_5']
experiment_names = ['AdaThresh'] #, 'Train', 'Both', 'TrainAlpha', 'TrainFreeze']

# One [subject, threshold, night index] record per threshold used by 'AdaThresh'.
data_thresholds = []
# Running index over all nights of all files (not the night_num stored in the config).
night_num = 0

# The listing is sorted because os.listdir() returns entries in arbitrary order,
# which would otherwise make the running night index irreproducible.
for file_name in sorted(os.listdir(baseline_path)):
    # Skip non-JSON entries entirely; otherwise the previously loaded file would
    # be appended again under new night indices (or json_data would be undefined).
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(baseline_path, file_name)
    with open(file_path, 'r') as file:
        json_data = json.load(file)

    for night in json_data.keys():
        ada_thresh = json_data[night]['AdaThresh']
        # The subject identifier is the first five characters of the 'subject' field.
        subject_id = ada_thresh['config']['subject'][:5]
        used_thresholds = ada_thresh['metrics']['threshold_metrics']
        for threshold in used_thresholds:
            subject_data = [subject_id, threshold, night_num]
            data_thresholds.append(subject_data)
        night_num += 1

In [21]:
# Create a pandas dataframe with one row per (subject, threshold, night index) record
df_thresholds = pd.DataFrame(data_thresholds, columns=['Subject', 'Threshold', 'night_id'])

# Remove all values where threshold is 0.5
# df_thresholds = df_thresholds[(df_thresholds['Threshold'] != 0.5)]

# Save to a CSV file
df_thresholds.to_csv('portinight_thresholds.csv', index=False, header=True)