In [27]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import json
import os
import glob
import numpy as np

sns.set_context("talk", font_scale=1.0)

# Root folder; each immediate sub-directory is expected to hold metrics_*.json files
directory_path = 'validate/'

# The pattern has no '**' component, so exactly one directory level is matched
metrics_pattern = os.path.join(directory_path, '*/metrics_*.json')

# One flat record per experiment, gathered across every matching file
all_experiments = []

for file_path in glob.glob(metrics_pattern, recursive=True):
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Model name: parent directory name up to its first underscore
    model_name = os.path.basename(os.path.dirname(file_path)).split("_")[0]
    # Dataset name: second underscore-separated token of the file name,
    # cut at its first dot
    dataset_name = os.path.basename(file_path).split("_")[1].split(".")[0]

    for experiment in data['experiments']:
        # Copy the experiment first (the copy still carries the nested
        # 'metrics' entry), then lift every metric to the top level.
        experiment_flat = dict(experiment)
        experiment_flat.update(experiment.pop('metrics'))
        # Tag the record with where it came from
        experiment_flat['model'] = model_name
        experiment_flat['dataset'] = dataset_name
        all_experiments.append(experiment_flat)

# Build the table and discard the nested dict column carried over by the copy
df = pd.DataFrame(all_experiments).drop(columns="metrics")



# Rows recorded without any attack serve as the clean baseline
is_clean = df['attack'] == 'none'

# A baseline run has no attack steps and no perturbation budget
df.loc[is_clean, 'iterations'] = 0
df.loc[is_clean, 'epsilon'] = 0

# For baseline rows both attack-specific EPE columns take the plain 'epe' value
for epe_column in ('epe_orig_preds', 'epe_ground_truth'):
    df.loc[is_clean, epe_column] = df['epe']

# Every attack name present in the data, baseline label excluded
attack_types = df['attack'].unique()
attack_types = attack_types[attack_types != 'none']

# Baseline rows, taken after the fix-ups above
none_entries = df[is_clean]

# One relabelled copy of the baseline rows per attack type
new_entries = [none_entries.assign(attack=attack) for attack in attack_types]

# Stack the relabelled copies into a single frame
new_entries_df = pd.concat(new_entries)

# Original rows followed by the relabelled baseline copies
result_df = pd.concat([df, new_entries_df])


# Keep only the most recent result per experiment configuration.

# Columns that together identify one configuration
unique_columns = ['model', 'checkpoint', 'attack', 'targeted', 'target', 'dataset', 'norm', 'epsilon', 'iterations', 'alpha', 'optim']

# Parse 'end_time' into datetimes, order rows by it, and keep the last
# (latest-finished) row of every configuration.
df = (
    df.assign(end_time=pd.to_datetime(df['end_time']))
    .sort_values(by='end_time')
    .drop_duplicates(subset=unique_columns, keep='last')
)

# A missing 'optim' is labelled "ground_truth"; model 'ms' is shown as 'ms_raft+'
df = df.assign(
    optim=df['optim'].fillna("ground_truth"),
    model=df['model'].replace('ms', 'ms_raft+'),
)


#df.to_csv("big_df.csv")
big_df = df

# Root folder; each immediate sub-directory is expected to hold
# iteration_metrics_*.json files
directory_path = 'validate/'

# One flat record per experiment, gathered across every matching file
all_experiments = []

# The pattern has no '**' component, so exactly one directory level is matched
for file_path in glob.glob(os.path.join(directory_path, '*/iteration_metrics_*.json'), recursive=True):
    with open(file_path, 'r') as file:
        data = json.load(file)

    # Model name: parent directory name up to its first underscore
    model_name = os.path.basename(os.path.dirname(file_path)).split("_")[0]
    # Dataset name: third underscore-separated token of the file name
    # (after "iteration" and "metrics"), cut at its first dot
    dataset_name = os.path.basename(file_path).split("_")[2].split(".")[0]

    for experiment in data['experiments']:
        # Flatten the record so every metric sits at the top level
        experiment_flat = {**experiment, **experiment.pop('metrics')}
        # Tag the record with where it came from
        experiment_flat['model'] = model_name
        experiment_flat['dataset'] = dataset_name
        all_experiments.append(experiment_flat)

# Build the table; the nested dict column is still present after flattening
iterations_df = pd.DataFrame(all_experiments)
iterations_df = iterations_df.drop(columns="metrics")

# Keep only the most recent result per experiment configuration.
# Parse THIS frame's own 'end_time'. Reading the column from `df` instead
# would align unrelated timestamps by row index and make the ordering below
# meaningless.
iterations_df['end_time'] = pd.to_datetime(iterations_df['end_time'])

# Order rows by completion time so that "last" means "most recent"
iterations_df = iterations_df.sort_values(by='end_time')

# Columns that together identify one configuration
unique_columns = ['model', 'checkpoint', 'attack', 'targeted', 'target', 'dataset', 'norm', 'epsilon', 'iterations', 'alpha', 'optim']
iterations_df = iterations_df.drop_duplicates(subset=unique_columns, keep='last')



# Attach iteration-0 (no-attack) reference values to the iterations dataframe.

# `df` already shows model 'ms' as 'ms_raft+'. Apply the same renaming to
# iterations_df BEFORE joining on 'model'; otherwise the 'ms' rows find no
# baseline partner and end up with NaN reference values.
iterations_df['model'] = iterations_df['model'].replace('ms', 'ms_raft+')

# Baseline rows (attack == 'none'), restricted to the join keys and the
# reference values; the plain 'epe' becomes 'epe_gt_i0'.
none_df = (
    df.loc[
        df['attack'] == 'none',
        ['model', 'checkpoint', 'dataset', 'epe', "epe_initial_to_negative", "epe_initial_to_zero"],
    ]
    .rename(columns={'epe': 'epe_gt_i0'})
)
# NOTE(review): if several baseline rows share (model, checkpoint, dataset),
# the left join below repeats the matching iteration rows once per baseline
# row — confirm the baseline is unique per key.

# Left join keeps every iteration row and adds the baseline columns
iterations_df = pd.merge(iterations_df, none_df, on=['model', 'checkpoint', 'dataset'], how='left')

# Initial EPE towards the attack target: the "negative" reference when the
# target is 'negative', the "zero" reference for every other target value
# (including a missing target).
iterations_df['epe_target_i0'] = np.where(
    iterations_df['target'] == 'negative',
    iterations_df['epe_initial_to_negative'],
    iterations_df['epe_initial_to_zero'],
)

# Order rows by 'start_time'
iterations_df = iterations_df.sort_values(by='start_time')

# A missing 'optim' is labelled "ground_truth"
iterations_df['optim'] = iterations_df['optim'].fillna("ground_truth")

# Optional export of the iterations dataframe
#iterations_df.to_csv("iteration_df.csv")

# Columns identifying one experiment configuration in both frames
# (the timing columns are not part of the key)
key_columns = ['model', 'checkpoint', 'attack', 'norm', 'epsilon',
               'targeted', 'target', 'loss', 'dataset',
               'iterations', 'alpha',
               'optim', 'boxconstraint']

# Outer join: configurations present in only one frame are kept; non-key
# columns that exist in both frames receive the '_iter' / '_big' suffixes.
merged_df = iterations_df.merge(big_df, how='outer', on=key_columns, suffixes=('_iter', '_big'))

# Timing information is taken from big_df only: copy its suffixed columns to
# the plain names, then discard both suffixed variants.
for timing_column in ('start_time', 'end_time', 'duration'):
    merged_df[timing_column] = merged_df[timing_column + '_big']

merged_df = merged_df.drop(
    columns=['start_time_iter', 'end_time_iter', 'start_time_big', 'end_time_big', 'duration_big', 'duration_iter']
)

# Reference-EPE columns exist in both frames: prefer the big_df value and
# fall back to the iterations_df value where the former is missing.
columns_to_process = ['epe_initial_to_zero', 'epe_initial_to_negative']

for col in columns_to_process:
    # pop() removes the suffixed column from the frame and hands back its values
    big_values = merged_df.pop(col + '_big')
    iter_values = merged_df.pop(col + '_iter')
    merged_df[col] = big_values.combine_first(iter_values)

# Only the un-suffixed merged columns remain at this point
print(merged_df)

merged_df.to_csv("one_single.csv")

         model checkpoint attack norm  epsilon  iterations   alpha  targeted  \
0         ccmr      kitti    bim  inf   0.0157        20.0  0.0100     False   
1         ccmr      kitti    bim  inf   0.0314        20.0  0.0100     False   
2         ccmr      kitti    bim  inf   0.0500        20.0  0.0100      True   
3         ccmr      kitti    bim  inf   0.0500        20.0  0.0100      True   
4         ccmr      kitti    bim  two   0.0500        20.0  0.0001     False   
...        ...        ...    ...  ...      ...         ...     ...       ...   
3175  starflow     sintel    pgd  inf   0.0314        20.0  0.0100     False   
3176  starflow     sintel    pgd  inf   0.0314        20.0  0.0100      True   
3177  starflow     sintel    pgd  inf   0.0314        20.0  0.0100      True   
3178  starflow     sintel    pgd  inf   0.0314        20.0  0.0100      True   
3179  starflow     sintel    pgd  inf   0.0314        20.0  0.0100      True   

        target loss  ... epe_ground_tru

In [26]:
# Show big_df as the cell's rich output.
# NOTE(review): this cell's execution count (26) is lower than that of the
# cell which assigns big_df (27), so the rendered table may come from an
# earlier kernel state — re-run top to bottom to refresh it.
big_df

Unnamed: 0,start_time,model,checkpoint,attack,norm,epsilon,targeted,target,loss,end_time,...,own_epe_ground_truth_to_negative,own_epe_ground_truth_to_zero,epe_ground_truth_to_negative,epe_ground_truth_to_zero,optim,boxconstraint,conf_f1,val,name,severity
610,2024-05-14 22:22:34,rpknet,kitti,bim,inf,0.000,True,negative,epe,2024-05-14 22:28:59,...,,,,,ground_truth,,,,,
611,2024-05-14 22:31:41,rpknet,kitti,bim,inf,0.000,True,zero,epe,2024-05-14 22:37:11,...,,,,,ground_truth,,,,,
613,2024-05-14 22:47:53,rpknet,kitti,pgd,inf,0.000,True,negative,epe,2024-05-14 22:53:26,...,,,,,ground_truth,,,,,
614,2024-05-14 23:10:29,rpknet,kitti,pgd,inf,0.000,True,zero,epe,2024-05-14 23:15:14,...,,,,,ground_truth,,,,,
616,2024-05-14 23:24:18,rpknet,kitti,cospgd,inf,0.000,True,negative,epe,2024-05-14 23:29:10,...,,,,,ground_truth,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3476,2024-09-05 00:08:29,raft,sintel,pgd,two,0.251,True,negative,epe,2024-09-05 03:28:00,...,,,,,ground_truth,,,,,
1945,2024-09-05 02:39:02,maskflownet,sintel,cospgd,two,0.251,False,,epe,2024-09-05 03:30:52,...,,,,,ground_truth,,,,,
3477,2024-09-05 00:44:11,raft,sintel,pgd,two,0.251,True,zero,epe,2024-09-05 03:59:36,...,,,,,ground_truth,,,,,
3478,2024-09-05 00:46:36,raft,sintel,cospgd,two,0.251,True,negative,epe,2024-09-05 04:02:29,...,,,,,ground_truth,,,,,
