In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import json
import os
import glob
import numpy as np

sns.set_context("talk", font_scale=1.0)

# Directory containing the JSON files
directory_path = 'validate/'

# List to store the processed data
all_experiments = []

# Iterate through all JSON files in the directory
for file_path in glob.glob(os.path.join(directory_path, '*/[jlx]_metrics_*.json'), recursive=True):
    with open(file_path, 'r') as file:
        data = json.load(file)
    
    # Extract the model name from the file path
    model_name = os.path.basename(os.path.dirname(file_path)).split("_")[0]
    dataset_name = os.path.basename(file_path).split("_")[2].split(".")[0]
    
    for experiment in data['experiments']:
        # Flatten the dictionary to include metrics at the top level
        experiment_flat = {**experiment, **experiment.pop('metrics')}
        # Add the model name and dataset name to the experiment data
        experiment_flat['model'] = model_name
        experiment_flat['dataset'] = dataset_name
        # Append the processed experiment to the list
        all_experiments.append(experiment_flat)

# Create a DataFrame from the combined data
df = pd.DataFrame(all_experiments)
df.drop("metrics", axis="columns", inplace=True)



# Set iterations and epsilon to 0 where 'attack' is 'none'
df.loc[df['attack'] == 'none', 'iterations'] = 0
df.loc[df['attack'] == 'none', 'epsilon'] = 0
# Replace 'epe_orig_preds' with 'epe' where 'attack' is 'none'
df.loc[df['attack'] == 'none', 'epe_orig_preds'] = df['epe']
# Replace 'epe_ground_truth' with 'epe' where 'attack' is 'none'
df.loc[df['attack'] == 'none', 'epe_ground_truth'] = df['epe']

# Get unique attack types excluding 'none'
attack_types = df['attack'].unique()
attack_types = attack_types[attack_types != 'none']

# Filter entries with attack == 'none'
none_entries = df[df['attack'] == 'none']

# Create copies of 'none' entries for each attack type
new_entries = []
for attack in attack_types:
    temp = none_entries.copy()
    temp['attack'] = attack
    new_entries.append(temp)

# Combine all new entries into a single DataFrame
new_entries_df = pd.concat(new_entries)

# Combine the new entries with the original DataFrame
result_df = pd.concat([df, new_entries_df])


# Keep only most recent results
# Convert 'start_time' to datetime
df['end_time'] = pd.to_datetime(df['end_time'])

# Sort the DataFrame by 'start_time'
df = df.sort_values(by='end_time')

# Drop duplicates, keeping the most recent entry for each combination of specified columns
unique_columns = ['model', 'checkpoint', 'attack', 'targeted', 'target', 'dataset', 'norm', 'epsilon', 'iterations', 'alpha', 'optim', 'name', 'severity']
df = df.drop_duplicates(subset=unique_columns, keep='last')


# Replace optim=NaN with ground truth
df['optim'] = df['optim'].fillna("ground_truth")

# Replace ms with ms_raft+
df['model'] = df['model'].replace('ms', 'ms_raft+')

df['model'] = df['model'].replace('videoflow', 'videoflow_bof')


df.to_csv("big_df.csv")
big_df = df

# Directory containing the JSON files
directory_path = 'validate/'

# List to store the processed data
all_experiments = []

# Iterate through all JSON files in the directory
for file_path in glob.glob(os.path.join(directory_path, '*/[jlx]_iteration_metrics_*.json'), recursive=True):
    with open(file_path, 'r') as file:
        data = json.load(file)
    
    # Extract the model name from the file path
    model_name = os.path.basename(os.path.dirname(file_path)).split("_")[0]
    dataset_name = os.path.basename(file_path).split("_")[3].split(".")[0]
    
    for experiment in data['experiments']:
        # Flatten the dictionary to include metrics at the top level
        experiment_flat = {**experiment, **experiment.pop('metrics')}
        # Add the model name and dataset name to the experiment data
        experiment_flat['model'] = model_name
        experiment_flat['dataset'] = dataset_name
        # Append the processed experiment to the list
        all_experiments.append(experiment_flat)

# Create a DataFrame from the combined data
iterations_df = pd.DataFrame(all_experiments)
iterations_df.drop("metrics", axis="columns", inplace=True)

# Keep only most recent results
# Convert 'start_time' to datetime
iterations_df['end_time'] = pd.to_datetime(df['end_time'])

# Sort the DataFrame by 'start_time'
iterations_df = iterations_df.sort_values(by='end_time')

# Drop duplicates, keeping the most recent entry for each combination of specified columns
unique_columns = ['model', 'checkpoint', 'attack', 'targeted', 'target', 'dataset', 'norm', 'epsilon', 'iterations', 'alpha', 'optim']
iterations_df = iterations_df.drop_duplicates(subset=unique_columns, keep='last')



# Add i0 to the iterations dataframe
none_df = df[df['attack'] == 'none']

# Select only the necessary columns for the join
none_df = none_df[['model', 'checkpoint', 'dataset', 'epe', "epe_initial_to_negative", "epe_initial_to_zero"]]
none_df.rename(columns={'epe': 'epe_gt_i0'}, inplace=True)

iterations_df.to_csv("iteration_df.csv")

# Perform the join operation to add `epe_i0` to `iterations_df`
iterations_df = pd.merge(iterations_df, none_df, on=['model', 'checkpoint', 'dataset'], how='left')

iterations_df['epe_target_i0'] = iterations_df.apply(
    lambda row: row['epe_initial_to_negative'] if row['target'] == 'negative' else row['epe_initial_to_zero'],
    axis=1
)


iterations_df.to_csv("iteration_df.csv")

# Sort the dataframe by 'start_time' to ensure it remains ordered
iterations_df = iterations_df.sort_values(by='start_time')

iterations_df = iterations_df.dropna(subset=['epe_ground_truth_i20'])
# Replace optim=NaN with ground truth
iterations_df['optim'] = iterations_df['optim'].fillna("ground_truth")

# Replace ms with ms_raft+
iterations_df['model'] = iterations_df['model'].replace('ms', 'ms_raft+')

# Replace ms with ms_raft+
iterations_df['model'] = iterations_df['model'].replace('videoflow', 'videoflow_bof')

iterations_df.loc[(iterations_df["dataset"] == "kitti-2015") & (iterations_df["model"] == "liteflownet3"), "model"] = "liteflownet3_pseudoreg"

# Display the updated dataframe
iterations_df.to_csv("iteration_df.csv")

# Add missing columns to df1 with NaN values
#for col in missing_columns:
#    big_df[col] = np.nan
# Key columns excluding 'start_time' and 'end_time'
key_columns = ['model', 'checkpoint', 'attack', 'norm', 'epsilon',
               'targeted', 'target', 'loss', 'dataset',
               'iterations', 'alpha',
               'optim', 'boxconstraint']

# Perform an outer merge on all key columns
merged_df = pd.merge(iterations_df, big_df, how='outer', on=key_columns, suffixes=('_iter', '_big'))

# Retain only 'start_time' and 'end_time' from 'big_df'
# First, rename the 'start_time_big' and 'end_time_big' to 'start_time' and 'end_time'
merged_df['start_time'] = merged_df['start_time_big']
merged_df['end_time'] = merged_df['end_time_big']
merged_df['duration'] = merged_df['duration_big']
#merged_df['epe_initial_to_negative'] = merged_df['epe_initital_to_zero']

# Drop the other 'start_time' and 'end_time' columns from 'iterations_df' (i.e., '_iter' suffixed columns)
merged_df.drop(columns=['start_time_iter', 'end_time_iter', 'start_time_big', 'end_time_big', 'duration_big', 'duration_iter'], inplace=True)

# The resulting DataFrame will now have only the 'start_time' and 'end_time' from big_df
columns_to_process = ['epe_initial_to_zero', 'epe_initial_to_negative']

# Loop through each column and apply the logic
for col in columns_to_process:
    # Create the new column without suffix
    merged_df[col] = merged_df[col + '_big'].combine_first(merged_df[col + '_iter'])
    
    # Drop the _big and _iter columns now that we've merged them
    merged_df.drop(columns=[col + '_big', col + '_iter'], inplace=True)

# Now the dataframe contains only the new merged columns without suffixes
print(merged_df)
merged_df = merged_df.sort_values(by=['model', 'dataset', 'norm', 'attack'], ascending=True)

merged_df.to_csv("one_single.csv")

       model checkpoint  attack norm  epsilon  iterations   alpha  targeted  \
0     flow1d      kitti     pgd  inf    0.050        20.0  0.0100      True   
1     flow1d      kitti     bim  two    0.050        20.0  0.0001      True   
2     flow1d      kitti     pgd  two    0.050        20.0  0.0001      True   
3     flow1d      kitti     pgd  two    0.050        20.0  0.0001      True   
4     flow1d      kitti  cospgd  two    0.050        20.0  0.0001      True   
...      ...        ...     ...  ...      ...         ...     ...       ...   
4811     dip     sintel    fgsm  two    0.251         NaN     NaN      True   
4812  skflow     sintel  cospgd  two    0.251        20.0  0.1000     False   
4813  skflow     sintel  cospgd  two    0.251        20.0  0.1000     False   
4814     dip     sintel     pgd  two    0.251        20.0  0.1000     False   
4815     dip     sintel     bim  two    0.251        20.0  0.1000     False   

        target loss  ... epe_ground_truth_to_zero  

In [6]:
import pandas as pd

# Key columns for checking duplicates
key_columns = ['model', 'checkpoint', 'attack', 'norm', 'epsilon',
               'targeted', 'target', 'loss', 'dataset',
               'iterations', 'alpha', 'optim', 'boxconstraint']

# Step 1: Check for duplicate key combinations in merged_df
duplicates = merged_df.groupby(key_columns).size()
duplicate_rows = duplicates[duplicates > 1]

# Step 2: Check if the number of rows in merged_df matches the number of rows in big_df
row_count_matches = len(merged_df) == len(big_df)

# Output results
if not duplicate_rows.empty:
    print(f"Duplicate key combinations found:\n{duplicate_rows}")
else:
    print("No duplicate key combinations found.")

if row_count_matches:
    print("Row count matches between merged_df and big_df.")
else:
    print(f"Row count mismatch: merged_df has {len(merged_df)} rows, while big_df has {len(big_df)} rows.")


No duplicate key combinations found.
Row count mismatch: merged_df has 4206 rows, while big_df has 4194 rows.


In [2]:
import numpy as np

combinations_list = [
    # Targeted: True, epsilon: 0.0314, target: 'negative' or 'zero' (for attacks: pgd, cospgd, bim, norm: 'inf')
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},

    # Targeted: True, epsilon: 0.2510, target: 'negative' or 'zero' (for attacks: pgd, cospgd, bim, norm: 'two')
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},

    # Targeted: True, epsilon: 0.0314, target: 'negative' or 'zero' (for attack: fgsm, norm: 'inf')
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},

    # Targeted: True, epsilon: 0.2510, target: 'negative' or 'zero' (for attack: fgsm, norm: 'two')
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},

    # Targeted: False, epsilon: 0.0314 or 0.0157, target: None (for attacks: pgd, cospgd, bim, norm: 'inf')
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': None, 'norm': 'inf'},
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'target': None, 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': None, 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'target': None, 'norm': 'inf'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': None, 'norm': 'inf'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'target': None, 'norm': 'inf'},

    # Targeted: False, epsilon: 0.2510, target: None (for attacks: pgd, cospgd, bim, norm: 'two')
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': None, 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': None, 'norm': 'two'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': None, 'norm': 'two'},

    #Top 9 models configuration
    {'targeted': False, 'target': np.nan, 'attack': 'cospgd', 'epsilon': 0.0314, 'alpha': 0.01, 'iterations': 20.0, 'norm': 'inf', 'loss': 'epe', 'optim': "initial_flow"},
    {'targeted': False, 'target': np.nan, 'attack': 'pgd', 'epsilon': 0.0314, 'alpha': 0.01, 'iterations': 20.0, 'norm': 'inf', 'loss': 'epe', 'optim': "initial_flow"},

    # Targeted: False, epsilon: 0.0314 or 0.0157, target: None (for attack: fgsm, norm: 'inf')
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.0314, 'target': None, 'norm': 'inf'},
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.0157, 'target': None, 'norm': 'inf'},

    # Targeted: False, epsilon: 0.2510, target: None (for attack: fgsm, norm: 'two')
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.2510, 'target': None, 'norm': 'two'},

    {'attack': 'common_corruptions', 'name': 'gaussian_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'shot_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'impulse_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'defocus_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'glass_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'motion_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'zoom_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'snow', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'frost', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'fog', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'brightness', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'contrast', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'elastic_transform', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'pixelate', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'jpeg_compression', 'severity': 3.0},

    {'attack': 'none'},

]

# Create a DataFrame from the combinations_list for easy comparison
combinations_df = pd.DataFrame(combinations_list)

# Now we want to match specific columns from merged_df with combinations_df
# The columns to match are the ones that exist in combinations_list (excluding rows not found in the combinations)

# Define the columns to match for adversarial attacks (excluding 'fgsm' and 'common_corruptions')
columns_to_match_adversarial = ['attack', 'targeted', 'iterations', 'alpha', 'loss', 'epsilon', 'target', 'norm', 'optim']

# Define the columns to match for common corruptions (which are based on 'name' and 'severity')
columns_to_match_common_corruptions = ['attack', 'name', 'severity']

# Define the columns to match for FGSM attacks (which don't require 'iterations' or 'alpha')
columns_to_match_fgsm = ['attack', 'targeted', 'loss', 'epsilon', 'target', 'norm']

# Define the column to match for 'attack: none'
columns_to_match_none = ['attack']

# Fill missing columns with None or NaN in merged_df to avoid KeyErrors during comparison
for col in columns_to_match_adversarial + columns_to_match_common_corruptions + columns_to_match_fgsm + columns_to_match_none:
    if col not in merged_df.columns:
        merged_df[col] = None

# Filter the common corruption attacks
common_corruptions_df = combinations_df[combinations_df['attack'] == 'common_corruptions']
filtered_common_corruptions_df = merged_df.merge(common_corruptions_df, on=columns_to_match_common_corruptions, how='inner', suffixes=('', '_duplicate'))
filtered_common_corruptions_df.to_csv("fcc.csv")

# Filter the FGSM attacks separately
fgsm_df = combinations_df[combinations_df['attack'] == 'fgsm']
filtered_fgsm_df = merged_df.merge(fgsm_df, on=columns_to_match_fgsm, how='inner', suffixes=('', '_duplicate'))
filtered_fgsm_df.to_csv("ffa.csv")
# Filter the remaining adversarial attacks (excluding FGSM and common corruptions)
adversarial_attacks_df = combinations_df[(combinations_df['attack'] != 'common_corruptions') & (combinations_df['attack'] != 'fgsm') & (combinations_df['attack'] != 'none')]
filtered_adversarial_attacks_df = merged_df.merge(adversarial_attacks_df, on=columns_to_match_adversarial, how='inner', suffixes=('', '_duplicate'))
filtered_adversarial_attacks_df.to_csv("faa.csv")

# Filter the 'none' attack cases
none_attacks_df = combinations_df[combinations_df['attack'] == 'none']
filtered_none_attacks_df = merged_df.merge(none_attacks_df, on=columns_to_match_none, how='inner', suffixes=('', '_duplicate'))
filtered_none_attacks_df.to_csv("fn.csv")

# Combine the filtered dataframes for adversarial, FGSM, common corruptions, and none attacks
filtered_df = pd.concat([filtered_adversarial_attacks_df, filtered_fgsm_df, filtered_common_corruptions_df, filtered_none_attacks_df], ignore_index=True)
filtered_df = filtered_df.loc[:, ~filtered_df.columns.str.endswith('_duplicate')]


filtered_df = filtered_df.sort_values(by=['model', 'dataset', 'norm', 'attack'], ascending=True)




# filtered_df now contains only rows from merged_df that match combinations_list
print(filtered_df)

filtered_df.to_csv("filtered_single_df.csv")

          model checkpoint              attack norm  epsilon  iterations  \
54         ccmr      kitti                fgsm  inf   0.0157         NaN   
123        ccmr      kitti                fgsm  inf   0.0314         NaN   
348        ccmr      kitti  common_corruptions  NaN      NaN         NaN   
396        ccmr      kitti  common_corruptions  NaN      NaN         NaN   
444        ccmr      kitti  common_corruptions  NaN      NaN         NaN   
...         ...        ...                 ...  ...      ...         ...   
875   videoflow      kitti  common_corruptions  NaN      NaN         NaN   
923   videoflow      kitti  common_corruptions  NaN      NaN         NaN   
971   videoflow      kitti  common_corruptions  NaN      NaN         NaN   
1019  videoflow      kitti  common_corruptions  NaN      NaN         NaN   
1066  videoflow      kitti  common_corruptions  NaN      NaN         NaN   

      alpha targeted target loss  ... epe_ground_truth_to_zero  \
54      NaN    False 

In [9]:
# Set the dataset and norm
datasets = ["sintel-final", "sintel-clean", "kitti-2015"]

# Define the datasets and corresponding model names
kitti_model_names = [
    "raft", "gma", "rpknet", "ccmr", "craft", "csflow", "dicl", "dip", 
    "fastflownet", "maskflownet", "flow1d", "flowformer", "flowformer++", 
    "gmflow", "gmflownet", "hd3", "irr_pwc", "liteflownet", "liteflownet2", 
    "liteflownet3_pseudoreg", "llaflow", "matchflow", "ms_raft+", 
    "rapidflow", "scopeflow", "scv4", "separableflow", "skflow", 
    "starflow", "videoflow_bof"
]

sintel_model_names = [
    "raft", "pwcnet", "gma", "rpknet", "ccmr", "craft", "dicl", "dip", 
    "fastflownet", "maskflownet", "maskflownet_s", "flow1d", "flowformer", 
    "flowformer++", "gmflow", "hd3", "liteflownet", 
    "liteflownet2", "liteflownet3", "llaflow", "matchflow", 
    "ms_raft+", "rapidflow", "scopeflow", "scv4", "separableflow", 
    "skflow", "starflow", "videoflow_bof"
]

kitti_model_names_top_9 = [
    "raft", "rpknet", "craft", "maskflownet", "flow1d", "flowformer++", 
    "liteflownet3_pseudoreg", "ms_raft+", 
    "scopeflow", "gma"
]

sintel_model_names_top_9 = [
    "raft", "rpknet", "craft", "maskflownet", "flow1d", "flowformer++", 
    "liteflownet3", "ms_raft+", 
    "scopeflow", "gma"
]

import pandas as pd
import numpy as np
attributes_to_cast = {
    'attack': str,
    'targeted': 'boolean',  # Can use a boolean cast, True/False or None
    'iterations': float,
    'alpha': float,
    'loss': str,
    'epsilon': float,
    'target': str,
    'norm': str,
    'name': str,
    'severity': float
}

# Function to cast columns in merged_df
def cast_columns(merged_df, attributes_to_cast):
    for col, dtype in attributes_to_cast.items():
        if dtype == 'boolean':
            # Handle the conversion to boolean, keeping None values intact
            merged_df[col] = merged_df[col].astype('bool', errors='ignore')
        else:
            merged_df[col] = merged_df[col].astype(dtype, errors='ignore')
    return merged_df

# Cast the columns
merged_df = cast_columns(merged_df, attributes_to_cast)

def row_matches_combination(input_df, combination, output_df):
    # Initialize a new dataframe (empty) to store rows that match the combination

    # Iterate over each row in the dataframe
    for index, row in input_df.iterrows():
        row_in_combination = True  # Assume the row matches the combination initially
        row_dict = row.to_dict()
        # Loop through each key-value pair in the combination
        for key, value in combination.items():
            # If the row doesn't contain the key or the value doesn't match, set row_in_combination to False
            # This means the current row doesn't match the combination
            if row_dict[key] != value:
                row_in_combination = False
                break  # Exit the inner loop because we know this row doesn't match

        # If row_in_combination is still True after the inner loop, it means the row matches the combination
        if row_in_combination:
            # Append the matching row to the new dataframe
             output_df = pd.concat([output_df, row.to_frame().T], ignore_index=True)

    # Return the new dataframe that contains only the rows that matched the combination
    return output_df

def combination_matches_row(input_df, combination_list, output_df):
    # Initialize a new dataframe (empty) to store rows that match the combination
    for combination in combination_list:
        combination_in_input_df = False
    # Iterate over each row in the dataframe
        for index, row in input_df.iterrows():
            row_in_combination = True  # Assume the row matches the combination initially
            if combination_in_input_df:
                break
            # Loop through each key-value pair in the combination
            for key, value in combination.items():
                
                # If the row doesn't contain the key or the value doesn't match, set row_in_combination to False
                # This means the current row doesn't match the combination
                if row[key] != value:
                    row_in_combination = False
                    break  # Exit the inner loop because we know this row doesn't match

            # If row_in_combination is still True after the inner loop, it means the row matches the combination
            if row_in_combination:
                combination_in_input_df = True
                break
        if combination_in_input_df:
            #TODO: add the dictionary of the combination to the output df
            output_df = pd.concat([output_df, pd.DataFrame([combination])], ignore_index=True)
    # Return the new dataframe that contains only the rows that matched the combination
    return output_df


# Assuming df is your original dataframe
# And combinations_list is provided as you showed
# And datasets and model names lists are defined as above

# Create an empty dataframe to store missing combinations
missing_comb_df = pd.DataFrame()

# Iterate through each dataset
for dataset in datasets:
    # Select the relevant model names based on the dataset
    if "kitti" in dataset:
        model_names = kitti_model_names
    else:
        model_names = sintel_model_names
    
    # Iterate through each model in the dataset
    for model in model_names:
        # Filter the dataframe for the current dataset and model
        subset_df = merged_df[(merged_df['dataset'] == dataset) & (merged_df['model'] == model)]
        subset_df.to_csv("subset.csv")
        
        # Convert the filtered rows to a list of dictionaries (Python lists of dicts)
        rows_to_check = subset_df.to_dict(orient="records")
        print(f"\nChecking rows for model: {model}, dataset: {dataset}:")
        print(rows_to_check)  # Print the rows as a list of dictionaries
        
        # Check for each combination in the combinations_list
        for combination in combinations_list:
            missing_comb_df = row_matches_combination(subset_df, combination, missing_comb_df)

# Rearrange columns to have 'model' and 'dataset' first
columns_order = ['model', 'dataset'] + [col for col in missing_comb_df.columns if col not in ['model', 'dataset']]
missing_comb_df = missing_comb_df[columns_order]

# Sort by 'model', 'dataset', 'norm', and 'attack'
missing_comb_df = missing_comb_df.sort_values(by=['model', 'dataset', 'norm', 'attack'], ascending=True)

# Save the missing combinations dataframe to a CSV file
missing_comb_df.to_csv('filtered_one_df.csv', index=False)

print("Missing combinations saved to 'missing_combinations.csv'")




Checking rows for model: raft, dataset: sintel-final:
[{'model': 'raft', 'checkpoint': 'sintel', 'attack': 'bim', 'norm': 'inf', 'epsilon': 0.0314, 'iterations': 20.0, 'alpha': 0.01, 'targeted': True, 'target': 'negative', 'loss': 'epe', 'dataset': 'sintel-final', 'l2_delta1_i1': 0.03092398478020623, 'l2_delta2_i1': 0.03092644751379687, 'l2_delta12_i1': 0.03092522662236757, 'l0_delta1_i1': 1312506.446685879, 'l0_delta2_i1': 1312615.7473583093, 'l0_delta12_i1': 2625122.194044188, 'l_inf_delta1_i1': 0.03137257695198059, 'l_inf_delta2_i1': 0.03137257695198059, 'l_inf_delta12_i1': 0.03137257695198059, 'epe_ground_truth_i1': 9.445657196237306, 'epe_target_i1': 25.058036977688243, 'l2_delta1_i2': 0.022136325306481863, 'l2_delta2_i2': 0.022142512564557203, 'l2_delta12_i2': 0.022139475307821885, 'l0_delta1_i2': 757962.8559077809, 'l0_delta2_i2': 758228.0028818444, 'l0_delta12_i2': 1516190.8587896253, 'l_inf_delta1_i2': 0.03137257695198059, 'l_inf_delta2_i2': 0.03137257695198059, 'l_inf_delta1

In [6]:
import numpy as np

combinations_list = [
    # Targeted: True, epsilon: 0.0314, target: 'negative' or 'zero' (for attacks: pgd, cospgd, bim, norm: 'inf')
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},

    # Targeted: True, epsilon: 0.2510, target: 'negative' or 'zero' (for attacks: pgd, cospgd, bim, norm: 'two')
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'pgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'bim', 'targeted': True, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},

    # Targeted: True, epsilon: 0.0314, target: 'negative' or 'zero' (for attack: fgsm, norm: 'inf')
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'negative', 'norm': 'inf'},
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.0314, 'target': 'zero', 'norm': 'inf'},

    # Targeted: True, epsilon: 0.2510, target: 'negative' or 'zero' (for attack: fgsm, norm: 'two')
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'negative', 'norm': 'two'},
    {'attack': 'fgsm', 'targeted': True, 'loss': 'epe', 'epsilon': 0.2510, 'target': 'zero', 'norm': 'two'},

    # Targeted: False, epsilon: 0.0314 or 0.0157, target: None (for attacks: pgd, cospgd, bim, norm: 'inf')
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'norm': 'inf'},
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'norm': 'inf'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'norm': 'inf'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0314, 'norm': 'inf'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.01, 'loss': 'epe', 'epsilon': 0.0157, 'norm': 'inf'},

    # Targeted: False, epsilon: 0.2510, target: None (for attacks: pgd, cospgd, bim, norm: 'two')
    {'attack': 'pgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'norm': 'two'},
    {'attack': 'cospgd', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'norm': 'two'},
    {'attack': 'bim', 'targeted': False, 'iterations': 20.0, 'alpha': 0.1, 'loss': 'epe', 'epsilon': 0.2510, 'norm': 'two'},

    #Top 9 models configuration
    {'targeted': False, 'attack': 'cospgd', 'epsilon': 0.0314, 'alpha': 0.01, 'iterations': 20.0, 'norm': 'inf', 'loss': 'epe', 'optim': "initial_flow"},
    {'targeted': False, 'attack': 'pgd', 'epsilon': 0.0314, 'alpha': 0.01, 'iterations': 20.0, 'norm': 'inf', 'loss': 'epe', 'optim': "initial_flow"},

    # Targeted: False, epsilon: 0.0314 or 0.0157, target: None (for attack: fgsm, norm: 'inf')
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.0314, 'norm': 'inf'},
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.0157, 'norm': 'inf'},

    # Targeted: False, epsilon: 0.2510, target: None (for attack: fgsm, norm: 'two')
    {'attack': 'fgsm', 'targeted': False, 'loss': 'epe', 'epsilon': 0.2510, 'norm': 'two'},

    {'attack': 'common_corruptions', 'name': 'gaussian_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'shot_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'impulse_noise', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'defocus_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'glass_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'motion_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'zoom_blur', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'snow', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'frost', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'fog', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'brightness', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'contrast', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'elastic_transform', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'pixelate', 'severity': 3.0},
    {'attack': 'common_corruptions', 'name': 'jpeg_compression', 'severity': 3.0},

    {'attack': 'none'},

]

# Set the dataset and norm
datasets = ["sintel-final", "sintel-clean", "kitti-2015"]

# Define the datasets and corresponding model names
kitti_model_names = [
    "raft", "gma", "rpknet", "ccmr", "craft", "csflow", "dicl", "dip", 
    "fastflownet", "maskflownet", "flow1d", "flowformer", "flowformer++", 
    "gmflow", "gmflownet", "hd3", "irr_pwc", "liteflownet", "liteflownet2", 
    "liteflownet3_pseudoreg", "llaflow", "matchflow", "ms_raft+", 
    "rapidflow", "scopeflow", "scv4", "separableflow", "skflow", 
    "starflow", "videoflow_bof"
]

sintel_model_names = [
    "raft", "pwcnet", "gma", "rpknet", "ccmr", "craft", "dicl", "dip", 
    "fastflownet", "maskflownet", "maskflownet_s", "flow1d", "flowformer", 
    "flowformer++", "gmflow", "hd3", "liteflownet", 
    "liteflownet2", "liteflownet3", "llaflow", "matchflow", 
    "ms_raft+", "rapidflow", "scopeflow", "scv4", "separableflow", 
    "skflow", "starflow", "videoflow_bof"
]

kitti_model_names_top_9 = [
    "raft", "rpknet", "craft", "maskflownet", "flow1d", "flowformer++", 
    "liteflownet3_pseudoreg", "ms_raft+", 
    "scopeflow", "gma"
]

sintel_model_names_top_9 = [
    "raft", "rpknet", "craft", "maskflownet", "flow1d", "flowformer++", 
    "liteflownet3", "ms_raft+", 
    "scopeflow", "gma"
]

import pandas as pd
import numpy as np

# Define the casting dictionary
attributes_to_cast = {
    'attack': str,
    'targeted': 'boolean',  # Can use a boolean cast, True/False or None
    'iterations': float,
    'alpha': float,
    'loss': str,
    'epsilon': float,
    'target': str,
    'norm': str,
    'name': str,
    'severity': float
}

# Function to cast columns in merged_df
def cast_columns(merged_df, attributes_to_cast):
    for col, dtype in attributes_to_cast.items():
        if dtype == 'boolean':
            merged_df[col] = merged_df[col].astype('bool', errors='ignore')
        else:
            merged_df[col] = merged_df[col].astype(dtype, errors='ignore')
    return merged_df

# Cast the columns
merged_df = cast_columns(merged_df, attributes_to_cast)

# Function to match combinations
def combination_matches_row(input_df, combination_list, output_df, model, dataset):
    # Iterate over each combination in the combination list
    for combination in combination_list:
        combination_in_input_df = False

        # **Check if "optim" exists in combination and restrict it to top 9 models**
        if "optim" in combination.keys() and combination["optim"] == "initial_flow":
            if dataset == "kitti-2015":
                if model not in kitti_model_names_top_9:
                    continue  # Skip this combination if the model is not in top 9 for kitti-2015
            else:
                if model not in sintel_model_names_top_9:
                    continue  # Skip this combination if the model is not in top 9 for other datasets

        # **Check for common corruptions only for kitti-2015 dataset**
        if combination['attack'] == 'common_corruptions' and dataset != 'kitti-2015':
            continue  # Skip common corruptions if not kitti-2015 dataset

        # Iterate over each row in the dataframe
        for index, row in input_df.iterrows():
            row_in_combination = True  # Assume the row matches the combination initially
            row_dict = row.to_dict()

            # Loop through each key-value pair in the combination
            for key, value in combination.items():
                if key not in row_dict.keys() or row_dict[key] != value:
                    row_in_combination = False
                    break  # Exit the inner loop because we know this row doesn't match

            # If row_in_combination is still True after the inner loop, it means the row matches the combination
            if row_in_combination:
                combination_in_input_df = True
                break  # No need to continue checking rows, as we found a match

        # If no matching row was found, add the combination to output_df
        if not combination_in_input_df:
            combination["model"] = model
            combination["dataset"] = dataset
            output_df = pd.concat([output_df, pd.DataFrame([combination])], ignore_index=True)
            del combination["model"]
            del combination["dataset"]

    return output_df

# Create an empty dataframe to store missing combinations
missing_comb_df = pd.DataFrame()

# Iterate through each dataset
for dataset in datasets:
    # Select the relevant model names based on the dataset
    if "kitti" in dataset:
        model_names = kitti_model_names
    else:
        model_names = sintel_model_names
    
    # Iterate through each model in the dataset
    for model in model_names:
        # Filter the dataframe for the current dataset and model
        subset_df = merged_df[(merged_df['dataset'] == dataset) & (merged_df['model'] == model)]
        missing_comb_df = combination_matches_row(subset_df, combinations_list, missing_comb_df, model, dataset)

# Rearrange columns to have 'model' and 'dataset' first
columns_order = ['model', 'dataset'] + [col for col in missing_comb_df.columns if col not in ['model', 'dataset']]
missing_comb_df = missing_comb_df[columns_order]

# Sort by 'model', 'dataset', 'norm', and 'attack'
missing_comb_df = missing_comb_df.sort_values(by=['model', 'dataset', 'norm', 'attack'], ascending=True)

# Save the missing combinations dataframe to a CSV file
missing_comb_df.to_csv('missing_combinations.csv', index=False)

print("Missing combinations saved to 'missing_combinations.csv'")




Missing combinations saved to 'missing_combinations.csv'
