In [54]:

import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Read a training log and return one DataFrame row per matched epoch entry.

    An epoch entry is recognised only when it sits on a single line, because the
    pattern's ``.`` wildcards do not cross newlines. The line must contain, in
    this order: ``[Epoch N] Time: Ts``, a ``Train`` section with ``Loss:``,
    ``Top1: x%`` and ``Top5: x%``, then a ``Test`` section with the same three
    fields. Text that does not match is ignored.

    Args:
        file_path: Path of the text log to read.

    Returns:
        pandas.DataFrame with the columns ``epoch`` (parsed with ``int``) and
        ``time``, ``train_loss``, ``train_accuracy_top1``,
        ``train_accuracy_top5``, ``test_loss``, ``test_accuracy_top1``,
        ``test_accuracy_top5`` (parsed with ``float``), in order of appearance
        in the file. With no matches the columns exist but hold no rows.
    """
    with open(file_path, 'r') as log_file:
        log_text = log_file.read()

    # One capture group per output column, in the order listed in `schema`.
    epoch_pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'

    # Column name paired with the converter applied to its capture group.
    schema = (
        ('epoch', int),
        ('time', float),
        ('train_loss', float),
        ('train_accuracy_top1', float),
        ('train_accuracy_top5', float),
        ('test_loss', float),
        ('test_accuracy_top1', float),
        ('test_accuracy_top5', float),
    )

    captured = re.findall(epoch_pattern, log_text)

    # Pivot the per-epoch tuples into one converted list per column.
    table = {
        column: [convert(groups[position]) for groups in captured]
        for position, (column, convert) in enumerate(schema)
    }
    return pd.DataFrame(table)


def n_last_average_stats(df, last_n_epochs=5):
    """Average the loss and accuracy columns over the final rows of ``df``.

    Args:
        df: Frame holding the ``train_loss``, ``test_loss``,
            ``train_accuracy_top1``, ``train_accuracy_top5``,
            ``test_accuracy_top1`` and ``test_accuracy_top5`` columns.
        last_n_epochs: Number of trailing rows to average; capped at the
            number of rows available.

    Returns:
        dict mapping ``ave_<column>`` to the mean of that column's tail.
    """
    # Never ask for more rows than the frame holds.
    window = min(last_n_epochs, len(df))

    # Output key paired with the source column, in output order.
    targets = (
        ("ave_train_loss", 'train_loss'),
        ("ave_test_loss", 'test_loss'),
        ("ave_train_accuracy_top1", 'train_accuracy_top1'),
        ("ave_train_accuracy_top5", 'train_accuracy_top5'),
        ("ave_test_accuracy_top1", 'test_accuracy_top1'),
        ("ave_test_accuracy_top5", 'test_accuracy_top5'),
    )

    summary = {}
    for key, column in targets:
        summary[key] = df[column].tail(window).mean()
    return summary

def best_stats(df):
    """Return the highest value reached by each accuracy column of ``df``.

    Args:
        df: Frame holding the ``train_accuracy_top1``, ``train_accuracy_top5``,
            ``test_accuracy_top1`` and ``test_accuracy_top5`` columns.

    Returns:
        dict mapping ``best_<column>`` to that column's maximum.
    """
    # Output key paired with the source column, in output order.
    targets = (
        ("best_train_accuracy_top1", 'train_accuracy_top1'),
        ("best_train_accuracy_top5", 'train_accuracy_top5'),
        ("best_test_accuracy_top1", 'test_accuracy_top1'),
        ("best_test_accuracy_top5", 'test_accuracy_top5'),
    )
    return {key: df[column].max() for key, column in targets}

def last_epoch_stats(df):
    """Return the loss and accuracy values stored in the final row of ``df``.

    Args:
        df: Frame holding the ``train_loss``, ``test_loss``,
            ``train_accuracy_top1``, ``train_accuracy_top5``,
            ``test_accuracy_top1`` and ``test_accuracy_top5`` columns.
            It must contain at least one row; ``iloc[-1]`` raises IndexError
            on an empty frame.

    Returns:
        dict mapping ``last_epoch_<column>`` to the final row's value.
    """
    final_row = df.iloc[-1]

    # Output key paired with the source column, in output order.
    targets = (
        ("last_epoch_train_loss", 'train_loss'),
        ("last_epoch_test_loss", 'test_loss'),
        ("last_epoch_train_accuracy_top1", 'train_accuracy_top1'),
        ("last_epoch_train_accuracy_top5", 'train_accuracy_top5'),
        ("last_epoch_test_accuracy_top1", 'test_accuracy_top1'),
        ("last_epoch_test_accuracy_top5", 'test_accuracy_top5'),
    )
    return {key: final_row[column] for key, column in targets}

def parse_params_gflops(file_dir):
    """Read the parameter counts and GFLOPs recorded for one run directory.

    Two files inside ``file_dir`` are consulted:

    * ``args.txt`` - its last two non-blank lines must look like
      ``<label>: <integer>``. The second-to-last is taken as the total
      parameter count and the last as the trainable parameter count.
    * ``train_eval_results.txt`` - only its first line is read, and it must
      contain ``GFLOPs: <number>``.

    Args:
        file_dir: Directory holding ``args.txt`` and ``train_eval_results.txt``.

    Returns:
        dict with ``total_params`` (int), ``trainable_params`` (int) and
        ``gflops`` (float).

    Raises:
        ValueError: If ``args.txt`` has fewer than two non-blank lines, one of
            those lines has no ``": "`` separator or a non-integer value, or
            the first line of the results file has no GFLOPs figure.
    """
    train_path = os.path.join(file_dir, "train_eval_results.txt")
    args_path = os.path.join(file_dir, "args.txt")

    # The GFLOPs figure is looked up on the first line of the results file only.
    with open(train_path, 'r') as f:
        first_line = f.readline()

    # Ignore blank lines so a trailing newline at the end of args.txt does not
    # shift which lines count as "the last two".
    with open(args_path, 'r') as f:
        arg_lines = [line.strip() for line in f if line.strip()]

    if len(arg_lines) < 2:
        raise ValueError(
            f"{args_path}: expected at least two non-blank lines with parameter counts"
        )

    def _count(line):
        # Pull the integer that follows the first ": " separator.
        pieces = line.split(": ")
        if len(pieces) < 2:
            raise ValueError(f"{args_path}: cannot read a parameter count from {line!r}")
        return int(pieces[1])

    gflops_match = re.search(r'GFLOPs: ([\d.]+)', first_line)
    if gflops_match is None:
        raise ValueError(f"{train_path}: no 'GFLOPs: <number>' on the first line")

    return {
        "total_params": _count(arg_lines[-2]),
        "trainable_params": _count(arg_lines[-1]),
        "gflops": float(gflops_match.group(1))
    }



In [55]:
import sys 
import os
import torch 
import torch.nn as nn

# Every path used by the cells below is built from the kernel's working
# directory, so the notebook has to be started from the project folder.
curr_dir = os.getcwd()
# Directory two levels above the working directory.
root_dir = os.path.abspath(os.path.join(curr_dir, '..', '..'))

# Echo both locations so a wrong working directory is noticed immediately.
print("Current Directory:", curr_dir)
print("Root Directory:", root_dir)


Current Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor
Root Directory: /Users/mingikang


# 1. Loss Test

In [56]:
# Destination of the aggregated loss-test table.
save_path = os.path.join(curr_dir, "Final_csv", "loss_test.csv")
save_dir = os.path.dirname(save_path)
# Create the output folder up front so the later to_csv(save_path) call does
# not fail on a checkout where Final_csv/ does not exist yet.
os.makedirs(save_dir, exist_ok=True)

# Folder holding the raw per-run outputs of the loss test.
results_path = os.path.join(curr_dir, "Final_Output", "loss_test_cos_first---/")
print("Results Path:", results_path)

Results Path: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/loss_test_cos_first---/


In [57]:
# Experiment grid: each (dataset, learning rate, branching ratio) combination
# has its own run folder under results_path.
datasets = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
lr = ["1e-3", "1e-4", "1e-5", "5e-4", "5e-5"]
branching_ratio = ["br0000", "br0500", "br1000"]

# Display name of each branching-ratio tag. A lookup table makes an unexpected
# tag fail with KeyError instead of silently reusing the label assigned on the
# previous iteration.
model_names = {
    "br0000": "Convolution",
    "br0500": "Branching",
    "br1000": "ConvNN",
}

rows = []
for ds in datasets:
    for rate in lr:
        for br in branching_ratio:
            dir_path = os.path.join(results_path, ds, f"lr_{rate}", f"BranchingConvNN_K9_col_col_{br}_s42")
            txt_path = os.path.join(dir_path, "train_eval_results.txt")

            df = parse_training_results(txt_path)

            # One summary row per run: identifiers first, then the averaged,
            # best, final-epoch and model-size statistics.
            rows.append({
                "dataset": ds,
                "lr": rate,
                "Model": model_names[br],
                **n_last_average_stats(df, last_n_epochs=5),
                **best_stats(df),
                **last_epoch_stats(df),
                **parse_params_gflops(dir_path),
            })


df_results = pd.DataFrame(rows)
df_results.to_csv(save_path, index=False)

# 2. K Test

In [58]:
# Destination of the aggregated K-test table.
save_path = os.path.join(curr_dir, "Final_csv", "k_test.csv")
save_dir = os.path.dirname(save_path)
# Create the output folder up front so the later to_csv(save_path) call does
# not fail on a checkout where Final_csv/ does not exist yet.
os.makedirs(save_dir, exist_ok=True)


# Folder holding the raw per-run outputs of the K test.
results_path = os.path.join(curr_dir, "Final_Output", "K_test_pt4---/")
print("Results Path:", results_path)

Results Path: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/K_test_pt4---/


In [None]:
# Categories
datasets = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
KS = ["1", "2", "3"]
K = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"]
model_type = ["col_col_br0500_s0"]

# (dataset, Model, KS, K, run-folder name) for every run that contributes a row.
runs = []
for ds in datasets:
    # Branching runs: one folder per (KS, K, model_type) combination.
    for ks in KS:
        for k in K:
            for mt in model_type:
                runs.append((ds, "Branching", ks, k, f"BranchingConvNN_K{k}_KS{ks}_{mt}"))

    # Convolution baselines (the K0 / br0000 folders), one per KS value.
    # They are queued once per dataset, after the KS loop has finished, so each
    # baseline produces exactly one row instead of one row per KS iteration.
    for ks in KS:
        runs.append((ds, "Convolution", ks, "0", f"BranchingConvNN_K0_KS{ks}_col_col_br0000_s0"))

rows = []
for ds, model, ks, k, run_name in runs:
    # Every folder is resolved against results_path, so the notebook does not
    # depend on one machine's absolute directory layout.
    dir_path = os.path.join(results_path, ds, run_name)
    txt_path = os.path.join(dir_path, "train_eval_results.txt")

    df = parse_training_results(txt_path)

    rows.append({
        "dataset": ds,
        "Model": model,
        "KS": ks,
        "K": k,
        **n_last_average_stats(df, last_n_epochs=5),
        **best_stats(df),
        **last_epoch_stats(df),
        **parse_params_gflops(dir_path),
    })

df_results = pd.DataFrame(rows)
df_results.to_csv(save_path, index=False)

In [60]:
# Preview the first rows of the table that was just written to save_path.
df_results.head()

Unnamed: 0,dataset,Model,KS,K,ave_train_loss,ave_test_loss,ave_train_accuracy_top1,ave_train_accuracy_top5,ave_test_accuracy_top1,ave_test_accuracy_top5,...,best_test_accuracy_top5,last_epoch_train_loss,last_epoch_test_loss,last_epoch_train_accuracy_top1,last_epoch_train_accuracy_top5,last_epoch_test_accuracy_top1,last_epoch_test_accuracy_top5,total_params,trainable_params,gflops
0,VGG11-CIFAR10,Branching,1,1,0.095935,2.640912,96.78078,99.98356,52.63868,91.15624,...,93.3789,0.095044,2.605284,96.7933,99.99,51.5625,90.6738,121822154,121822154,0.325333
1,VGG11-CIFAR10,Branching,1,2,0.103875,2.429809,96.50886,99.9801,53.9922,92.01174,...,93.7793,0.102022,2.382963,96.5167,99.9801,54.2285,92.0508,122334250,122334250,0.325333
2,VGG11-CIFAR10,Branching,1,3,0.098794,2.320335,96.70518,99.98088,56.75194,92.5625,...,93.9844,0.10112,2.22982,96.6239,99.9781,57.4609,92.8027,122846346,122846346,0.325333
3,VGG11-CIFAR10,Branching,1,4,0.090208,2.334719,97.04518,99.9733,57.21094,93.10938,...,94.2773,0.092456,2.280735,97.0599,99.9661,57.998,93.2422,123358442,123358442,0.325333
4,VGG11-CIFAR10,Branching,1,5,0.09082,2.320568,97.01626,99.9749,57.78124,93.1172,...,93.8965,0.089493,2.303369,97.0197,99.9841,57.4121,93.0957,123870538,123870538,0.325333


# 3. N Test

In [61]:
# Destination of the aggregated N-test table.
save_path = os.path.join(curr_dir, "Final_csv", "n_test.csv")
save_dir = os.path.dirname(save_path)
# Create the output folder up front so the later to_csv(save_path) call does
# not fail on a checkout where Final_csv/ does not exist yet.
os.makedirs(save_dir, exist_ok=True)


# Folder holding the raw per-run outputs of the N test.
results_path = os.path.join(curr_dir, "Final_Output", "N_test_cos---/")
print("Results Path:", results_path)

Results Path: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/N_test_cos---/


In [None]:
# Categories
datasets = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
# Deliberately not called `type`: that name would shadow the builtin for every
# cell executed afterwards.
sampling_types = ["random", "spatial"]
Ns = ["4", "6", "8", "10", "12", "14", "16", "18", "20", "22", "24", "26", "28", "30"]
branching_ratio = ["br0500", "br1000"]

# Display name of each branching-ratio tag; an unknown tag raises KeyError
# instead of silently reusing the label from the previous iteration.
model_names = {"br0500": "Branching", "br1000": "ConvNN"}

# Reference rows come from the loss-test runs at lr_1e-4 and are recorded with
# N = -1. The location is derived from curr_dir so it is not tied to one
# machine's absolute directory layout.
baseline_root = os.path.join(curr_dir, "Final_Output", "loss_test_cos_first---")
baselines = [
    # (Type, Model, branching-ratio tag of the reference folder)
    ("all", "ConvNN", "br1000"),
    ("all", "Branching", "br0500"),
    ("N/A", "Convolution", "br0000"),
]

# (dataset, Type, N, Model, run directory) for every run that contributes a row.
runs = []
for ds in datasets:
    for t in sampling_types:
        for n in Ns:
            for br in branching_ratio:
                run_dir = os.path.join(results_path, ds, f"BranchingConvNN_{t}_NS{n}_col_col_{br}_s42")
                runs.append((ds, t, int(n), model_names[br], run_dir))

    # The three reference rows follow the sampled runs of the same dataset.
    for kind, model, br in baselines:
        run_dir = os.path.join(baseline_root, ds, "lr_1e-4", f"BranchingConvNN_K9_col_col_{br}_s42")
        runs.append((ds, kind, -1, model, run_dir))

rows = []
for ds, kind, n, model, dir_path in runs:
    txt_path = os.path.join(dir_path, "train_eval_results.txt")
    df = parse_training_results(txt_path)

    rows.append({
        "dataset": ds,
        "Type": kind,
        "N": n,
        "Model": model,
        **n_last_average_stats(df, last_n_epochs=5),
        **best_stats(df),
        **last_epoch_stats(df),
        **parse_params_gflops(dir_path),
    })

df_results = pd.DataFrame(rows)
df_results.to_csv(save_path, index=False)

In [63]:
# Preview the first rows of the table that was just written to save_path.
df_results.head()

Unnamed: 0,dataset,Type,N,Model,ave_train_loss,ave_test_loss,ave_train_accuracy_top1,ave_train_accuracy_top5,ave_test_accuracy_top1,ave_test_accuracy_top5,...,best_test_accuracy_top5,last_epoch_train_loss,last_epoch_test_loss,last_epoch_train_accuracy_top1,last_epoch_train_accuracy_top5,last_epoch_test_accuracy_top1,last_epoch_test_accuracy_top5,total_params,trainable_params,gflops
0,VGG11-CIFAR10,random,4,Branching,0.018681,1.244146,99.49666,99.9984,81.78906,98.55274,...,98.7988,0.017633,1.318572,99.5233,100.0,81.8652,98.5547,130015690,130015690,0.297096
1,VGG11-CIFAR10,random,4,ConvNN,0.204216,2.011357,92.90942,99.92188,58.0703,93.50198,...,94.5996,0.20089,1.948419,92.9994,99.9004,58.8672,93.9844,130015690,130015690,0.297096
2,VGG11-CIFAR10,random,6,Branching,0.018173,1.260812,99.50838,99.9964,81.27344,98.3047,...,98.623,0.014343,1.351709,99.6134,99.998,80.9473,98.3594,130015690,130015690,0.300706
3,VGG11-CIFAR10,random,6,ConvNN,0.108229,2.383227,96.4135,99.9773,57.11914,92.71094,...,93.9648,0.105682,2.493048,96.4772,99.9801,57.6074,91.9336,130015690,130015690,0.300706
4,VGG11-CIFAR10,random,8,Branching,0.0191,1.190271,99.4803,99.9968,81.16016,98.1582,...,98.8281,0.016259,1.22633,99.5436,99.998,80.6348,97.9785,130015690,130015690,0.304211


# 4. Branching Test

In [66]:
# Destination of the aggregated branching-ratio table.
save_path = os.path.join(curr_dir, "Final_csv", "branching_test.csv")
save_dir = os.path.dirname(save_path)
# Create the output folder up front so the later to_csv(save_path) call does
# not fail on a checkout where Final_csv/ does not exist yet.
os.makedirs(save_dir, exist_ok=True)


# Folder holding the raw per-run outputs of the branching-ratio test.
results_path = os.path.join(curr_dir, "Final_Output", "BR_test_cos---/")
print("Results Path:", results_path)

Results Path: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/BR_test_cos---/


In [69]:
datasets = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
branching_ratio = ["br0000", "br0125", "br0250", "br0375", "br0500", "br0625", "br0750", "br0875", "br1000"]
similarity_type = ["Col_Col", "Loc_Col_Col", "Loc_Col_Loc_Col"]

# Every run to summarise, enumerated dataset -> branching ratio -> similarity type.
run_grid = [
    (ds, br, st)
    for ds in datasets
    for br in branching_ratio
    for st in similarity_type
]

rows = []
for ds, br, st in run_grid:
    dir_path = os.path.join(results_path, ds, f"BranchingConvNN_All_K9_{st}_{br}_s42")
    txt_path = os.path.join(dir_path, "train_eval_results.txt")

    df = parse_training_results(txt_path)

    # Identifiers first, then the averaged, best, final-epoch and model-size statistics.
    rows.append({
        "dataset": ds,
        "Sim_Agg_type": st,
        "Branching_Ratio": br,
        **n_last_average_stats(df, last_n_epochs=5),
        **best_stats(df),
        **last_epoch_stats(df),
        **parse_params_gflops(dir_path),
    })

# Persist the full table, then show its first rows as the cell output.
df_results = pd.DataFrame(rows)
df_results.to_csv(save_path, index=False)
df_results.head()

Unnamed: 0,dataset,Sim_Agg_type,Branching_Ratio,ave_train_loss,ave_test_loss,ave_train_accuracy_top1,ave_train_accuracy_top5,ave_test_accuracy_top1,ave_test_accuracy_top5,best_train_accuracy_top1,...,best_test_accuracy_top5,last_epoch_train_loss,last_epoch_test_loss,last_epoch_train_accuracy_top1,last_epoch_train_accuracy_top5,last_epoch_test_accuracy_top1,last_epoch_test_accuracy_top5,total_params,trainable_params,gflops
0,VGG11-CIFAR10,Col_Col,br0000,0.016448,1.440566,99.59206,99.9968,80.23634,98.15234,99.6433,...,98.4863,0.015617,1.529582,99.6193,99.998,80.6543,98.0762,130015690,130015690,0.293683
1,VGG11-CIFAR10,Loc_Col_Col,br0000,0.015565,1.333217,99.59304,99.9976,80.9746,98.24412,99.6508,...,98.623,0.01733,1.201751,99.5835,99.994,81.4453,98.0566,130015690,130015690,0.293683
2,VGG11-CIFAR10,Loc_Col_Loc_Col,br0000,0.017055,1.454488,99.564,99.99432,80.10546,98.05272,99.6213,...,98.4961,0.016637,1.366405,99.599,99.9916,80.9277,98.1445,130015690,130015690,0.293683
3,VGG11-CIFAR10,Col_Col,br0125,0.015547,1.432531,99.5904,99.9972,80.63866,98.3379,99.613,...,98.623,0.01459,1.393544,99.613,99.998,81.1035,98.3984,130015690,130015690,0.325333
4,VGG11-CIFAR10,Loc_Col_Col,br0125,0.016121,1.361178,99.56752,99.998,81.1836,98.0918,99.6333,...,98.623,0.015158,1.420852,99.6333,99.994,80.9473,97.9883,130015690,130015690,0.331301
