## Main Parsing Function for train_eval_results.txt Files 

In [1]:
import pandas as pd
import re
import os

# One numeric field as it can appear in the log: a plain decimal, a value in
# scientific notation (e.g. 2.5e-05), or a non-finite value (nan / inf).
# The previous character class `[\d.]+` stopped at the "e" of an exponent and
# therefore silently truncated such values, and skipped nan/inf epochs entirely.
_METRIC_NUMBER = r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|[-+]?(?:nan|inf))'

# One epoch record. `.` does not match a newline here, so the whole record
# (epoch, time, train metrics, test metrics) must sit on a single line.
_EPOCH_LINE_RE = re.compile(
    r'\[Epoch (\d+)\] Time: {num}s'
    r'.*?Train.*?Loss: {num}.*?Top1: {num}%.*?Top5: {num}%'
    r'.*?Test.*?Loss: {num}.*?Top1: {num}%.*?Top5: {num}%'.format(num=_METRIC_NUMBER)
)

# Output columns, in the same order as the regex capture groups.
_RESULT_COLUMNS = [
    'epoch',
    'time',
    'train_loss',
    'train_accuracy_top1',
    'train_accuracy_top5',
    'test_loss',
    'test_accuracy_top1',
    'test_accuracy_top5',
]


def parse_training_results(file_path):
    """Parse a training log into a pandas DataFrame with one row per epoch.

    Every line of the form
    ``[Epoch N] Time: Ts ... Train ... Loss: x ... Top1: y% ... Top5: z% ...
    Test ... Loss: x ... Top1: y% ... Top5: z%`` contributes one row; all other
    lines are ignored.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``epoch`` (int), ``time``, ``train_loss``,
        ``train_accuracy_top1``, ``train_accuracy_top5``, ``test_loss``,
        ``test_accuracy_top1``, ``test_accuracy_top5`` (floats), in file order.
        The frame is empty (columns still present) when no line matches.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    with open(file_path, 'r') as f:
        content = f.read()

    # Build all rows in one pass instead of maintaining eight parallel lists.
    rows = []
    for match in _EPOCH_LINE_RE.finditer(content):
        fields = match.groups()
        rows.append((int(fields[0]), *(float(value) for value in fields[1:])))

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)

### I. Loss Plot

In [9]:
# Result logs of the two runs compared in the loss plot (r000 vs r050 variant).
# NOTE: these are absolute, machine-specific paths.
Conv2d_Path, ConvNN_Path = (
    "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_24_Branching_NoSplit/vgg_1e-5_cos/CIFAR10/Col_Col_Branch/ConvBranch_K9_r000_s42/train_eval_results.txt",
    "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_24_Branching_NoSplit/vgg_1e-5_cos/CIFAR10/Col_Col_Branch/ConvBranch_K9_r050_s42/train_eval_results.txt",
)

# One per-epoch DataFrame per run.
df_Conv2d = parse_training_results(file_path=Conv2d_Path)
df_ConvNN = parse_training_results(file_path=ConvNN_Path)

In [10]:
# Show the last five parsed epochs of the Conv2d run as a sanity check.
print(df_Conv2d.tail(n=5))

    epoch     time  train_loss  train_accuracy_top1  train_accuracy_top5  \
55     56  13.5602    0.005601              99.8022                100.0   
56     57  13.5844    0.004974              99.8342                100.0   
57     58  13.5464    0.002998              99.8961                100.0   
58     59  13.5725    0.004239              99.8561                100.0   
59     60  13.5960    0.005837              99.8182                100.0   

    test_loss  test_accuracy_top1  test_accuracy_top5  
55   2.620213             69.3173             96.5565  
56   2.633557             69.9642             96.7755  
57   2.653949             70.1632             96.6063  
58   2.629530             70.0438             96.7158  
59   2.578960             69.7850             96.9347  


In [11]:
# Show the last five parsed epochs of the ConvNN run as a sanity check.
print(df_ConvNN.tail(n=5))

    epoch     time  train_loss  train_accuracy_top1  train_accuracy_top5  \
55     56  23.6237    0.131086              95.2925               99.948   
56     57  23.6186    0.119373              95.7361               99.970   
57     58  23.6360    0.120541              95.7101               99.962   
58     59  23.6289    0.118510              95.8520               99.968   
59     60  23.6361    0.109851              96.2096               99.966   

    test_loss  test_accuracy_top1  test_accuracy_top5  
55   1.195560             73.5768             98.3181  
56   1.223960             73.4076             98.1091  
57   1.218058             74.0048             98.2783  
58   1.199236             74.1441             98.1688  
59   1.233663             73.6166             98.0792  


In [12]:
# Train/test loss of both runs side by side. The epoch column comes from the
# Conv2d run; the other columns are aligned on the DataFrame index, so both
# runs are expected to cover the same epochs.
Loss_df = pd.DataFrame({
    'epoch': df_Conv2d['epoch'],
    'Conv2d_Train_Loss': df_Conv2d['train_loss'],
    'Conv2d_Test_Loss': df_Conv2d['test_loss'],
    'ConvNN_Train_Loss': df_ConvNN['train_loss'],
    'ConvNN_Test_Loss': df_ConvNN['test_loss']
})

print(Loss_df.tail())

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (a fresh checkout would otherwise fail here).
os.makedirs("csv", exist_ok=True)
Loss_df.to_csv("csv/Loss_Comparison.csv", index=False)

    epoch  Conv2d_Train_Loss  Conv2d_Test_Loss  ConvNN_Train_Loss  \
55     56           0.005601          2.620213           0.131086   
56     57           0.004974          2.633557           0.119373   
57     58           0.002998          2.653949           0.120541   
58     59           0.004239          2.629530           0.118510   
59     60           0.005837          2.578960           0.109851   

    ConvNN_Test_Loss  
55          1.195560  
56          1.223960  
57          1.218058  
58          1.199236  
59          1.233663  


### II. Ks Plot

In [26]:
# A run's results file is located at path1 + <run name> + path2.
# NOTE: path1 is an absolute, machine-specific path.
path1 = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_25_Branching_NoSplit_KTest/vgg_1e-5_cos/CIFAR10/LocCol_LocCol_Branch/ConvBranch_"
path2 = "_s42/train_eval_results.txt"

# K values swept in this experiment.
Ks = list(range(1, 13))

# Run names for each KS setting across all K values. They are generated from Ks
# instead of being typed out by hand, so the lists cannot drift out of step
# with Ks or pick up a typo in one of the 36 names.
ks1 = [f"KS1_K{k_value}_r025" for k_value in Ks]
ks2 = [f"KS2_K{k_value}_r025" for k_value in Ks]
ks3 = [f"KS3_K{k_value}_r025" for k_value in Ks]

# The r000 runs: one per KS setting, with no K in the run name.
ks = ["KS1_r000", "KS2_r000", "KS3_r000"]


def _load_runs(run_names):
    """Parse the results file of every named run; returns {run name: DataFrame}."""
    return {
        run_name: parse_training_results(path1 + run_name + path2)
        for run_name in run_names
    }


# Dicts keep insertion order, so each one follows the order of its name list.
Ks1_Ks = _load_runs(ks1)
Ks2_Ks = _load_runs(ks2)
Ks3_Ks = _load_runs(ks3)
KS = _load_runs(ks)


In [27]:
# The number of K values and the number of parsed KS1 runs should agree.
print(len(Ks), len(Ks1_Ks), sep="\n")

12
12


In [28]:
def _final_top1(results_df):
    """Return the last row's top-1 test accuracy of a parsed results DataFrame."""
    return results_df['test_accuracy_top1'].iloc[-1]


# Columns for the K comparison table.
#   Ks*_K : last-row top-1 test accuracy of each per-K run, in the insertion
#           order of the corresponding dict (expected to follow Ks).
#   Ks*   : the single r000 run of that KS setting, repeated once per K so it
#           forms a constant column of the same length.
# The r000 value is looked up once and repeated len(Ks) times rather than
# re-read inside a hard-coded range(1, 13) loop, and the name lists
# ks1/ks2/ks3 are no longer overwritten with DataFrames.
final_dict = {
    "K": Ks,
    "Ks1_K": [_final_top1(run_df) for run_df in Ks1_Ks.values()],
    "Ks2_K": [_final_top1(run_df) for run_df in Ks2_Ks.values()],
    "Ks3_K": [_final_top1(run_df) for run_df in Ks3_Ks.values()],
    "Ks1": [_final_top1(KS["KS1_r000"])] * len(Ks),
    "Ks2": [_final_top1(KS["KS2_r000"])] * len(Ks),
    "Ks3": [_final_top1(KS["KS3_r000"])] * len(Ks),
}

In [29]:
# Dump the assembled column lists for a quick visual check.
print(str(final_dict))

{'K': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], 'Ks1_K': [np.float64(57.1557), np.float64(57.8921), np.float64(59.2556), np.float64(60.211), np.float64(60.7385), np.float64(61.246), np.float64(62.4005), np.float64(61.9825), np.float64(61.9029), np.float64(62.4701), np.float64(62.5597), np.float64(61.3654)], 'Ks2_K': [np.float64(66.1027), np.float64(67.5159), np.float64(66.9686), np.float64(69.3173), np.float64(69.8547), np.float64(71.2082), np.float64(72.0243), np.float64(70.9494), np.float64(72.492), np.float64(72.1039), np.float64(72.8006), np.float64(71.5565)], 'Ks3_K': [np.float64(68.6803), np.float64(69.7154), np.float64(71.2082), np.float64(71.9347), np.float64(73.338), np.float64(74.1242), np.float64(74.0744), np.float64(74.2635), np.float64(75.4976), np.float64(74.9701), np.float64(74.9602), np.float64(75.5971)], 'Ks1': [np.float64(50.7663), np.float64(50.7663), np.float64(50.7663), np.float64(50.7663), np.float64(50.7663), np.float64(50.7663), np.float64(50.7663), np.float64(50

In [32]:
# One row per K value, one column per entry of final_dict.
ks_df = pd.DataFrame(data=final_dict)


In [33]:
print(ks_df)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (a fresh checkout would otherwise fail here).
os.makedirs("csv", exist_ok=True)
ks_df.to_csv("csv/Ks_Comparison.csv", index=False)

     K    Ks1_K    Ks2_K    Ks3_K      Ks1      Ks2      Ks3
0    1  57.1557  66.1027  68.6803  50.7663  65.3065  69.8945
1    2  57.8921  67.5159  69.7154  50.7663  65.3065  69.8945
2    3  59.2556  66.9686  71.2082  50.7663  65.3065  69.8945
3    4  60.2110  69.3173  71.9347  50.7663  65.3065  69.8945
4    5  60.7385  69.8547  73.3380  50.7663  65.3065  69.8945
5    6  61.2460  71.2082  74.1242  50.7663  65.3065  69.8945
6    7  62.4005  72.0243  74.0744  50.7663  65.3065  69.8945
7    8  61.9825  70.9494  74.2635  50.7663  65.3065  69.8945
8    9  61.9029  72.4920  75.4976  50.7663  65.3065  69.8945
9   10  62.4701  72.1039  74.9701  50.7663  65.3065  69.8945
10  11  62.5597  72.8006  74.9602  50.7663  65.3065  69.8945
11  12  61.3654  71.5565  75.5971  50.7663  65.3065  69.8945


### III. Random + Spatial Sampling Plots

In [3]:

def parse_params_gflops(file_dir):
    """Read the parameter counts and GFLOPs recorded for one run directory.

    Two files inside ``file_dir`` are read:

    * ``train_eval_results.txt`` - only its first line is inspected, for a
      ``GFLOPs: <number>`` entry.
    * ``args.txt`` - its last two non-blank lines are read as
      ``<label>: <integer>``; the second-to-last is taken as the total
      parameter count and the last as the trainable parameter count.

    Parameters
    ----------
    file_dir : str
        Directory containing the two files.

    Returns
    -------
    dict
        ``{"total_params": int, "trainable_params": int, "gflops": float}``.

    Raises
    ------
    OSError
        If either file cannot be opened.
    ValueError
        If ``args.txt`` has fewer than two non-blank lines, if a parameter
        value is not an integer, or if the first line of the results file has
        no ``GFLOPs: <number>`` entry.
    IndexError
        If one of the two parameter lines contains no ``": "`` separator.
    """
    train_path = os.path.join(file_dir, "train_eval_results.txt")
    args_path = os.path.join(file_dir, "args.txt")

    with open(train_path, 'r') as f:
        first_line = f.readline()

    # Ignore blank lines so that a trailing empty line at the end of args.txt
    # does not shift the two entries that are indexed from the end.
    with open(args_path, 'r') as f:
        arg_lines = [line.strip() for line in f if line.strip()]

    if len(arg_lines) < 2:
        raise ValueError(
            f"{args_path}: expected at least two non-blank lines, found {len(arg_lines)}"
        )

    total_params = arg_lines[-2].split(": ")[1]
    trainable_params = arg_lines[-1].split(": ")[1]

    # Fail with a clear message instead of an AttributeError on None.
    gflops_match = re.search(r'GFLOPs: ([\d.]+)', first_line)
    if gflops_match is None:
        raise ValueError(f"{train_path}: no 'GFLOPs: <number>' entry on the first line")

    return {
        "total_params": int(total_params),
        "trainable_params": int(trainable_params),
        "gflops": float(gflops_match.group(1))
    }

In [4]:
# Common prefix of the sampling-sweep run directories. `test_path` is the
# original second name bound to the same string and is kept as an alias.
# NOTE: all paths in this cell are absolute and machine-specific.
N_test_path = test_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_29_Branching_NTest/vgg_1e-5_cos/CIFAR10/LocCol_LocCol_Branch/ConvBranch_KS3_K9_r025_"

# Sample counts as strings, because they are spliced into directory names and
# used as dict keys. Each random count is the square of the spatial count at
# the same position.
rand_nums = [str(num) for num in (16, 64, 144, 256, 400, 576, 784)]
spat_nums = [str(num) for num in (4, 8, 12, 16, 20, 24, 28)]


def _gflops_and_final_top1(run_dir):
    """Return (GFLOPs, last-row top-1 test accuracy) for one run directory."""
    gflops = parse_params_gflops(run_dir)["gflops"]
    results_df = parse_training_results(run_dir + "/train_eval_results.txt")
    return gflops, results_df['test_accuracy_top1'].iloc[-1]


# Random First
rand_N_dict = {}
rand_acc_dict = {}
for n in rand_nums:
    gflops, top1 = _gflops_and_final_top1(N_test_path + "rand" + n + "_s42")
    rand_N_dict[n] = gflops
    rand_acc_dict[n + "_acc"] = top1

# Spatial Sampling
spat_N_dict = {}
spat_acc_dict = {}
for n in spat_nums:
    gflops, top1 = _gflops_and_final_top1(N_test_path + "spat" + n + "_s42")
    spat_N_dict[n] = gflops
    spat_acc_dict[n + "_acc"] = top1

# All Sampling
# A single fixed run serves as the reference for every sample count, so it is
# parsed once and the value repeated, instead of re-reading the same files on
# every loop iteration.
all_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_25_Branching_NoSplit_KTest/vgg_1e-5_cos/CIFAR10/LocCol_LocCol_Branch/ConvBranch_KS3_K9_r025_s42"
all_gflops, all_top1 = _gflops_and_final_top1(all_path)
all_N_dict = {n: all_gflops for n in rand_nums}
all_acc_dict = {n + "_acc": all_top1 for n in rand_nums}

# r000 reference run: likewise a single fixed run, parsed once and repeated.
conv_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Output/Sep_24_Branching_NoSplit/vgg_1e-5_cos/CIFAR10/LocCol_LocCol_Branch/ConvBranch_K9_r000_s42"
conv_gflops, conv_top1 = _gflops_and_final_top1(conv_path)
conv_N_dict = {n + "_conv": conv_gflops for n in rand_nums}
conv_acc_dict = {n + "_acc": conv_top1 for n in rand_nums}

print(conv_acc_dict)
print(conv_N_dict)


{'16_acc': np.float64(69.9244), '64_acc': np.float64(69.9244), '144_acc': np.float64(69.9244), '256_acc': np.float64(69.9244), '400_acc': np.float64(69.9244), '576_acc': np.float64(69.9244), '784_acc': np.float64(69.9244)}
{'16_conv': 0.2936832, '64_conv': 0.2936832, '144_conv': 0.2936832, '256_conv': 0.2936832, '400_conv': 0.2936832, '576_conv': 0.2936832, '784_conv': 0.2936832}


In [5]:
# One row per sample-count setting. Columns are paired positionally: the i-th
# random-sampling count ("N^2") sits next to the i-th spatial-sampling count
# ("N"), relying on the insertion order of the dicts built in the previous cell.
gflops_df = pd.DataFrame({
    "N^2": list(rand_N_dict.keys()),
    "N": list(spat_N_dict.keys()),
    "Rand_GFlops": list(rand_N_dict.values()),
    "Rand_Top1": list(rand_acc_dict.values()),
    "Spat_GFlops": list(spat_N_dict.values()),
    "Spat_Top1": list(spat_acc_dict.values()),
    "All_GFlops": list(all_N_dict.values()),
    "All_Top1": list(all_acc_dict.values()),
    "Conv_Top1": list(conv_acc_dict.values()),
    "Conv_GFlops": list(conv_N_dict.values())
})
print(gflops_df)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (a fresh checkout would otherwise fail here).
os.makedirs("csv", exist_ok=True)
gflops_df.to_csv("csv/N_Samples_Comparison-Oct6.csv", index=False)

   N^2   N  Rand_GFlops  Rand_Top1  Spat_GFlops  Spat_Top1  All_GFlops  \
0   16   4     0.297320    74.3531     0.297320    69.1580    0.331301   
1   64   8     0.304764    75.3284     0.304765    72.8105    0.331301   
2  144  12     0.311904    75.7564     0.311905    73.9451    0.331301   
3  256  16     0.317989    75.0597     0.317990    74.3929    0.331301   
4  400  20     0.322562    75.9554     0.322562    74.5123    0.331301   
5  576  24     0.324596    75.5374     0.324597    74.7711    0.331301   
6  784  28     0.327001    75.1592     0.327002    74.3332    0.331301   

   All_Top1  Conv_Top1  Conv_GFlops  
0   75.4976    69.9244     0.293683  
1   75.4976    69.9244     0.293683  
2   75.4976    69.9244     0.293683  
3   75.4976    69.9244     0.293683  
4   75.4976    69.9244     0.293683  
5   75.4976    69.9244     0.293683  
6   75.4976    69.9244     0.293683  
