In [4]:
import pandas as pd
import re
import os
import sys 
import pandas



In [5]:

def parse_training_results(file_path):
    """Parse per-epoch training results from a text log into a DataFrame.

    An epoch entry is recognised when a single line contains, in order:
    ``[Epoch N] Time: <t>s``, then ``Train`` followed by ``Loss: <x>``,
    ``Top1: <x>%`` and ``Top5: <x>%``, then ``Test`` followed by the same
    three metrics. Entries must sit on one line because ``.`` does not match
    newlines with the default regex flags. Lines that do not fit are skipped.

    Numeric fields accept plain decimals, scientific notation (``1.5e-05``)
    and ``nan``/``inf``. A bare ``[\\d.]+`` token would read ``1.5e-05`` as
    ``1.5`` and would drop any epoch whose metric is ``nan``.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text log to read.

    Returns
    -------
    pandas.DataFrame
        One row per matched epoch, in file order, with columns ``epoch``,
        ``time``, ``train_loss``, ``train_accuracy_top1``,
        ``train_accuracy_top5``, ``test_loss``, ``test_accuracy_top1`` and
        ``test_accuracy_top5``. The frame has zero rows when nothing matches.
    """
    with open(file_path, 'r') as f:
        content = f.read()

    column_names = [
        'epoch',
        'time',
        'train_loss',
        'train_accuracy_top1',
        'train_accuracy_top5',
        'test_loss',
        'test_accuracy_top1',
        'test_accuracy_top5',
    ]

    # One captured float: optional sign, digits with optional fraction,
    # optional exponent -- or a (case-insensitive) nan / inf.
    number = r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|[-+]?(?i:nan|inf))'
    # The Train and Test sections share the same three-metric layout.
    metrics = rf'Loss: {number}.*?Top1: {number}%.*?Top5: {number}%'
    pattern = rf'\[Epoch (\d+)\] Time: {number}s.*?Train.*?{metrics}.*?Test.*?{metrics}'

    # Column-wise storage keeps the empty-log result identical in shape to a
    # populated one (all eight columns present, zero rows).
    data = {name: [] for name in column_names}
    for match in re.finditer(pattern, content):
        epoch, *values = match.groups()
        data['epoch'].append(int(epoch))
        for name, value in zip(column_names[1:], values):
            data[name].append(float(value))

    return pd.DataFrame(data)

In [7]:
# Experiment grid: every (dataset, branching ratio, type) combination is one run.
datasets = [
    "VGG11-CIFAR10",
    "VGG11-CIFAR100",
]
# Zero-padded four-character tags in steps of 125: "0000", "0125", ..., "1000".
br = [f"{step * 125:04d}" for step in range(9)]
# NOTE: this name shadows the builtin `type`; it is kept because the loops in
# later cells iterate over it under this name.
type = [
    "Col_Col",
    "Loc_Col_Col",
    "Loc_Col_Loc_Col",
]
# Root directory holding one sub-folder per dataset (absolute, machine-specific).
base_path = (
    "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor/"
    "Final_Output/BR_test_cos/"
)

In [10]:
# Convert each run's text log into a per-epoch CSV under <base_path>/csv.
csv_path = os.path.join(base_path, "csv")
os.makedirs(csv_path, exist_ok=True)

# Flattened grid, iterated in the same order as nested dataset -> br -> type loops.
run_grid = ((dataset, b, t) for dataset in datasets for b in br for t in type)
for dataset, b, t in run_grid:
    run_dir = f"BranchingConvNN_All_K9_{t}_br{b}_s42"
    file_path = os.path.join(base_path, dataset, run_dir, "train_eval_results.txt")

    # Report missing runs and move on instead of failing the whole export.
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    df = parse_training_results(file_path)
    output_csv = os.path.join(csv_path, f"{dataset}_br{b}_{t}_training_results.csv")
    df.to_csv(output_csv, index=False)


In [14]:
# Build a summary table holding the last logged epoch of every run.
# Declaring the schema once keeps these columns on `final_df` even when no
# run is found, so later column-based filters still work on an empty table.
summary_columns = [
    "Dataset", "Branching Ratio", "Type", "Epoch", "Time",
    "Train Loss", "Train Acc Top1", "Train Acc Top5",
    "Test Loss", "Test Acc Top1", "Test Acc Top5",
]

rows = []
for dataset in datasets:
    for b in br:
        # `type` is the notebook's list of run types (it shadows the builtin).
        for t in type:
            file_path = os.path.join(base_path, "csv", f"{dataset}_br{b}_{t}_training_results.csv")
            if not os.path.exists(file_path):
                print(f"File not found: {file_path}")
                continue

            df = pd.read_csv(file_path)
            if df.empty:
                # A header-only CSV (no epochs parsed) has no last row;
                # `df.iloc[-1]` would raise IndexError.
                print(f"No epochs found in: {file_path}")
                continue

            final_epoch = df.iloc[-1]
            rows.append({
                "Dataset": dataset,
                "Branching Ratio": b,
                "Type": t,
                # The row Series mixes int and float columns and is upcast to
                # float, so restore the epoch number to an integer.
                "Epoch": int(final_epoch["epoch"]),
                "Time": final_epoch["time"],
                "Train Loss": final_epoch["train_loss"],
                "Train Acc Top1": final_epoch["train_accuracy_top1"],
                "Train Acc Top5": final_epoch["train_accuracy_top5"],
                "Test Loss": final_epoch["test_loss"],
                "Test Acc Top1": final_epoch["test_accuracy_top1"],
                "Test Acc Top5": final_epoch["test_accuracy_top5"],
            })

final_df = pd.DataFrame(rows, columns=summary_columns)

In [16]:
# Preview the first five summary rows.
final_df.head(n=5)

Unnamed: 0,Dataset,Branching Ratio,Type,Epoch,Time,Train Loss,Train Acc Top1,Train Acc Top5,Test Loss,Test Acc Top1,Test Acc Top5
0,VGG11-CIFAR10,0,Col_Col,150.0,6.3309,0.015617,99.6193,99.998,1.529582,80.6543,98.0762
1,VGG11-CIFAR10,0,Loc_Col_Col,150.0,6.3232,0.01733,99.5835,99.994,1.201751,81.4453,98.0566
2,VGG11-CIFAR10,0,Loc_Col_Loc_Col,150.0,6.3258,0.016637,99.599,99.9916,1.366405,80.9277,98.1445
3,VGG11-CIFAR10,125,Col_Col,150.0,15.6875,0.01459,99.613,99.998,1.393544,81.1035,98.3984
4,VGG11-CIFAR10,125,Loc_Col_Col,150.0,15.7074,0.015158,99.6333,99.994,1.420852,80.9473,97.9883


In [19]:

# Keep only the runs whose Type column equals "Col_Col".
df_filtered = final_df.loc[final_df["Type"].eq("Col_Col")]
df_filtered

Unnamed: 0,Dataset,Branching Ratio,Type,Epoch,Time,Train Loss,Train Acc Top1,Train Acc Top5,Test Loss,Test Acc Top1,Test Acc Top5
0,VGG11-CIFAR10,0,Col_Col,150.0,6.3309,0.015617,99.6193,99.998,1.529582,80.6543,98.0762
3,VGG11-CIFAR10,125,Col_Col,150.0,15.6875,0.01459,99.613,99.998,1.393544,81.1035,98.3984
6,VGG11-CIFAR10,250,Col_Col,150.0,15.6732,0.014356,99.6349,100.0,1.467273,81.2109,98.2715
9,VGG11-CIFAR10,375,Col_Col,150.0,15.6445,0.017884,99.5615,99.994,1.422807,80.957,98.1836
12,VGG11-CIFAR10,500,Col_Col,150.0,15.6934,0.016097,99.5516,99.998,1.424643,81.2695,98.4961
15,VGG11-CIFAR10,625,Col_Col,150.0,15.7277,0.022943,99.3937,99.996,1.403118,80.791,97.8906
18,VGG11-CIFAR10,750,Col_Col,150.0,15.6309,0.023867,99.2742,99.998,1.479538,79.5898,97.9688
21,VGG11-CIFAR10,875,Col_Col,150.0,15.6893,0.031246,99.031,100.0,1.33242,78.1836,98.3789
24,VGG11-CIFAR10,1000,Col_Col,150.0,15.1682,0.081068,97.2983,99.9781,2.694548,54.0039,91.748
27,VGG11-CIFAR100,0,Col_Col,150.0,6.3551,0.062869,98.2402,99.9681,3.939457,44.834,72.0312


In [20]:

# Keep only the runs whose Type column equals "Loc_Col_Col".
df_filtered = final_df.loc[final_df["Type"].eq("Loc_Col_Col")]
df_filtered

Unnamed: 0,Dataset,Branching Ratio,Type,Epoch,Time,Train Loss,Train Acc Top1,Train Acc Top5,Test Loss,Test Acc Top1,Test Acc Top5
1,VGG11-CIFAR10,0,Loc_Col_Col,150.0,6.3232,0.01733,99.5835,99.994,1.201751,81.4453,98.0566
4,VGG11-CIFAR10,125,Loc_Col_Col,150.0,15.7074,0.015158,99.6333,99.994,1.420852,80.9473,97.9883
7,VGG11-CIFAR10,250,Loc_Col_Col,150.0,15.6646,0.015725,99.589,99.998,1.447156,80.918,98.3203
10,VGG11-CIFAR10,375,Loc_Col_Col,150.0,15.7052,0.017055,99.5615,99.996,1.386375,82.0898,98.3496
13,VGG11-CIFAR10,500,Loc_Col_Col,150.0,15.6961,0.017969,99.4938,99.996,1.213133,81.7773,98.3301
16,VGG11-CIFAR10,625,Loc_Col_Col,150.0,15.7698,0.021617,99.3638,100.0,1.341861,80.8105,98.2227
19,VGG11-CIFAR10,750,Loc_Col_Col,150.0,15.6396,0.022762,99.2981,99.998,1.283942,80.2637,98.6719
22,VGG11-CIFAR10,875,Loc_Col_Col,150.0,15.6914,0.032157,99.0051,99.998,1.22533,78.7012,98.2129
25,VGG11-CIFAR10,1000,Loc_Col_Col,150.0,15.2447,0.079695,97.3944,99.9841,2.388611,57.1582,92.8125
28,VGG11-CIFAR100,0,Loc_Col_Col,150.0,6.3416,0.063973,98.176,99.9502,3.840885,44.3359,71.4941


In [21]:
# Keep only the runs whose Type column equals "Loc_Col_Loc_Col".
df_filtered = final_df.loc[final_df["Type"].eq("Loc_Col_Loc_Col")]
df_filtered

Unnamed: 0,Dataset,Branching Ratio,Type,Epoch,Time,Train Loss,Train Acc Top1,Train Acc Top5,Test Loss,Test Acc Top1,Test Acc Top5
2,VGG11-CIFAR10,0,Loc_Col_Loc_Col,150.0,6.3258,0.016637,99.599,99.9916,1.366405,80.9277,98.1445
5,VGG11-CIFAR10,125,Loc_Col_Loc_Col,150.0,15.9568,0.016022,99.6034,99.992,1.393333,81.3477,98.4766
8,VGG11-CIFAR10,250,Loc_Col_Loc_Col,150.0,15.9365,0.015389,99.5607,99.996,1.265429,81.5137,98.3984
11,VGG11-CIFAR10,375,Loc_Col_Loc_Col,150.0,15.9642,0.016693,99.5596,100.0,1.52213,80.7227,98.2715
14,VGG11-CIFAR10,500,Loc_Col_Loc_Col,150.0,16.0069,0.019187,99.4296,99.996,1.218572,82.3438,98.3105
17,VGG11-CIFAR10,625,Loc_Col_Loc_Col,150.0,16.1549,0.022437,99.32,99.996,1.244447,81.582,98.6523
20,VGG11-CIFAR10,750,Loc_Col_Loc_Col,150.0,16.0433,0.0259,99.2622,99.998,1.232703,81.0645,98.3789
23,VGG11-CIFAR10,875,Loc_Col_Loc_Col,150.0,16.0676,0.033402,98.9951,99.996,1.248113,78.0469,98.3594
26,VGG11-CIFAR10,1000,Loc_Col_Loc_Col,150.0,15.5569,0.075058,97.4661,99.9821,1.916243,63.75,95.4688
29,VGG11-CIFAR100,0,Loc_Col_Loc_Col,150.0,6.3355,0.067733,98.1844,99.9641,3.898895,44.4141,71.5039
