In [1]:
import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Load a training log and return its per-epoch metrics as a DataFrame.

    Each log entry of the form ``[Epoch N] Time: Ts ... Train ... Loss/Top1/Top5
    ... Test ... Loss/Top1/Top5`` becomes one row. Text that does not match the
    pattern is ignored, so a log without any matching entry yields an empty
    frame that still carries all eight columns.

    Args:
        file_path: Path of the text log to read.

    Returns:
        pandas.DataFrame with columns ``epoch``, ``time``, ``train_loss``,
        ``train_accuracy_top1``, ``train_accuracy_top5``, ``test_loss``,
        ``test_accuracy_top1`` and ``test_accuracy_top5``; one row per matched
        entry, in the order the entries appear in the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (raised by ``open``).
    """
    # Output column paired with the converter for its capture group; the order
    # here must follow the order of the groups in the pattern below.
    fields = (
        ('epoch', int),
        ('time', float),
        ('train_loss', float),
        ('train_accuracy_top1', float),
        ('train_accuracy_top5', float),
        ('test_loss', float),
        ('test_accuracy_top1', float),
        ('test_accuracy_top5', float),
    )

    # No DOTALL flag: '.' does not cross newlines, so an entry has to sit on a
    # single line to be picked up.
    pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'

    with open(file_path, 'r') as f:
        found = re.findall(pattern, f.read())

    # One converted list per column, built straight from the matched groups.
    return pd.DataFrame({
        name: [convert(groups[idx]) for groups in found]
        for idx, (name, convert) in enumerate(fields)
    })


def n_last_average_stats(df, last_n_epochs=5):
    """Average the loss and accuracy columns over the final rows of ``df``.

    Args:
        df: DataFrame holding the columns ``train_loss``, ``test_loss``,
            ``train_accuracy_top1``, ``train_accuracy_top5``,
            ``test_accuracy_top1`` and ``test_accuracy_top5``.
        last_n_epochs: Number of trailing rows to average; capped at the
            number of rows in ``df``.

    Returns:
        dict mapping ``"ave_<column>"`` to the mean of that column over the
        selected trailing rows.
    """
    # Never ask for more rows than the frame actually has.
    window = df.tail(min(last_n_epochs, len(df)))

    averaged_columns = (
        'train_loss',
        'test_loss',
        'train_accuracy_top1',
        'train_accuracy_top5',
        'test_accuracy_top1',
        'test_accuracy_top5',
    )
    return {f"ave_{column}": window[column].mean() for column in averaged_columns}

def best_stats(df):
    """Return the maximum of each accuracy column in ``df``.

    Args:
        df: DataFrame holding the columns ``train_accuracy_top1``,
            ``train_accuracy_top5``, ``test_accuracy_top1`` and
            ``test_accuracy_top5``.

    Returns:
        dict mapping ``"best_<column>"`` to that column's maximum value.
    """
    tracked_columns = (
        'train_accuracy_top1',
        'train_accuracy_top5',
        'test_accuracy_top1',
        'test_accuracy_top5',
    )

    peaks = {}
    for column in tracked_columns:
        peaks["best_" + column] = df[column].max()
    return peaks


In [3]:
import sys 
import os
import torch 
import torch.nn as nn

# Paths are anchored on the notebook's working directory; root_dir is the
# directory two levels above it.
curr_dir = os.getcwd()
root_dir = os.path.abspath(
    os.path.join(curr_dir, os.pardir, os.pardir)
)

print("Current Directory:", curr_dir)
print("Root Directory:", root_dir)

Current Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/K_test_pt4
Root Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor


In [5]:
# Experiment grid. Every (dataset, KS, K, model_type) combination is looked up
# at <curr_dir>/<dataset>/<model_name>/train_eval_results.txt.
data_set = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
KS = ["1", "2", "3"]
K = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"]
model_type = ["col_col_br0500_s0"]

# One CSV per dataset is written into this directory.
save_dir = os.path.join(curr_dir, "csv")
os.makedirs(save_dir, exist_ok=True)


def _summary_row(results_path, labels):
    """Build one summary row from a results file.

    Args:
        results_path: Path of a ``train_eval_results.txt`` log.
        labels: dict of identifying columns (Model, Dataset, K, KS,
            BranchRatio) placed in front of the metric columns.

    Returns:
        ``labels`` extended with ``BestAcc`` (best test top-1 accuracy) and
        ``Last5AvgAcc`` (test top-1 accuracy averaged with the default window
        of ``n_last_average_stats``), or ``None`` when the file does not exist.
    """
    try:
        model_df = parse_training_results(results_path)
    except FileNotFoundError:
        # Best effort: report the missing run and let the caller skip it.
        print(f"File not found: {results_path}")
        return None

    return {
        **labels,
        "BestAcc": best_stats(model_df)["best_test_accuracy_top1"],
        "Last5AvgAcc": n_last_average_stats(model_df)["ave_test_accuracy_top1"],
    }


for ds in data_set:
    rows = []  # one dict per run; turned into a DataFrame once at the end

    for ks in KS:
        for k in K:
            for mt in model_type:
                # The br1000_s42 model type is looked up under KS0 whatever
                # the loop value of ks is; every other type uses ks as-is.
                ks_temp = "0" if mt == "col_col_br1000_s42" else ks

                model_name = f"BranchingConvNN_K{k}_KS{ks_temp}_{mt}"
                model_path = os.path.join(curr_dir, ds, model_name, "train_eval_results.txt")

                if "br1000" in model_name:
                    display_name = f"ConvNN_Ks0_K{k}"
                elif "br0500" in model_name:
                    display_name = "Branch"
                else:
                    display_name = ""

                row = _summary_row(model_path, {
                    "Model": display_name,
                    "Dataset": ds,
                    "K": k,
                    "KS": ks_temp,
                    "BranchRatio": mt,
                })
                if row is not None:
                    rows.append(row)

    # Baseline rows: the K0 / br0000 run for each kernel size. The paths are
    # derived from curr_dir, like the model paths above, so the notebook does
    # not depend on one machine's absolute directory layout.
    for ks in KS:
        baseline_name = f"BranchingConvNN_K0_KS{ks}_col_col_br0000_s0"
        baseline_path = os.path.join(curr_dir, ds, baseline_name, "train_eval_results.txt")

        # NOTE(review): BranchRatio is the float 0.0 here but the model_type
        # string for the rows above, so the CSV column holds mixed types —
        # confirm downstream readers expect that.
        row = _summary_row(baseline_path, {
            "Model": "Baseline",
            "Dataset": ds,
            "K": "0",
            "KS": ks,
            "BranchRatio": 0.0,
        })
        if row is not None:
            rows.append(row)

    # Create DataFrame from list of rows
    results_df = pd.DataFrame(rows)
    save_path = os.path.join(save_dir, f"{ds}_k_test.csv")
    results_df.to_csv(save_path, index=False)
    print(f"Saved: {save_path}")
    print(results_df.head())

Saved: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/K_test_pt4/csv/VGG11-CIFAR10_k_test.csv
    Model        Dataset  K KS        BranchRatio  BestAcc  Last5AvgAcc
0  Branch  VGG11-CIFAR10  1  1  col_col_br0500_s0  54.6680     52.63868
1  Branch  VGG11-CIFAR10  2  1  col_col_br0500_s0  56.0938     53.99220
2  Branch  VGG11-CIFAR10  3  1  col_col_br0500_s0  57.6074     56.75194
3  Branch  VGG11-CIFAR10  4  1  col_col_br0500_s0  58.1934     57.21094
4  Branch  VGG11-CIFAR10  5  1  col_col_br0500_s0  58.7793     57.78124
Saved: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/K_test_pt4/csv/VGG11-CIFAR100_k_test.csv
    Model         Dataset  K KS        BranchRatio  BestAcc  Last5AvgAcc
0  Branch  VGG11-CIFAR100  1  1  col_col_br0500_s0  30.8594     28.71874
1  Branch  VGG11-CIFAR100  2  1  col_col_br0500_s0  31.1328     29.72072
2  Branch  VGG11-CIFAR100  3  1  col_col_br0500_s0  33.1250     31.56446
3  Branch  VGG11-CIFAR100  4  1  col_co