In [38]:
import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Load a training log and return its per-epoch metrics as a DataFrame.

    The whole file is read and scanned with a regular expression; each match
    becomes one row. Text that does not match the pattern is ignored. Because
    the pattern is applied without ``re.DOTALL``, all fields of an epoch entry
    must sit on a single line for that entry to match.

    Parameters
    ----------
    file_path : str
        Path of the text file to parse.

    Returns
    -------
    pandas.DataFrame
        Columns ``epoch`` (int), ``time``, ``train_loss``,
        ``train_accuracy_top1``, ``train_accuracy_top5``, ``test_loss``,
        ``test_accuracy_top1`` and ``test_accuracy_top5`` (floats), with one
        row per matched entry in file order. The frame is empty when nothing
        matches.
    """
    with open(file_path, 'r') as log_file:
        log_text = log_file.read()

    # One capture group per output column, in the order the fields are matched.
    pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'

    # (column name, converter) for each capture group, in group order.
    fields = (
        ('epoch', int),
        ('time', float),
        ('train_loss', float),
        ('train_accuracy_top1', float),
        ('train_accuracy_top5', float),
        ('test_loss', float),
        ('test_accuracy_top1', float),
        ('test_accuracy_top5', float),
    )

    rows = re.findall(pattern, log_text)

    # Transpose the matched rows into one list per column.
    columns = {
        name: [convert(row[idx]) for row in rows]
        for idx, (name, convert) in enumerate(fields)
    }
    return pd.DataFrame(columns)

In [39]:
# Example run used to sanity-check the parser.
# The path is built from the notebook's working directory instead of being
# hard-coded to one user's home directory, so the cell runs on any checkout.
# This is the same lookup the batch cell uses (curr_dir / dataset / model).
# NOTE(review): assumes the kernel is started in the K_test directory — confirm.
ex_path = os.path.join(
    os.getcwd(),
    "VGG11-CIFAR10",
    "BranchingConvNN_K1_KS1_col_col_br0000_s42",
    "train_eval_results.txt",
)

df_results = parse_training_results(ex_path)
df_results.head()

Unnamed: 0,epoch,time,train_loss,train_accuracy_top1,train_accuracy_top5,test_loss,test_accuracy_top1,test_accuracy_top5
0,1,5.9061,5.170553,17.3892,69.7425,2.284095,15.498,65.0391
1,2,5.6899,2.027567,22.6535,77.9735,1.882639,27.3828,83.0469
2,3,5.7065,1.919027,26.321,82.1867,1.962263,23.4375,81.25
3,4,5.7335,1.866853,28.7388,83.4291,1.809271,31.3477,85.0488
4,5,5.7562,1.831581,30.1339,84.2379,1.800368,30.6152,85.8301


In [40]:
import sys 
import os
import torch 
import torch.nn as nn

# Working directory of the notebook, and the directory two levels above it.
curr_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(curr_dir, os.pardir, os.pardir))

# Echo both paths so the reader can confirm where files will be resolved from.
for label, location in (("Current Directory:", curr_dir), ("Root Directory:", root_dir)):
    print(label, location)

Current Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor/Final_Output/K_test
Root Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor


In [41]:
# Experiment grid. For every dataset, each (KS, K, model_type) combination is
# read from <curr_dir>/<dataset>/<model directory>/train_eval_results.txt.
data_set = ["VGG11-CIFAR10", "VGG11-CIFAR100"]
KS = ["1", "2", "3"]
K = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"]
model_type = ["col_col_br0500_s42", "col_col_br1000_s42"]

# One CSV per dataset is written into this directory.
save_dir = os.path.join(curr_dir, "csv")
os.makedirs(save_dir, exist_ok=True)


def _add_run_columns(results, prefix, run_df):
    """Store one run's loss and top-1 accuracy curves in `results` under `prefix`."""
    results[prefix + "_Train_Loss"] = run_df['train_loss']
    results[prefix + "_Test_Loss"] = run_df['test_loss']
    results[prefix + "_Train_Accuracy"] = run_df['train_accuracy_top1']
    results[prefix + "_Test_Accuracy"] = run_df['test_accuracy_top1']


for ds in data_set:
    results = {}
    # Epoch numbers of the longest run parsed so far for this dataset. Taking
    # them from the longest run (rather than from whichever run happened to be
    # parsed last) keeps the epoch column complete when runs differ in length.
    epoch_col = pd.Series([], dtype="int64")

    for ks in KS:
        # Baseline: the br0000 run at K=1, exported as "Conv2d_Ks<ks>_K0".
        baseline_model = f"BranchingConvNN_K1_KS{ks}_col_col_br0000_s42"
        baseline_path = os.path.join(curr_dir, ds, baseline_model, "train_eval_results.txt")
        baseline_df = parse_training_results(baseline_path)

        _add_run_columns(results, f"Conv2d_Ks{ks}_K0", baseline_df)
        if len(baseline_df) > len(epoch_col):
            epoch_col = baseline_df['epoch']

        for k in K:
            for mt in model_type:
                model_name = f"BranchingConvNN_K{k}_KS{ks}_{mt}"
                model_path = os.path.join(curr_dir, ds, model_name, "train_eval_results.txt")
                model_df = parse_training_results(model_path)

                # Map the directory name to the short column prefix used in the CSV.
                # NOTE(review): br1000 runs are keyed without `ks`, so each KS pass
                # overwrites the previous one and only the last KS survives in the
                # output — presumably intentional; confirm.
                if "br1000" in model_name:
                    model_name = f"ConvNN_Ks0_K{k}"
                elif "br0500" in model_name:
                    model_name = f"Branching_Ks{ks}_K{k}"

                _add_run_columns(results, model_name, model_df)
                if len(model_df) > len(epoch_col):
                    epoch_col = model_df['epoch']

    # 'epoch' goes first; the remaining columns keep their insertion order.
    # Series are aligned on their positional index (row i == i-th parsed entry).
    df = pd.DataFrame({'epoch': epoch_col, **results})
    save_path = os.path.join(save_dir, f"{ds}_k_test.csv")
    df.to_csv(save_path, index=False)
    
                
