In [1]:
import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Parse training results from text file into a pandas DataFrame"""
    
    # Read the file
    with open(file_path, 'r') as f:
        content = f.read()
    
    # Initialize lists to store data
    epochs = []
    times = []
    train_losses = []
    train_acc_top1 = []
    train_acc_top5 = []
    test_losses = []
    test_acc_top1 = []
    test_acc_top5 = []
    
    # Regular expression to match each epoch line
    pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'
    
    # Find all matches
    matches = re.findall(pattern, content)
    
    for match in matches:
        epochs.append(int(match[0]))
        times.append(float(match[1]))
        train_losses.append(float(match[2]))
        train_acc_top1.append(float(match[3]))
        train_acc_top5.append(float(match[4]))
        test_losses.append(float(match[5]))
        test_acc_top1.append(float(match[6]))
        test_acc_top5.append(float(match[7]))
    
    # Create DataFrame
    df = pd.DataFrame({
        'epoch': epochs,
        'time': times,
        'train_loss': train_losses,
        'train_accuracy_top1': train_acc_top1,
        'train_accuracy_top5': train_acc_top5,
        'test_loss': test_losses,
        'test_accuracy_top1': test_acc_top1,
        'test_accuracy_top5': test_acc_top5
    })
    
    return df


def n_last_average_stats(df, last_n_epochs=5):
    """Calculate average stats over the last N epochs"""
    if len(df) < last_n_epochs:
        last_n_epochs = len(df)
    
    return {
        "ave_train_loss": df['train_loss'].tail(last_n_epochs).mean(),
        "ave_test_loss": df['test_loss'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top1": df['train_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top5": df['train_accuracy_top5'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top1": df['test_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top5": df['test_accuracy_top5'].tail(last_n_epochs).mean()
    }

def best_stats(df):
    """Calculate best accuracy achieved during training"""
    return {
        "best_train_accuracy_top1": df['train_accuracy_top1'].max(),
        "best_train_accuracy_top5": df['train_accuracy_top5'].max(),
        "best_test_accuracy_top1": df['test_accuracy_top1'].max(),
        "best_test_accuracy_top5": df['test_accuracy_top5'].max()
    }

def parse_params_gflops(file_dir):
    """Parse training results from text file into a pandas DataFrame"""

    train_path = os.path.join(file_dir, "train_eval_results.txt")
    args_path = os.path.join(file_dir, "args.txt")
    
    # Read the file
    with open(train_path, 'r') as f:
        train_content = f.readline()

    with open(args_path, 'r') as f:
        args_content = f.readlines()

    total_params = args_content[-2].strip().split(": ")[1]
    trainable_params = args_content[-1].strip().split(": ")[1]

    gflops = re.search(r'GFLOPs: ([\d.]+)', train_content).group(1)

    return {
        "total_params": int(total_params),
        "trainable_params": int(trainable_params),
        "gflops": float(gflops)
    }



In [None]:
import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Parse training results from text file into a pandas DataFrame"""
    
    # Read the file
    with open(file_path, 'r') as f:
        content = f.read()
    
    # Initialize lists to store data
    epochs = []
    times = []
    train_losses = []
    train_acc_top1 = []
    train_acc_top5 = []
    test_losses = []
    test_acc_top1 = []
    test_acc_top5 = []
    
    # Regular expression to match each epoch line
    pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'
    
    # Find all matches
    matches = re.findall(pattern, content)
    
    for match in matches:
        epochs.append(int(match[0]))
        times.append(float(match[1]))
        train_losses.append(float(match[2]))
        train_acc_top1.append(float(match[3]))
        train_acc_top5.append(float(match[4]))
        test_losses.append(float(match[5]))
        test_acc_top1.append(float(match[6]))
        test_acc_top5.append(float(match[7]))
    
    # Create DataFrame
    df = pd.DataFrame({
        'epoch': epochs,
        'time': times,
        'train_loss': train_losses,
        'train_accuracy_top1': train_acc_top1,
        'train_accuracy_top5': train_acc_top5,
        'test_loss': test_losses,
        'test_accuracy_top1': test_acc_top1,
        'test_accuracy_top5': test_acc_top5
    })
    
    return df


def n_last_average_stats(df, last_n_epochs=5):
    """Calculate average stats over the last N epochs"""
    if len(df) < last_n_epochs:
        last_n_epochs = len(df)
    
    return {
        "ave_train_loss": df['train_loss'].tail(last_n_epochs).mean(),
        "ave_test_loss": df['test_loss'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top1": df['train_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top5": df['train_accuracy_top5'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top1": df['test_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top5": df['test_accuracy_top5'].tail(last_n_epochs).mean()
    }

def best_stats(df):
    """Calculate best accuracy achieved during training"""
    return {
        "best_train_accuracy_top1": df['train_accuracy_top1'].max(),
        "best_train_accuracy_top5": df['train_accuracy_top5'].max(),
        "best_test_accuracy_top1": df['test_accuracy_top1'].max(),
        "best_test_accuracy_top5": df['test_accuracy_top5'].max()
    }

def parse_params_gflops(file_dir):
    """Parse training results from text file into a pandas DataFrame"""

    train_path = os.path.join(file_dir, "train_eval_results.txt")
    args_path = os.path.join(file_dir, "args.txt")
    
    # Read the file
    with open(train_path, 'r') as f:
        train_content = f.readline()

    with open(args_path, 'r') as f:
        args_content = f.readlines()

    total_params = args_content[-2].strip().split(": ")[1]
    trainable_params = args_content[-1].strip().split(": ")[1]

    gflops = re.search(r'GFLOPs: ([\d.]+)', train_content).group(1)

    return {
        "total_params": int(total_params),
        "trainable_params": int(trainable_params),
        "gflops": float(gflops)
    }

?

In [16]:
import pandas as pd
import re
import os

def parse_training_results(file_path):
    """Parse training results from text file into a pandas DataFrame"""
    
    # Read the file
    with open(file_path, 'r') as f:
        content = f.read()
    
    # Initialize lists to store data
    epochs = []
    times = []
    train_losses = []
    train_acc_top1 = []
    train_acc_top5 = []
    test_losses = []
    test_acc_top1 = []
    test_acc_top5 = []
    
    # Regular expression to match each epoch line
    pattern = r'\[Epoch (\d+)\] Time: ([\d.]+)s.*?Train.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%.*?Test.*?Loss: ([\d.]+).*?Top1: ([\d.]+)%.*?Top5: ([\d.]+)%'
    
    # Find all matches
    matches = re.findall(pattern, content)
    
    for match in matches:
        epochs.append(int(match[0]))
        times.append(float(match[1]))
        train_losses.append(float(match[2]))
        train_acc_top1.append(float(match[3]))
        train_acc_top5.append(float(match[4]))
        test_losses.append(float(match[5]))
        test_acc_top1.append(float(match[6]))
        test_acc_top5.append(float(match[7]))
    
    # Create DataFrame
    df = pd.DataFrame({
        'epoch': epochs,
        'time': times,
        'train_loss': train_losses,
        'train_accuracy_top1': train_acc_top1,
        'train_accuracy_top5': train_acc_top5,
        'test_loss': test_losses,
        'test_accuracy_top1': test_acc_top1,
        'test_accuracy_top5': test_acc_top5
    })
    
    return df


def n_last_average_stats(df, last_n_epochs=5):
    """Calculate average stats over the last N epochs"""
    if len(df) < last_n_epochs:
        last_n_epochs = len(df)
    
    return {
        "ave_train_loss": df['train_loss'].tail(last_n_epochs).mean(),
        "ave_test_loss": df['test_loss'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top1": df['train_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_train_accuracy_top5": df['train_accuracy_top5'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top1": df['test_accuracy_top1'].tail(last_n_epochs).mean(),
        "ave_test_accuracy_top5": df['test_accuracy_top5'].tail(last_n_epochs).mean()
    }

def best_stats(df):
    """Calculate best accuracy achieved during training"""
    return {
        "best_train_accuracy_top1": df['train_accuracy_top1'].max(),
        "best_train_accuracy_top5": df['train_accuracy_top5'].max(),
        "best_test_accuracy_top1": df['test_accuracy_top1'].max(),
        "best_test_accuracy_top5": df['test_accuracy_top5'].max()
    }

def parse_params_gflops(file_dir):
    """Parse training results from text file into a pandas DataFrame"""

    train_path = os.path.join(file_dir, "train_eval_results.txt")
    args_path = os.path.join(file_dir, "args.txt")
    
    # Read the file
    with open(train_path, 'r') as f:
        train_content = f.readline()

    with open(args_path, 'r') as f:
        args_content = f.readlines()

    total_params = args_content[-2].strip().split(": ")[1]
    trainable_params = args_content[-1].strip().split(": ")[1]

    gflops = re.search(r'GFLOPs: ([\d.]+)', train_content).group(1)

    return {
        "total_params": int(total_params),
        "trainable_params": int(trainable_params),
        "gflops": float(gflops)
    }



In [17]:
import sys 
import os
import torch 
import torch.nn as nn

curr_dir = os.getcwd()
root_dir = os.path.abspath(os.path.join(curr_dir, '..', '..'))

print("Current Directory:", curr_dir)
print("Root Directory:", root_dir)

Current Directory: /Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output
Root Directory: /Users/mingikang/Developer


## K Test

In [18]:
convnn_dir = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K1_s42"

kvt_dir = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test/ViT-Tiny-CIFAR10/KvtAttention_K3_s42"



In [19]:
datasets = ["ViT-Tiny-CIFAR10", "ViT-Tiny-CIFAR100"]
Ks = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "16", "25", "36"]
layer = ["ConvNNAttention", "KvtAttention"]

rows = []

for ds in datasets:
    for l in layer: 
        for k in Ks:
            if l == "ConvNNAttention":
                dir_path = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test_correct/{ds}/ConvNNAttention_K{k}_s42"
            else:
                dir_path = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test/{ds}/KvtAttention_K{k}_s42"

            # print(dir_path)
            
            df = parse_training_results(os.path.join(dir_path, "train_eval_results.txt"))
            avg_stats = n_last_average_stats(df, last_n_epochs=5)
            best_stat = best_stats(df)
            params_gflops = parse_params_gflops(dir_path)

            row = {
                "dataset": ds,
                "layer": l,
                "K": k,
                **avg_stats,
                **best_stat,
                **params_gflops
            }
            rows.append(row)


cifar10_attention_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/Baseline_Test**/ViT-Tiny-CIFAR10/Attention_NH1_s42/train_eval_results.txt"

cifar10_attention_df = parse_training_results(cifar10_attention_path)
cifar10_attention_avg_stats = n_last_average_stats(cifar10_attention_df, last_n_epochs=5)
cifar10_attention_best_stats = best_stats(cifar10_attention_df)
cifar10_attention_params_gflops = parse_params_gflops(os.path.dirname(cifar10_attention_path))



cifar100_attention_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/Baseline_Test**/ViT-Tiny-CIFAR100/Attention_NH1_s42/train_eval_results.txt"

cifar100_attention_df = parse_training_results(cifar100_attention_path)
cifar100_attention_avg_stats = n_last_average_stats(cifar100_attention_df, last_n_epochs=5)
cifar100_attention_best_stats = best_stats(cifar100_attention_df)
cifar100_attention_params_gflops = parse_params_gflops(os.path.dirname(cifar100_attention_path))

row = {
    "dataset": "ViT-Tiny-CIFAR10",
    "layer": "Attention",
    "K": "N/A",
    **cifar10_attention_avg_stats,
    **cifar10_attention_best_stats,
    **cifar10_attention_params_gflops
}

rows.append(row)

row = {
    "dataset": "ViT-Tiny-CIFAR100",
    "layer": "Attention",
    "K": "N/A",
    **cifar100_attention_avg_stats,
    **cifar100_attention_best_stats,
    **cifar100_attention_params_gflops
}

rows.append(row)

results_df = pd.DataFrame(rows)
results_df.to_csv(os.path.join(curr_dir, "csv", "K_test.csv"), index=False)
print(results_df.head())

            dataset            layer  K  ave_train_loss  ave_test_loss  \
0  ViT-Tiny-CIFAR10  ConvNNAttention  1        0.484201       0.853181   
1  ViT-Tiny-CIFAR10  ConvNNAttention  2        0.057783       1.677184   
2  ViT-Tiny-CIFAR10  ConvNNAttention  3        0.039318       1.808193   
3  ViT-Tiny-CIFAR10  ConvNNAttention  4        0.037307       1.880055   
4  ViT-Tiny-CIFAR10  ConvNNAttention  5        0.032257       1.890058   

   ave_train_accuracy_top1  ave_train_accuracy_top5  ave_test_accuracy_top1  \
0                 82.45680                 99.53148                73.44532   
1                 97.96586                 99.99880                73.24414   
2                 98.61320                 99.99920                73.35154   
3                 98.70406                 99.99960                72.72852   
4                 98.88218                 99.99880                73.23634   

   ave_test_accuracy_top5  best_train_accuracy_top1  best_train_accuracy_top5  \

## N Test

In [26]:

datasets = ["ViT-Tiny-CIFAR10", "ViT-Tiny-CIFAR100"]
Ns = ["16", "32", "48", "64", "80", "96", "112", "128", "144", "160", "176", "192"]
type = ["random", "spatial"]


rows = []
for ds in datasets:
    for n in Ns: 
        for t in type:
            dir_path = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/{ds}/ConvNNAttention_K9_N{n}_{t}_s42"

            print(dir_path)
            
            df = parse_training_results(os.path.join(dir_path, "train_eval_results.txt"))
            avg_stats = n_last_average_stats(df, last_n_epochs=5)
            best_stat = best_stats(df)
            params_gflops = parse_params_gflops(dir_path)

            row = {
                "dataset": ds,
                "layer": "ConvNNAttention",
                "N": n,
                "type": t,
                **avg_stats,
                **best_stat,
                **params_gflops
            }
            rows.append(row)

print(rows)



dir_path = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_s42"

df = parse_training_results(os.path.join(dir_path, "train_eval_results.txt"))
avg_stats = n_last_average_stats(df, last_n_epochs=5)
best_stat = best_stats(df)
params_gflops = parse_params_gflops(dir_path)

row = {
    "dataset": "ViT-Tiny-CIFAR10",
    "layer": "ConvNNAttention",
    "N": "N/A",
    "type": "all",
    **avg_stats,
    **best_stat,
    **params_gflops
}
rows.append(row)

dir_path = f"/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/K_test_correct/ViT-Tiny-CIFAR100/ConvNNAttention_K9_s42"

df = parse_training_results(os.path.join(dir_path, "train_eval_results.txt"))
avg_stats = n_last_average_stats(df, last_n_epochs=5)
best_stat = best_stats(df)
params_gflops = parse_params_gflops(dir_path)

row = {
    "dataset": "ViT-Tiny-CIFAR100",
    "layer": "ConvNNAttention",
    "N": "N/A",
    "type": "all",
    **avg_stats,
    **best_stat,
    **params_gflops
}
rows.append(row)


cifar10_attention_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/Baseline_Test**/ViT-Tiny-CIFAR10/Attention_NH1_s42/train_eval_results.txt"

cifar10_attention_df = parse_training_results(cifar10_attention_path)
cifar10_attention_avg_stats = n_last_average_stats(cifar10_attention_df, last_n_epochs=5)
cifar10_attention_best_stats = best_stats(cifar10_attention_df)
cifar10_attention_params_gflops = parse_params_gflops(os.path.dirname(cifar10_attention_path))



cifar100_attention_path = "/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/Baseline_Test**/ViT-Tiny-CIFAR100/Attention_NH1_s42/train_eval_results.txt"

cifar100_attention_df = parse_training_results(cifar100_attention_path)
cifar100_attention_avg_stats = n_last_average_stats(cifar100_attention_df, last_n_epochs=5)
cifar100_attention_best_stats = best_stats(cifar100_attention_df)
cifar100_attention_params_gflops = parse_params_gflops(os.path.dirname(cifar100_attention_path))

row = {
    "dataset": "ViT-Tiny-CIFAR10",
    "layer": "Attention",
    "N": "N/A",
    "type": "N/A",
    **cifar10_attention_avg_stats,
    **cifar10_attention_best_stats,
    **cifar10_attention_params_gflops
}

rows.append(row)

row = {
    "dataset": "ViT-Tiny-CIFAR100",
    "layer": "Attention",
    "N": "N/A",
    "type": "N/A",
    **cifar100_attention_avg_stats,
    **cifar100_attention_best_stats,
    **cifar100_attention_params_gflops
}

rows.append(row)

results_df = pd.DataFrame(rows)
results_df.to_csv(os.path.join(curr_dir, "csv", "N_test.csv"), index=False)

# 

/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N16_random_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N16_spatial_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N32_random_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N32_spatial_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N48_random_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/ConvNNAttention_K9_N48_spatial_s42
/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR10/Co

FileNotFoundError: [Errno 2] No such file or directory: '/Users/mingikang/Developer/Convolutional-Nearest-Neighbor-Attention/Final_Output/N_test_correct/ViT-Tiny-CIFAR100/ConvNNAttention_K9_N32_spatial_s42/train_eval_results.txt'

In [23]:
results_df

Unnamed: 0,dataset,layer,N,type,ave_train_loss,ave_test_loss,ave_train_accuracy_top1,ave_train_accuracy_top5,ave_test_accuracy_top1,ave_test_accuracy_top5,best_train_accuracy_top1,best_train_accuracy_top5,best_test_accuracy_top1,best_test_accuracy_top5,total_params,trainable_params,gflops
0,ViT-Tiny-CIFAR10,ConvNNAttention,,all,0.028123,1.931252,99.0267,99.9996,73.8828,97.98634,99.0804,100.0,74.3945,98.3203,5500042,5500042,2.333185
1,ViT-Tiny-CIFAR100,ConvNNAttention,,all,0.059221,3.959703,98.06632,99.98088,48.82422,76.52928,98.1481,99.996,49.1699,77.7734,5517412,5517412,2.33322
2,ViT-Tiny-CIFAR10,Attention,,,0.029531,1.605003,98.9895,100.0,77.1211,98.23828,99.0171,100.0,77.6367,98.6816,5479306,5479306,2.507932
3,ViT-Tiny-CIFAR100,Attention,,,0.058465,3.818906,98.10738,99.98604,49.69726,76.93358,98.2541,99.99,50.1855,78.2617,5496676,5496676,2.507966
