In [1]:
# general imports
import torch as th
import torch.nn.functional as F
from torch_geometric.loader import DataLoader as PygDataLoader
from torch.utils.data import SequentialSampler

In [2]:
import torch
from torch.nn import CrossEntropyLoss
from torch_geometric.data import DataLoader
from dataloader import NCaltech101Best
from model import GraphRes, GraphResModified, GraphResSimple, GraphTrans
from tqdm import tqdm

In [3]:
# params
batch_size= 16
lr=         10e-3 # decreases by 10 after each 20 epochs
loss=       th.nn.CrossEntropyLoss
batchsize=  16
K=          10 # data subsampling
nclasses=   100

In [4]:
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_LAUNCH_BLOCKING=1


In [5]:
# Set device to use for training
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'
print("using device:", device)

def single_run(batch_size, num_samples, trans_heads, dropout_trans, epochs):
    # Initialize dataset
    dataset = NCaltech101Best('./data/storage/', mode='train', num_samples=num_samples)

    # training data
    loader = PygDataLoader(dataset, batch_size=batch_size, shuffle=True)
    print("example datapoint:", dataset.get(0))
    
    # testing data and function
    test_dataset = NCaltech101Best('./data/storage/', mode='test')
    test_loader = PygDataLoader(test_dataset, batch_size=64, shuffle=True)
    def run_model_test(model, loader):
        model.eval()
        correct = 0
        for data in tqdm(loader):
            data = data.to(device)
            with torch.no_grad():
                out = model(data)
                pred = out.max(dim=1)[1]
                correct += pred.eq(data.y).sum().item()
        return correct / len(loader.dataset)

    # Initialize model
    model_input_shape = th.tensor((240, 180) + (3, ), device=device)
    print("INPUT SHAPE:", model_input_shape)
    model = GraphTrans('ncaltech101', model_input_shape, 101, dropout_trans=dropout_trans, heads=trans_heads).to(device)

    # Define loss function and optimizer
    criterion = CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



    acc = []
    test_acc = []
    # Define training loop
    def train():
        model.train()
        losses = []
        correct = 0
        i = 0
        print("234 iterations in total:")
        progbar = tqdm(loader)
        for data in progbar:

            # inference
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)

            # weight updates
            y = F.one_hot(data.y, num_classes=101).type(torch.float32)
            loss = criterion(output, y)
            losses.append(loss)
            loss.backward()
            optimizer.step()

            # precision logging
            pred = output.max(dim=1)[1]
            correct += pred.eq(data.y).sum().item()
            i += 1
            # if i % 50 == 0:
            #     print(correct, i*16)
            #     print(correct / (i*16))
            # if i % 20 == 0 and i != 0:
            #     print((i, round(correct / (i*16), 2)), end='->')
            # if i % 100 == 0 and i != 0:   
            #     print("\n", end='')
            precision = round(correct / (i*batch_size), 2)
            progbar.set_description("precision so far is {}".format(precision))
            data = data.to('cpu')
        
        # test, to check for overfitting
        tacc = run_model_test(model, test_loader)
        test_acc.append(tacc)
        
        acc.append(correct / (len(loader) * batch_size))
        print("train accuracy: {}, test accuracy: {}".format(acc[len(acc) - 1], tacc))
        return losses

    def clean_losses(losses):
        clean = []
        for loss in losses:
            ll = [float(l.cpu().detach().numpy()) for l in loss]
            clean.append(ll)
        return clean

    # Run training loop for n epochs
    all_losses = []
    for epoch in range(epochs):
        print("computing epoch", epoch)
        losses = train()
        all_losses.append(losses)
        if epoch > 0 and test_acc[-1] < 0.1:
            return max(test_acc), test_acc, clean_losses(all_losses)
        if epoch > 10 and test_acc[-1] < test_acc[len(test_acc) - 6]:
            return max(test_acc), test_acc, clean_losses(all_losses)
    return max(test_acc), test_acc, clean_losses(all_losses)

using device: cpu


In [6]:
# [batch_size, num_samples, trans_heads, dropout_trans, epochs]
import itertools
batch_size = [16, 64, 128]
num_samples= [100, 1000, 10000]
trans_heads= [3, 1, 5]
dropout_trans = [False, True]
epochs = [10, 50]

combinations = [element for element in itertools.product(batch_size, num_samples, trans_heads, dropout_trans, epochs)]
print(combinations)

[(16, 100, 3, False, 10), (16, 100, 3, False, 50), (16, 100, 3, True, 10), (16, 100, 3, True, 50), (16, 100, 1, False, 10), (16, 100, 1, False, 50), (16, 100, 1, True, 10), (16, 100, 1, True, 50), (16, 100, 5, False, 10), (16, 100, 5, False, 50), (16, 100, 5, True, 10), (16, 100, 5, True, 50), (16, 1000, 3, False, 10), (16, 1000, 3, False, 50), (16, 1000, 3, True, 10), (16, 1000, 3, True, 50), (16, 1000, 1, False, 10), (16, 1000, 1, False, 50), (16, 1000, 1, True, 10), (16, 1000, 1, True, 50), (16, 1000, 5, False, 10), (16, 1000, 5, False, 50), (16, 1000, 5, True, 10), (16, 1000, 5, True, 50), (16, 10000, 3, False, 10), (16, 10000, 3, False, 50), (16, 10000, 3, True, 10), (16, 10000, 3, True, 50), (16, 10000, 1, False, 10), (16, 10000, 1, False, 50), (16, 10000, 1, True, 10), (16, 10000, 1, True, 50), (16, 10000, 5, False, 10), (16, 10000, 5, False, 50), (16, 10000, 5, True, 10), (16, 10000, 5, True, 50), (64, 100, 3, False, 10), (64, 100, 3, False, 50), (64, 100, 3, True, 10), (64, 10

In [7]:
# now we write the algo that runs experiments and keeps track of results, even after interruption
# we need to manage a json file
import json, os
# steps are:
# initialize the json
# figure out which combinations aren't done yet
# pick a random one, test
# update the json
import random
import signal
import sys

def signal_handler(sig, frame):
    print("Interrupt detected, saving results and exiting.")
    with open('results.json', 'w') as f:
        json.dump(results, f)
    sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)

# Initialize the json
if os.path.exists('results.json'):
    with open('results.json', 'r') as f:
        results = json.load(f)
else:
    results = {}

# Loop until all combinations are done
while True:
    # Figure out which combinations aren't done yet
    pending_combinations = [c for c in combinations if str(c) not in results]

    if not pending_combinations:
        print("All combinations have been tested.")
        break

    # Pick a random combination
    random_combination = random.choice(pending_combinations)

    print(f"Testing combination: {random_combination}")
    try:
        max_test_acc, test_acc, all_losses = single_run(*random_combination)
        results[str(random_combination)] = {
            "max_test_acc": max_test_acc,
            "test_acc": test_acc,
            # "all_losses": all_losses
        }
        with open('results.json', 'w') as f:
            json.dump(results, f, indent=4)
    except Exception as e:
        print(type(e))
        # torch.cuda.OutOfMemoryError sometimes
        print(f"Error occurred while testing combination {random_combination}: {e}")

Testing combination: (16, 100, 5, True, 10)
loading classes...


Processing...
Done!
100%|██████████| 101/101 [00:00<00:00, 21980.22it/s]

example datapoint: Data(x=[100, 1], y=8, pos=[100, 3], edge_index=[2, 124], edge_attr=[124, 3])
loading classes...



Processing...
Done!
100%|██████████| 101/101 [00:00<00:00, 21734.38it/s]

INPUT SHAPE: tensor([240, 180,   3])





computing epoch 0
234 iterations in total:


precision so far is 0.24: 100%|██████████| 434/434 [01:00<00:00,  7.17it/s]
100%|██████████| 14/14 [00:05<00:00,  2.72it/s]


train accuracy: 0.2399193548387097, test accuracy: 0.24571428571428572
computing epoch 1
234 iterations in total:


precision so far is 0.27: 100%|██████████| 434/434 [00:53<00:00,  8.11it/s]
100%|██████████| 14/14 [00:04<00:00,  3.20it/s]


train accuracy: 0.2744815668202765, test accuracy: 0.2582857142857143
computing epoch 2
234 iterations in total:


precision so far is 0.29: 100%|██████████| 434/434 [00:55<00:00,  7.86it/s]
100%|██████████| 14/14 [00:04<00:00,  3.14it/s]


train accuracy: 0.28585829493087556, test accuracy: 0.2674285714285714
computing epoch 3
234 iterations in total:


precision so far is 0.3: 100%|██████████| 434/434 [00:56<00:00,  7.64it/s] 
100%|██████████| 14/14 [00:04<00:00,  3.15it/s]


train accuracy: 0.2955069124423963, test accuracy: 0.3017142857142857
computing epoch 4
234 iterations in total:


precision so far is 0.31: 100%|██████████| 434/434 [00:57<00:00,  7.51it/s]
100%|██████████| 14/14 [00:04<00:00,  3.07it/s]


train accuracy: 0.30630760368663595, test accuracy: 0.3097142857142857
computing epoch 5
234 iterations in total:


precision so far is 0.31: 100%|██████████| 434/434 [00:59<00:00,  7.27it/s]
100%|██████████| 14/14 [00:04<00:00,  3.01it/s]


train accuracy: 0.30601958525345624, test accuracy: 0.29828571428571427
computing epoch 6
234 iterations in total:


precision so far is 0.31: 100%|██████████| 434/434 [01:00<00:00,  7.14it/s]
100%|██████████| 14/14 [00:04<00:00,  2.96it/s]


train accuracy: 0.3109158986175115, test accuracy: 0.3062857142857143
computing epoch 7
234 iterations in total:


precision so far is 0.32: 100%|██████████| 434/434 [01:02<00:00,  6.95it/s]
100%|██████████| 14/14 [00:04<00:00,  2.85it/s]


train accuracy: 0.31610023041474655, test accuracy: 0.3028571428571429
computing epoch 8
234 iterations in total:


precision so far is 0.32: 100%|██████████| 434/434 [01:04<00:00,  6.77it/s]
100%|██████████| 14/14 [00:04<00:00,  2.80it/s]


train accuracy: 0.31581221198156684, test accuracy: 0.3142857142857143
computing epoch 9
234 iterations in total:


precision so far is 0.32: 100%|██████████| 434/434 [01:05<00:00,  6.59it/s]
100%|██████████| 14/14 [00:05<00:00,  2.74it/s]


train accuracy: 0.3153801843317972, test accuracy: 0.296
Testing combination: (64, 1000, 1, True, 10)
loading classes...


Processing...
Done!
100%|██████████| 101/101 [00:00<00:00, 15073.47it/s]


example datapoint: Data(x=[1000, 1], y=8, pos=[1000, 3], edge_index=[2, 5851], edge_attr=[5851, 3])
loading classes...


Processing...
Done!
100%|██████████| 101/101 [00:00<00:00, 18412.86it/s]


INPUT SHAPE: tensor([240, 180,   3])
computing epoch 0
234 iterations in total:


precision so far is 0.25: 100%|██████████| 109/109 [04:32<00:00,  2.50s/it]
100%|██████████| 14/14 [00:31<00:00,  2.24s/it]


train accuracy: 0.24569954128440366, test accuracy: 0.272
computing epoch 1
234 iterations in total:


precision so far is 0.27:  11%|█         | 12/109 [36:47<13:15:32, 492.09s/it]

In [None]:
# # # what to do with the losses...
# import numpy as np
# example_accuracies = [
#     np.array([0.256, 0.272, 0.26057142857142856, 0.2662857142857143, 0.272, 0.2674285714285714, 0.2925714285714286, 0.264]),
#     0.5 * np.array([0.256, 0.272, 0.26057142857142856, 0.2662857142857143, 0.272, 0.2674285714285714, 0.2925714285714286, 0.264]),
#     np.array([0.256, 0.272, 0.26057142857142856, 0.2662857142857143, 0.272, 0.2674285714285714, 0.2925714285714286, 0.264]) ** 2
# ]

# import matplotlib as mpl
# import matplotlib.pyplot as plt

# fig, ax = plt.subplots()
# for exacc in example_accuracies:
#     ax.plot(range(len(exacc)), exacc)
# fig.show()

In [None]:
# Initialize the json
import os, json
if os.path.exists('results.json'):
    with open('results.json', 'r') as f:
        results = json.load(f)

def calculate_average_accuracy(param_index, param_value):
    relevant_combinations = [c for c in results.keys() if eval(c)[param_index] == param_value]
    # total_accuracy = sum([results[c]['max_test_acc'] for c in relevant_combinations])
    # return total_accuracy / len(relevant_combinations)
    ret = [results[c]['max_test_acc'] for c in relevant_combinations]

    return round(max(ret), 2), round(sum(ret) / len(ret), 2), len(ret)

computed_combinations = [c for c in results.keys()]
print("combinations computed:", len(computed_combinations), "/75")

parameters = ["batch_size", "num_samples", "trans_heads", "dropout_trans", "epochs"]
values = [batch_size, num_samples, trans_heads, dropout_trans, epochs]

for i, param in enumerate(parameters):
    print(f"(max, avg, n) accuracy for {param}:")
    for value in values[i]:
        avg_accuracy = calculate_average_accuracy(i, value)
        print(f"  {value}: {avg_accuracy}")
    print()

scores = []
for r in results.keys():
    scores.append((results[r]['max_test_acc'], r))
scores.sort(reverse=True)
[print(s) for s in scores]

In [None]:
def clean():
    # Initialize the json
    results = {}
    if os.path.exists('results.json'):
        with open('results.json', 'r') as f:
            results = json.load(f)
    else:
        results = {}
    
    for k, v in results.items():
        if "all_losses" in v:
            v.pop("all_losses")
    with open('results.json', 'w') as f:
            json.dump(results, f, indent=4)

# clean()