In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import wandb
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, Subset, DataLoader, Dataset

In [None]:
# Select the compute device: Apple MPS when usable, otherwise CUDA, otherwise CPU.
# `device` is read by the training cells to place the model and every batch.
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    device = 'mps'
else:
    # Explain why MPS cannot be used before falling back to another backend.
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
    # Previously `device` stayed 'mps' here, so the first `.to(device)` call failed.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Output directories used by this notebook.
# Every entry ends with '/' so that building a file name either by string
# concatenation or with os.path.join lands inside the directory. `mwdd_path`
# used to lack the separator, which turned `mwdd_path + 'fccnn_2.pth'` into
# '.../Project/mwddfccnn_2.pth'.
checkpoints_path = '/Users/sienkadounia/lab/ai-futures/Project/ewdd/'
label_noise_path = '/Users/sienkadounia/lab/ai-futures/Project/label_noise/'
rlcts_path = '/Users/sienkadounia/lab/checkpoints/rlcts/ewdd/'
mwdd_path = '/Users/sienkadounia/lab/ai-futures/Project/mwdd/'

In [None]:
# Experiment configuration read by the data-loading and training cells.

# Optimisation settings.
epochs = 2000
batch_size = 128
lr = 0.1
use_adam_op = False      # False -> SGD with momentum, True -> Adam

# Task / data settings (these three are only logged to wandb in this notebook).
num_classes = 10
use_label_noise = False
augmented = False

In [None]:
# Load and preprocess the MNIST dataset: convert images to tensors and
# normalise the single channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
mnist_train = datasets.MNIST('data/',
                             train=True,
                             download=True,
                             transform=transform)
mnist_test = datasets.MNIST('data/',
                            train=False,
                            download=True,
                            transform=transform)

# Randomly select 40k samples from the training set.
# A dedicated, seeded generator makes the subset identical after every kernel
# restart without disturbing the global RNG used for weight initialisation.
subset_generator = torch.Generator().manual_seed(42)
indices = torch.randperm(len(mnist_train), generator=subset_generator)[:40000]
train_data = torch.utils.data.Subset(mnist_train, indices)

train_loader = DataLoader(train_data,
                          batch_size=batch_size,
                          shuffle=True)
# Evaluation does not need shuffling; a fixed order keeps the composition of
# every test batch (including the short final one) the same from epoch to epoch.
test_loader = DataLoader(mnist_test,
                         batch_size=batch_size,
                         shuffle=False)

In [None]:
class FCNN(nn.Module):
    """Fully connected network with one ReLU hidden layer.

    The data flow is ``fc1 -> relu -> fc2``; the output of ``fc2`` is returned
    as-is (no softmax is applied here).

    Args:
        input_size: number of features of each flattened input sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # The attribute names become the state_dict keys, so they are kept stable.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Apply the hidden layer with ReLU, then the output layer, to ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [None]:
def initialize_with_weight_reuse(larger_model, smaller_model, additional_hidden_size):
    """Warm-start ``larger_model`` from an already trained ``smaller_model``.

    The leading rows of ``larger_model.fc1`` (weights and biases) are overwritten
    with the whole of ``smaller_model.fc1``. The remaining rows get N(0, 0.01)
    weights and zero biases. ``larger_model.fc2`` is re-initialised from scratch
    (Glorot-uniform weights, zero bias); nothing is copied from
    ``smaller_model.fc2``.

    Args:
        larger_model: model exposing ``fc1`` and ``fc2`` linear layers; modified in place.
        smaller_model: model exposing an ``fc1`` layer with no more rows than
            ``larger_model.fc1``; only read.
        additional_hidden_size: accepted but not used by the body; the split point
            is taken from the shape of ``smaller_model.fc1``.
    """
    source_fc1 = smaller_model.fc1
    target_fc1 = larger_model.fc1
    reused_rows = source_fc1.weight.size(0)
    reused_biases = source_fc1.bias.size(0)

    # Carry the trained hidden units over into the first rows of the wider layer.
    target_fc1.weight.data[:reused_rows] = source_fc1.weight.data
    target_fc1.bias.data[:reused_biases] = source_fc1.bias.data

    # The slices below are views, so the in-place initialisers write straight
    # into the wider layer's parameters.
    new_unit_weights = target_fc1.weight.data[reused_rows:]
    new_unit_biases = target_fc1.bias.data[reused_biases:]
    nn.init.normal_(new_unit_weights, mean=0, std=0.01)
    nn.init.zeros_(new_unit_biases)

    # Output layer: Glorot-uniform weights, zero bias.
    nn.init.xavier_uniform_(larger_model.fc2.weight)
    nn.init.zeros_(larger_model.fc2.bias)

In [None]:
def count_parameters(model):
    """Return the total number of scalar entries in the trainable parameters of ``model``.

    Parameters whose ``requires_grad`` flag is false are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [None]:
from torch.optim import lr_scheduler
import math
def lr_lambda(epoch):
    """Learning-rate multiplier for ``lr_scheduler.LambdaLR``: ``1 / sqrt(epoch + 1)``.

    ``LambdaLR`` sets the learning rate to the optimizer's *initial* learning
    rate times the value returned here. The base rate therefore must not be
    folded into the factor: the earlier ``0.1 / sqrt(epoch + 1)`` combined with
    an optimizer created with ``lr=0.1`` produced an effective rate of
    ``0.01 / sqrt(epoch + 1)``, ten times smaller than the configured value.

    Args:
        epoch: zero-based epoch index supplied by the scheduler.

    Returns:
        The factor applied to the initial learning rate (1.0 at epoch 0).
    """
    return 1.0 / math.sqrt(epoch + 1)

In [None]:
def train_network(model, train_loader, input_size, hidden_size, output_size):
    """Train ``model`` for ``epochs`` epochs with a LambdaLR schedule and log to wandb.

    After every epoch the model is evaluated on the module-level ``test_loader``,
    the running lists of test accuracies and losses are written under
    ``rlcts_path``, and the epoch metrics are logged to wandb. When training
    completes, the model's ``state_dict`` is saved under ``mwdd_path``.

    Besides its arguments the function reads these module-level names:
    ``lr``, ``use_adam_op``, ``epochs``, ``num_classes``, ``use_label_noise``,
    ``augmented``, ``device``, ``test_loader``, ``rlcts_path``, ``mwdd_path``,
    ``lr_lambda`` and ``count_parameters``.

    Args:
        model: network to train; expected to already live on ``device``.
        train_loader: iterable of ``(images, labels)`` training batches.
        input_size: number of features each image is flattened to.
        hidden_size: hidden width; used for logging and in the output file names.
        output_size: accepted for signature compatibility; not used by the body.

    Returns:
        None.
    """
    import os  # local import so this cell does not depend on the import cell

    criterion = nn.CrossEntropyLoss()
    if use_adam_op:
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.95)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

    # Fail early (or create the folders) rather than after a full training run.
    os.makedirs(rlcts_path, exist_ok=True)
    os.makedirs(mwdd_path, exist_ok=True)

    wandb.init(project="SLT of Double Descent",
               config={'epochs': epochs,
                       'classes': num_classes,
                       'learning_rate': lr,
                       'use_label_noise': use_label_noise,
                       'dataset': "MNIST",
                       'architecture': "FCNN",
                       # Kept for continuity with earlier runs: this is the
                       # trainable-parameter count, not the layer width.
                       'model_width': count_parameters(model),
                       'hidden_size': hidden_size,
                       'augmented': augmented,
                       'adam optimizer': use_adam_op})

    test_losses = []
    train_losses = []
    train_accs = []
    test_accs = []
    step = 0
    try:
        wandb.watch(model)
        for epoch in range(epochs):
            model.train()  # Set the model to training mode
            train_loss_sum = 0.0
            train_correct = 0
            train_seen = 0
            for images, labels in train_loader:
                images = images.view(-1, input_size).to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                batch_loss = criterion(outputs, labels)
                batch_loss.backward()
                optimizer.step()
                step += 1

                batch_loss_value = batch_loss.item()
                batch_count = labels.size(0)
                # Weight by batch size so a short final batch does not count
                # as much as a full one in the epoch average.
                train_loss_sum += batch_loss_value * batch_count
                train_correct += (outputs.argmax(dim=-1) == labels).sum().item()
                train_seen += batch_count

                wandb.log({'batch_loss': batch_loss_value}, step=step)

            # Per-sample averages over what was actually iterated (no hard-coded
            # dataset size); max(..., 1) guards against an empty loader.
            train_loss = train_loss_sum / max(train_seen, 1)
            train_acc = train_correct / max(train_seen, 1)
            train_losses.append(train_loss)
            train_accs.append(train_acc)

            # Validation (or test) loop
            model.eval()
            test_loss_sum = 0.0
            test_correct = 0
            test_seen = 0
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_size).to(device)
                    test_labels = test_labels.to(device)

                    outputs = model(test_images)
                    batch_count = test_labels.size(0)
                    test_loss_sum += criterion(outputs, test_labels).item() * batch_count
                    test_correct += (outputs.argmax(dim=-1) == test_labels).sum().item()
                    test_seen += batch_count

            test_loss = test_loss_sum / max(test_seen, 1)
            test_acc = test_correct / max(test_seen, 1)
            test_losses.append(test_loss)
            test_accs.append(test_acc)
            # Rewritten every epoch so an interrupted run still leaves its history on disk.
            torch.save(test_accs, os.path.join(rlcts_path, 'test_accs_' + str(hidden_size) + '.pt'))
            torch.save(test_losses, os.path.join(rlcts_path, 'test_losses_' + str(hidden_size) + '.pt'))

            wandb.log({'epoch': epoch,
                       'loss/train': train_loss,
                       'loss/test': test_loss,
                       'accuracy/train': train_acc,
                       'accuracy/test': test_acc
                       }, step=step)

            # Print or log the training and test losses for each epoch
            print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

            scheduler.step()

            # TODO: no early-stopping criterion is implemented; training always
            # runs for the full `epochs`.
    finally:
        # Close the wandb run even when training is interrupted or fails, so
        # the next call does not inherit a half-open run.
        wandb.finish()

    # os.path.join supplies the separator whether or not mwdd_path ends with '/'.
    torch.save(model.state_dict(), os.path.join(mwdd_path, 'fccnn_' + str(hidden_size) + '.pth'))
    print(f"Training completed for hidden size: {hidden_size}")

In [None]:
input_size = 28 * 28  # MNIST images are 28x28
output_size = 10  # 10 classes
hidden_sizes = [2, 4, 8, 16, 32, 64, 128]  # Example sizes

# Optional warm start: when True, each model after the first starts from the
# previously trained, narrower model via initialize_with_weight_reuse.
# The default (False) trains every width from a fresh initialisation.
reuse_previous_weights = False

previous_model = None
for hidden_size in hidden_sizes:
    # Re-seed before building each model so the initialisation is reproducible,
    # matching the fixed-learning-rate sweep later in the notebook.
    torch.manual_seed(42)
    model = FCNN(input_size, hidden_size, output_size).to(device)
    if reuse_previous_weights and previous_model is not None:
        initialize_with_weight_reuse(model, previous_model,
                                     hidden_size - previous_model.fc1.out_features)
    train_network(model, train_loader, input_size, hidden_size, output_size)
    previous_model = model

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msienka[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1/2000, Train Loss: 1.9905, Test Loss: 1.8072
Epoch 2/2000, Train Loss: 1.7684, Test Loss: 1.7422
Epoch 3/2000, Train Loss: 1.7239, Test Loss: 1.6965
Epoch 4/2000, Train Loss: 1.6987, Test Loss: 1.6853
Epoch 5/2000, Train Loss: 1.6882, Test Loss: 1.6775
Epoch 6/2000, Train Loss: 1.6825, Test Loss: 1.6703
Epoch 7/2000, Train Loss: 1.6695, Test Loss: 1.6657
Epoch 8/2000, Train Loss: 1.6677, Test Loss: 1.6609
Epoch 9/2000, Train Loss: 1.6630, Test Loss: 1.6535
Epoch 10/2000, Train Loss: 1.6609, Test Loss: 1.6555
Epoch 11/2000, Train Loss: 1.6583, Test Loss: 1.6490
Epoch 12/2000, Train Loss: 1.6549, Test Loss: 1.6537
Epoch 13/2000, Train Loss: 1.6553, Test Loss: 1.6475
Epoch 14/2000, Train Loss: 1.6514, Test Loss: 1.6385
Epoch 15/2000, Train Loss: 1.6510, Test Loss: 1.6411
Epoch 16/2000, Train Loss: 1.6478, Test Loss: 1.6408
Epoch 17/2000, Train Loss: 1.6487, Test Loss: 1.6323
Epoch 18/2000, Train Loss: 1.6468, Test Loss: 1.6487
Epoch 19/2000, Train Loss: 1.6453, Test Loss: 1.6512
Ep

wandb: Network error (ConnectTimeout), entering retry loop.


Epoch 851/2000, Train Loss: 1.5276, Test Loss: 1.5205
Epoch 852/2000, Train Loss: 1.5279, Test Loss: 1.5244
Epoch 853/2000, Train Loss: 1.5281, Test Loss: 1.5316
Epoch 854/2000, Train Loss: 1.5276, Test Loss: 1.5210
Epoch 855/2000, Train Loss: 1.5277, Test Loss: 1.5322
Epoch 856/2000, Train Loss: 1.5281, Test Loss: 1.5244
Epoch 857/2000, Train Loss: 1.5280, Test Loss: 1.5307
Epoch 858/2000, Train Loss: 1.5280, Test Loss: 1.5234
Epoch 859/2000, Train Loss: 1.5278, Test Loss: 1.5249
Epoch 860/2000, Train Loss: 1.5282, Test Loss: 1.5262
Epoch 861/2000, Train Loss: 1.5285, Test Loss: 1.5291
Epoch 862/2000, Train Loss: 1.5290, Test Loss: 1.5273
Epoch 863/2000, Train Loss: 1.5279, Test Loss: 1.5496
Epoch 864/2000, Train Loss: 1.5280, Test Loss: 1.5232
Epoch 865/2000, Train Loss: 1.5278, Test Loss: 1.5294
Epoch 866/2000, Train Loss: 1.5277, Test Loss: 1.5220
Epoch 867/2000, Train Loss: 1.5279, Test Loss: 1.5227
Epoch 868/2000, Train Loss: 1.5281, Test Loss: 1.5288
Epoch 869/2000, Train Loss: 



0,1
accuracy/test,▁▄▆▅▄▅▆██▆▆▇█▇▇▇▇▇▇▇▇▆▇█▇▇▇▇▇▆▇▇▇▇▇█▇▇▆▇
accuracy/train,▁▄▆▄▄▅▇█▇██▇███████████████▇██▇█▇███████
batch_loss,▆▄▆▅▅▁▄▄▄▄▂▂▅▂▆▄▅█▂▅▃▄▄▁▄▅▅▅▄▄▃▃▄▄▅▄▃▃▃▄
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,█▆▅▃▃▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁
loss/train,█▆▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.3997
accuracy/train,0.4022
batch_loss,1.23251
epoch,1999.0
loss/test,1.52275
loss/train,1.52469


Training completed for hidden size: 2


Epoch 1/2000, Train Loss: 1.2595, Test Loss: 0.9694
Epoch 2/2000, Train Loss: 0.9204, Test Loss: 0.8633
Epoch 3/2000, Train Loss: 0.8205, Test Loss: 0.7995
Epoch 4/2000, Train Loss: 0.7943, Test Loss: 0.7876
Epoch 5/2000, Train Loss: 0.7610, Test Loss: 0.7596
Epoch 6/2000, Train Loss: 0.7637, Test Loss: 0.7566
Epoch 7/2000, Train Loss: 0.7512, Test Loss: 0.7594
Epoch 8/2000, Train Loss: 0.7426, Test Loss: 0.7743
Epoch 9/2000, Train Loss: 0.7361, Test Loss: 0.7576
Epoch 10/2000, Train Loss: 0.7273, Test Loss: 0.7494
Epoch 11/2000, Train Loss: 0.7251, Test Loss: 0.7651
Epoch 12/2000, Train Loss: 0.7268, Test Loss: 0.7326
Epoch 13/2000, Train Loss: 0.7174, Test Loss: 0.7297
Epoch 14/2000, Train Loss: 0.7140, Test Loss: 0.7403
Epoch 15/2000, Train Loss: 0.7092, Test Loss: 0.7493
Epoch 16/2000, Train Loss: 0.7083, Test Loss: 0.7203
Epoch 17/2000, Train Loss: 0.7092, Test Loss: 0.7599
Epoch 18/2000, Train Loss: 0.7003, Test Loss: 0.7500
Epoch 19/2000, Train Loss: 0.7022, Test Loss: 0.7357
Ep



0,1
accuracy/test,▁▃▅▆▇▇▇▇█▆▇███▇▇▇████▇███▇█████▇████████
accuracy/train,▁▄▆▇▆▇▇▇▇▇██████████████████████████████
batch_loss,▇▃▃▆▆▃▆▂█▆▂▂▅▅▃▂▅▂▄▆▅▅▆▆▆▃▃▁▂▅▂▂▅▃▃▅▄▅▄▄
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,█▅▃▁▁▃▂▂▃▄▂▁▂▂▁▂▂▃▁▃▂▂▂▁▃▂▂▂▁▃▂▃▂▂▂▂▂▂▃▁
loss/train,█▅▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.803
accuracy/train,0.8088
batch_loss,0.55128
epoch,1999.0
loss/test,0.68451
loss/train,0.62273


Training completed for hidden size: 4


Epoch 1/2000, Train Loss: 0.6898, Test Loss: 0.3830
Epoch 2/2000, Train Loss: 0.3681, Test Loss: 0.3585
Epoch 3/2000, Train Loss: 0.3371, Test Loss: 0.3486
Epoch 4/2000, Train Loss: 0.3260, Test Loss: 0.3245
Epoch 5/2000, Train Loss: 0.3168, Test Loss: 0.3212
Epoch 6/2000, Train Loss: 0.3171, Test Loss: 0.3290
Epoch 7/2000, Train Loss: 0.3097, Test Loss: 0.3194
Epoch 8/2000, Train Loss: 0.3021, Test Loss: 0.3272
Epoch 9/2000, Train Loss: 0.2975, Test Loss: 0.3107
Epoch 10/2000, Train Loss: 0.2935, Test Loss: 0.3096
Epoch 11/2000, Train Loss: 0.2921, Test Loss: 0.3094
Epoch 12/2000, Train Loss: 0.2870, Test Loss: 0.3111
Epoch 13/2000, Train Loss: 0.2872, Test Loss: 0.3049
Epoch 14/2000, Train Loss: 0.2849, Test Loss: 0.3105
Epoch 15/2000, Train Loss: 0.2879, Test Loss: 0.3039
Epoch 16/2000, Train Loss: 0.2800, Test Loss: 0.3082
Epoch 17/2000, Train Loss: 0.2808, Test Loss: 0.2995
Epoch 18/2000, Train Loss: 0.2771, Test Loss: 0.3191
Epoch 19/2000, Train Loss: 0.2747, Test Loss: 0.3028
Ep

wandb: Network error (ConnectionError), entering retry loop.


Epoch 442/2000, Train Loss: 0.2156, Test Loss: 0.2978
Epoch 443/2000, Train Loss: 0.2157, Test Loss: 0.2961
Epoch 444/2000, Train Loss: 0.2155, Test Loss: 0.2943
Epoch 445/2000, Train Loss: 0.2161, Test Loss: 0.2951
Epoch 446/2000, Train Loss: 0.2152, Test Loss: 0.2944
Epoch 447/2000, Train Loss: 0.2156, Test Loss: 0.2990
Epoch 448/2000, Train Loss: 0.2157, Test Loss: 0.2989
Epoch 449/2000, Train Loss: 0.2152, Test Loss: 0.2981
Epoch 450/2000, Train Loss: 0.2159, Test Loss: 0.2947
Epoch 451/2000, Train Loss: 0.2159, Test Loss: 0.2959
Epoch 452/2000, Train Loss: 0.2147, Test Loss: 0.3061
Epoch 453/2000, Train Loss: 0.2167, Test Loss: 0.2987
Epoch 454/2000, Train Loss: 0.2164, Test Loss: 0.2994
Epoch 455/2000, Train Loss: 0.2159, Test Loss: 0.2953
Epoch 456/2000, Train Loss: 0.2160, Test Loss: 0.2948
Epoch 457/2000, Train Loss: 0.2154, Test Loss: 0.3042
Epoch 458/2000, Train Loss: 0.2154, Test Loss: 0.2945
Epoch 459/2000, Train Loss: 0.2146, Test Loss: 0.2964
Epoch 460/2000, Train Loss: 

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


0,1
accuracy/test,▁▇█▇▇▅▇▅▆▆▅▇███▇██▅██▅▇█▆▇▆▆▇▆▆▇▅▆▅▄▄▆▆▅
accuracy/train,▁▃▄▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇██▇█████████
batch_loss,▆▃▆█▆▃▄▃▅▄▅▂▂▅▁▆▄▆▅▄▄▃▇▄▂▄▃▅▃▅▅▄▄▃▃▄▂▂▃▃
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▅▁▁▁▂▃▂▄▃▃▃▃▂▃▃▂▃▃▄▃▄▅▄▅▅▄▅▆▅▅▅█▆▅▆▇▇▆▆▆
loss/train,█▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9153
accuracy/train,0.94303
batch_loss,0.11634
epoch,1999.0
loss/test,0.31979
loss/train,0.19693


Training completed for hidden size: 8


Epoch 1/2000, Train Loss: 0.5556, Test Loss: 0.3415
Epoch 2/2000, Train Loss: 0.3032, Test Loss: 0.2748
Epoch 3/2000, Train Loss: 0.2733, Test Loss: 0.2669
Epoch 4/2000, Train Loss: 0.2506, Test Loss: 0.2606
Epoch 5/2000, Train Loss: 0.2418, Test Loss: 0.2386
Epoch 6/2000, Train Loss: 0.2316, Test Loss: 0.2492
Epoch 7/2000, Train Loss: 0.2252, Test Loss: 0.2351
Epoch 8/2000, Train Loss: 0.2169, Test Loss: 0.2309
Epoch 9/2000, Train Loss: 0.2132, Test Loss: 0.2329
Epoch 10/2000, Train Loss: 0.2067, Test Loss: 0.2350
Epoch 11/2000, Train Loss: 0.2016, Test Loss: 0.2365
Epoch 12/2000, Train Loss: 0.1978, Test Loss: 0.2110
Epoch 13/2000, Train Loss: 0.1935, Test Loss: 0.2140
Epoch 14/2000, Train Loss: 0.1911, Test Loss: 0.2132
Epoch 15/2000, Train Loss: 0.1876, Test Loss: 0.2075
Epoch 16/2000, Train Loss: 0.1857, Test Loss: 0.2107
Epoch 17/2000, Train Loss: 0.1848, Test Loss: 0.2140
Epoch 18/2000, Train Loss: 0.1801, Test Loss: 0.2069
Epoch 19/2000, Train Loss: 0.1779, Test Loss: 0.2069
Ep

wandb: Network error (ConnectionError), entering retry loop.


Epoch 1276/2000, Train Loss: 0.0702, Test Loss: 0.2743
Epoch 1277/2000, Train Loss: 0.0703, Test Loss: 0.2665


[34m[1mwandb[0m: Network error resolved after 0:00:16.631944, resuming normal operation.


Epoch 1278/2000, Train Loss: 0.0703, Test Loss: 0.2682
Epoch 1279/2000, Train Loss: 0.0705, Test Loss: 0.2687
Epoch 1280/2000, Train Loss: 0.0702, Test Loss: 0.2740
Epoch 1281/2000, Train Loss: 0.0706, Test Loss: 0.2669
Epoch 1282/2000, Train Loss: 0.0704, Test Loss: 0.2718
Epoch 1283/2000, Train Loss: 0.0703, Test Loss: 0.2663
Epoch 1284/2000, Train Loss: 0.0703, Test Loss: 0.2663
Epoch 1285/2000, Train Loss: 0.0698, Test Loss: 0.2692
Epoch 1286/2000, Train Loss: 0.0697, Test Loss: 0.2709
Epoch 1287/2000, Train Loss: 0.0696, Test Loss: 0.2750
Epoch 1288/2000, Train Loss: 0.0702, Test Loss: 0.2666
Epoch 1289/2000, Train Loss: 0.0701, Test Loss: 0.2687
Epoch 1290/2000, Train Loss: 0.0699, Test Loss: 0.2679
Epoch 1291/2000, Train Loss: 0.0705, Test Loss: 0.2679
Epoch 1292/2000, Train Loss: 0.0706, Test Loss: 0.2673
Epoch 1293/2000, Train Loss: 0.0698, Test Loss: 0.2704
Epoch 1294/2000, Train Loss: 0.0698, Test Loss: 0.2683
Epoch 1295/2000, Train Loss: 0.0703, Test Loss: 0.2803
Epoch 1296



0,1
accuracy/test,▃███▇█▇▅█▆▇▆▅▆▆▅▄▄▅▅▄▄▃▃▃▃▃▃▃▃▂▂▁▁▁▂▂▁▁▁
accuracy/train,▁▃▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
batch_loss,▅▄▃▅▃▂█▂▄▄▃▃▃▂▅▁▄▄▁▂▃▂▂▁▃▆▂▂▃▂▂▃▂▃▃▂▁▁▂▃
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▂▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▆▇▆▇▇▇▇▇▇██▇█
loss/train,█▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9366
accuracy/train,0.9833
batch_loss,0.02567
epoch,1999.0
loss/test,0.29642
loss/train,0.06207


Training completed for hidden size: 16


Epoch 1/2000, Train Loss: 0.5339, Test Loss: 0.3021
Epoch 2/2000, Train Loss: 0.2637, Test Loss: 0.2412
Epoch 3/2000, Train Loss: 0.2182, Test Loss: 0.2000
Epoch 4/2000, Train Loss: 0.1904, Test Loss: 0.1906
Epoch 5/2000, Train Loss: 0.1722, Test Loss: 0.1801
Epoch 6/2000, Train Loss: 0.1583, Test Loss: 0.1633
Epoch 7/2000, Train Loss: 0.1472, Test Loss: 0.1571
Epoch 8/2000, Train Loss: 0.1372, Test Loss: 0.1493
Epoch 9/2000, Train Loss: 0.1314, Test Loss: 0.1494
Epoch 10/2000, Train Loss: 0.1247, Test Loss: 0.1471
Epoch 11/2000, Train Loss: 0.1218, Test Loss: 0.1514
Epoch 12/2000, Train Loss: 0.1170, Test Loss: 0.1383
Epoch 13/2000, Train Loss: 0.1128, Test Loss: 0.1390
Epoch 14/2000, Train Loss: 0.1101, Test Loss: 0.1373
Epoch 15/2000, Train Loss: 0.1066, Test Loss: 0.1292
Epoch 16/2000, Train Loss: 0.1052, Test Loss: 0.1359
Epoch 17/2000, Train Loss: 0.1004, Test Loss: 0.1318
Epoch 18/2000, Train Loss: 0.0973, Test Loss: 0.1261
Epoch 19/2000, Train Loss: 0.0959, Test Loss: 0.1262
Ep

wandb: Network error (ConnectionError), entering retry loop.


Epoch 1369/2000, Train Loss: 0.0040, Test Loss: 0.2063
Epoch 1370/2000, Train Loss: 0.0040, Test Loss: 0.2069
Epoch 1371/2000, Train Loss: 0.0040, Test Loss: 0.2072
Epoch 1372/2000, Train Loss: 0.0040, Test Loss: 0.2070
Epoch 1373/2000, Train Loss: 0.0040, Test Loss: 0.2069
Epoch 1374/2000, Train Loss: 0.0040, Test Loss: 0.2072
Epoch 1375/2000, Train Loss: 0.0040, Test Loss: 0.2071
Epoch 1376/2000, Train Loss: 0.0040, Test Loss: 0.2067
Epoch 1377/2000, Train Loss: 0.0040, Test Loss: 0.2085
Epoch 1378/2000, Train Loss: 0.0040, Test Loss: 0.2070
Epoch 1379/2000, Train Loss: 0.0040, Test Loss: 0.2083
Epoch 1380/2000, Train Loss: 0.0040, Test Loss: 0.2115
Epoch 1381/2000, Train Loss: 0.0040, Test Loss: 0.2077
Epoch 1382/2000, Train Loss: 0.0040, Test Loss: 0.2071
Epoch 1383/2000, Train Loss: 0.0040, Test Loss: 0.2075
Epoch 1384/2000, Train Loss: 0.0040, Test Loss: 0.2071
Epoch 1385/2000, Train Loss: 0.0040, Test Loss: 0.2083
Epoch 1386/2000, Train Loss: 0.0040, Test Loss: 0.2072
Epoch 1387



0,1
accuracy/test,▁▇█▆▆▅▄▂▃▃▂▃▂▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▂▂▂▁▁
accuracy/train,▁▅▆▆▇▇▇█████████████████████████████████
batch_loss,█▅▆▂▂▂▂▁▂▂▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▂▁▁▂▂▂▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█▇▇▇█████
loss/train,█▄▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9591
accuracy/train,1.0
batch_loss,0.00069
epoch,1999.0
loss/test,0.22159
loss/train,0.00283


Training completed for hidden size: 32


Epoch 1/2000, Train Loss: 0.5367, Test Loss: 0.2860
Epoch 2/2000, Train Loss: 0.2505, Test Loss: 0.2249
Epoch 3/2000, Train Loss: 0.2023, Test Loss: 0.1863
Epoch 4/2000, Train Loss: 0.1711, Test Loss: 0.1666
Epoch 5/2000, Train Loss: 0.1511, Test Loss: 0.1543
Epoch 6/2000, Train Loss: 0.1368, Test Loss: 0.1396
Epoch 7/2000, Train Loss: 0.1267, Test Loss: 0.1339
Epoch 8/2000, Train Loss: 0.1176, Test Loss: 0.1376
Epoch 9/2000, Train Loss: 0.1118, Test Loss: 0.1294
Epoch 10/2000, Train Loss: 0.1049, Test Loss: 0.1194
Epoch 11/2000, Train Loss: 0.0989, Test Loss: 0.1186
Epoch 12/2000, Train Loss: 0.0954, Test Loss: 0.1199
Epoch 13/2000, Train Loss: 0.0902, Test Loss: 0.1184
Epoch 14/2000, Train Loss: 0.0864, Test Loss: 0.1108
Epoch 15/2000, Train Loss: 0.0837, Test Loss: 0.1153
Epoch 16/2000, Train Loss: 0.0803, Test Loss: 0.1062
Epoch 17/2000, Train Loss: 0.0772, Test Loss: 0.1122
Epoch 18/2000, Train Loss: 0.0751, Test Loss: 0.1068
Epoch 19/2000, Train Loss: 0.0729, Test Loss: 0.1082
Ep

wandb: Network error (ConnectTimeout), entering retry loop.


Epoch 1644/2000, Train Loss: 0.0014, Test Loss: 0.1367
Epoch 1645/2000, Train Loss: 0.0014, Test Loss: 0.1367
Epoch 1646/2000, Train Loss: 0.0014, Test Loss: 0.1367
Epoch 1647/2000, Train Loss: 0.0014, Test Loss: 0.1374
Epoch 1648/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1649/2000, Train Loss: 0.0014, Test Loss: 0.1370
Epoch 1650/2000, Train Loss: 0.0014, Test Loss: 0.1475
Epoch 1651/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1652/2000, Train Loss: 0.0014, Test Loss: 0.1378
Epoch 1653/2000, Train Loss: 0.0014, Test Loss: 0.1378
Epoch 1654/2000, Train Loss: 0.0014, Test Loss: 0.1429
Epoch 1655/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1656/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1657/2000, Train Loss: 0.0014, Test Loss: 0.1378
Epoch 1658/2000, Train Loss: 0.0014, Test Loss: 0.1373
Epoch 1659/2000, Train Loss: 0.0014, Test Loss: 0.1369
Epoch 1660/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1661/2000, Train Loss: 0.0014, Test Loss: 0.1368
Epoch 1662



0,1
accuracy/test,▁▇██▇▇█▆▇▇▇▇▇▆▇▇▇▆▇▆▇▇▆▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆
accuracy/train,▁▆▇█████████████████████████████████████
batch_loss,█▇▄▃▂▂▂▂▃▂▂▂▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▂▁▁▁▂▃▃▃▄▅▄▄▄▅▆▅▅▅▅▆▅▆▆▇▇▆▆▆█▆▇▆▆▆▇▇▇▇▇▇
loss/train,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9703
accuracy/train,1.0
batch_loss,0.00152
epoch,1999.0
loss/test,0.13928
loss/train,0.0012


Training completed for hidden size: 64


Epoch 1/2000, Train Loss: 0.5117, Test Loss: 0.2641
Epoch 2/2000, Train Loss: 0.2314, Test Loss: 0.1935
Epoch 3/2000, Train Loss: 0.1759, Test Loss: 0.1742
Epoch 4/2000, Train Loss: 0.1506, Test Loss: 0.1458
Epoch 5/2000, Train Loss: 0.1300, Test Loss: 0.1374
Epoch 6/2000, Train Loss: 0.1183, Test Loss: 0.1379
Epoch 7/2000, Train Loss: 0.1089, Test Loss: 0.1259
Epoch 8/2000, Train Loss: 0.1009, Test Loss: 0.1148
Epoch 9/2000, Train Loss: 0.0928, Test Loss: 0.1114
Epoch 10/2000, Train Loss: 0.0872, Test Loss: 0.1114
Epoch 11/2000, Train Loss: 0.0825, Test Loss: 0.1055
Epoch 12/2000, Train Loss: 0.0769, Test Loss: 0.1029
Epoch 13/2000, Train Loss: 0.0731, Test Loss: 0.1014
Epoch 14/2000, Train Loss: 0.0700, Test Loss: 0.0980
Epoch 15/2000, Train Loss: 0.0667, Test Loss: 0.0979
Epoch 16/2000, Train Loss: 0.0630, Test Loss: 0.0953
Epoch 17/2000, Train Loss: 0.0611, Test Loss: 0.0977
Epoch 18/2000, Train Loss: 0.0591, Test Loss: 0.0953
Epoch 19/2000, Train Loss: 0.0556, Test Loss: 0.0957
Ep

KeyboardInterrupt: 

### Fixed learning rate, 400 epochs

In [None]:
# Overrides for the fixed-learning-rate experiment: Adam with a constant step
# size and a shorter run. These replace the module-level values set earlier.
use_adam_op = True
lr = 0.002
epochs = 400
def train_network(model, train_loader, input_size, hidden_size, output_size):
    """Train ``model`` for ``epochs`` epochs at a constant learning rate and log to wandb.

    This variant uses no learning-rate scheduler. After every epoch the model is
    evaluated on the module-level ``test_loader``, the running lists of test
    accuracies and losses are written under ``rlcts_path``, and the epoch
    metrics are logged to wandb. When training completes, the model's
    ``state_dict`` is saved under ``mwdd_path`` with a ``_fixed`` suffix.

    Besides its arguments the function reads these module-level names:
    ``lr``, ``use_adam_op``, ``epochs``, ``num_classes``, ``use_label_noise``,
    ``augmented``, ``device``, ``test_loader``, ``rlcts_path``, ``mwdd_path``
    and ``count_parameters``.

    Args:
        model: network to train; expected to already live on ``device``.
        train_loader: iterable of ``(images, labels)`` training batches.
        input_size: number of features each image is flattened to.
        hidden_size: hidden width; used for logging and in the output file names.
        output_size: accepted for signature compatibility; not used by the body.

    Returns:
        None.
    """
    import os  # local import so this cell does not depend on the import cell

    criterion = nn.CrossEntropyLoss()
    if use_adam_op:
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.95)

    # Fail early (or create the folders) rather than after a full training run.
    os.makedirs(rlcts_path, exist_ok=True)
    os.makedirs(mwdd_path, exist_ok=True)

    wandb.init(project="SLT of Double Descent",
               config={'epochs': epochs,
                       'classes': num_classes,
                       'learning_rate': lr,
                       'use_label_noise': use_label_noise,
                       'dataset': "MNIST",
                       'architecture': "FCNN",
                       # Kept for continuity with earlier runs: this is the
                       # trainable-parameter count, not the layer width.
                       'model_width': count_parameters(model),
                       'hidden_size': hidden_size,
                       'augmented': augmented,
                       'adam optimizer': use_adam_op})

    test_losses = []
    train_losses = []
    train_accs = []
    test_accs = []
    step = 0
    try:
        wandb.watch(model)
        for epoch in range(epochs):
            model.train()  # Set the model to training mode
            train_loss_sum = 0.0
            train_correct = 0
            train_seen = 0
            for images, labels in train_loader:
                images = images.view(-1, input_size).to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                batch_loss = criterion(outputs, labels)
                batch_loss.backward()
                optimizer.step()
                step += 1

                batch_loss_value = batch_loss.item()
                batch_count = labels.size(0)
                # Weight by batch size so a short final batch does not count
                # as much as a full one in the epoch average.
                train_loss_sum += batch_loss_value * batch_count
                train_correct += (outputs.argmax(dim=-1) == labels).sum().item()
                train_seen += batch_count

                wandb.log({'batch_loss': batch_loss_value}, step=step)

            # Per-sample averages over what was actually iterated (no hard-coded
            # dataset size); max(..., 1) guards against an empty loader.
            train_loss = train_loss_sum / max(train_seen, 1)
            train_acc = train_correct / max(train_seen, 1)
            train_losses.append(train_loss)
            train_accs.append(train_acc)

            # Validation (or test) loop
            model.eval()
            test_loss_sum = 0.0
            test_correct = 0
            test_seen = 0
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_size).to(device)
                    test_labels = test_labels.to(device)

                    outputs = model(test_images)
                    batch_count = test_labels.size(0)
                    test_loss_sum += criterion(outputs, test_labels).item() * batch_count
                    test_correct += (outputs.argmax(dim=-1) == test_labels).sum().item()
                    test_seen += batch_count

            test_loss = test_loss_sum / max(test_seen, 1)
            test_acc = test_correct / max(test_seen, 1)
            test_losses.append(test_loss)
            test_accs.append(test_acc)
            # Rewritten every epoch so an interrupted run still leaves its history on disk.
            # NOTE(review): these file names carry no '_fixed' suffix, so they
            # overwrite the metric files of any earlier run that used the same
            # names and hidden size - confirm whether that is intended.
            torch.save(test_accs, os.path.join(rlcts_path, 'test_accs_' + str(hidden_size) + '.pt'))
            torch.save(test_losses, os.path.join(rlcts_path, 'test_losses_' + str(hidden_size) + '.pt'))

            wandb.log({'epoch': epoch,
                       'loss/train': train_loss,
                       'loss/test': test_loss,
                       'accuracy/train': train_acc,
                       'accuracy/test': test_acc
                       }, step=step)

            # Print or log the training and test losses for each epoch
            print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

            # TODO: no early-stopping criterion is implemented; training always
            # runs for the full `epochs`.
    finally:
        # Close the wandb run even when training is interrupted or fails, so
        # the next call does not inherit a half-open run.
        wandb.finish()

    # os.path.join supplies the separator whether or not mwdd_path ends with '/'.
    torch.save(model.state_dict(), os.path.join(mwdd_path, 'fccnn_' + str(hidden_size) + '_fixed.pth'))
    print(f"Training completed for hidden size: {hidden_size}")

In [None]:
input_size = 28 * 28  # MNIST images are 28x28
output_size = 10  # 10 classes
hidden_sizes = [2, 4, 8, 16, 32, 64, 128, 512]  # Example sizes

# Optional warm start: when True, each model after the first starts from the
# previously trained, narrower model via initialize_with_weight_reuse.
# The default (False) trains every width from a fresh initialisation.
reuse_previous_weights = False

previous_model = None
for hidden_size in hidden_sizes:
    # Re-seed before building each model so the initialisation is reproducible.
    torch.manual_seed(42)
    model = FCNN(input_size, hidden_size, output_size).to(device)
    if reuse_previous_weights and previous_model is not None:
        initialize_with_weight_reuse(model, previous_model,
                                     hidden_size - previous_model.fc1.out_features)
    train_network(model, train_loader, input_size, hidden_size, output_size)
    previous_model = model



0,1
accuracy/test,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy/train,▁███████████████████████████████████████
batch_loss,▂▅▅▁▃▇▄▂▄▄▃▃▃▄▂▂▄▅▇▅▂▄▂▄▂▁▂▃▃▅▆▄▃▃▁▅▇▅█▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▅▄▆▆▂▂▃▅▄▄▄▇▅▃▃▁▆▅▄▄▄▄▅▁▃▆▂▅▂▃▃▅▄▄█▃▆▂▄▄
loss/train,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.1135
accuracy/train,0.11238
batch_loss,2.30383
epoch,77.0
loss/test,2.30107
loss/train,2.30137


Epoch 1/400, Train Loss: 2.3235, Test Loss: 2.3033
Epoch 2/400, Train Loss: 2.3021, Test Loss: 2.3010
Epoch 3/400, Train Loss: 2.3013, Test Loss: 2.3005
Epoch 4/400, Train Loss: 2.3012, Test Loss: 2.3012
Epoch 5/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 6/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 7/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 8/400, Train Loss: 2.3013, Test Loss: 2.3008
Epoch 9/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 10/400, Train Loss: 2.3013, Test Loss: 2.3008
Epoch 11/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 12/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 13/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 14/400, Train Loss: 2.3013, Test Loss: 2.3012
Epoch 15/400, Train Loss: 2.3012, Test Loss: 2.3009
Epoch 16/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 17/400, Train Loss: 2.3012, Test Loss: 2.3011
Epoch 18/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 19/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 20/400, Train L



0,1
accuracy/test,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
batch_loss,▅▇▅▂▅▁▁▃▂▇▃▇▇▃▂▅▅▄▇█▃▄▆▄▃▅▄▃▄▇▄▃▅▄▄▆▁▃▆▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▄▂▄▅▄▂▅▄▄█▄▂▂▂▂█▅▂▄▂▅█▄▃▄▃▃▄▂▃▃▅▅▆▃▃▂▅▁▃
loss/train,▁▄▄▆▄▂▄▂▅▄▆▇▅▃▆▅▅▆▇█▄▄▃▅▆▃▅▇▆▆▄▆▇▄▆▅▆▅▅▄

0,1
accuracy/test,0.1135
accuracy/train,0.11238
batch_loss,2.3041
epoch,399.0
loss/test,2.30098
loss/train,2.30125


Training completed for hidden size: 2


Epoch 1/400, Train Loss: 2.3132, Test Loss: 2.3023
Epoch 2/400, Train Loss: 2.3016, Test Loss: 2.3010
Epoch 3/400, Train Loss: 2.3013, Test Loss: 2.3008
Epoch 4/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 5/400, Train Loss: 2.3012, Test Loss: 2.3011
Epoch 6/400, Train Loss: 2.3013, Test Loss: 2.3012
Epoch 7/400, Train Loss: 2.3012, Test Loss: 2.3008
Epoch 8/400, Train Loss: 2.3012, Test Loss: 2.3010
Epoch 9/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 10/400, Train Loss: 2.3012, Test Loss: 2.3008
Epoch 11/400, Train Loss: 2.3012, Test Loss: 2.3009
Epoch 12/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 13/400, Train Loss: 2.3013, Test Loss: 2.3012
Epoch 14/400, Train Loss: 2.3013, Test Loss: 2.3010
Epoch 15/400, Train Loss: 2.3013, Test Loss: 2.3008
Epoch 16/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 17/400, Train Loss: 2.3012, Test Loss: 2.3014
Epoch 18/400, Train Loss: 2.3012, Test Loss: 2.3010
Epoch 19/400, Train Loss: 2.3013, Test Loss: 2.3013
Epoch 20/400, Train L

wandb: Network error (ConnectTimeout), entering retry loop.


Epoch 72/400, Train Loss: 2.3013, Test Loss: 2.3012
Epoch 73/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 74/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 75/400, Train Loss: 2.3012, Test Loss: 2.3013
Epoch 76/400, Train Loss: 2.3013, Test Loss: 2.3006
Epoch 77/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 78/400, Train Loss: 2.3012, Test Loss: 2.3010
Epoch 79/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 80/400, Train Loss: 2.3013, Test Loss: 2.3013
Epoch 81/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 82/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 83/400, Train Loss: 2.3012, Test Loss: 2.3012
Epoch 84/400, Train Loss: 2.3013, Test Loss: 2.3012
Epoch 85/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 86/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 87/400, Train Loss: 2.3013, Test Loss: 2.3009
Epoch 88/400, Train Loss: 2.3013, Test Loss: 2.3011
Epoch 89/400, Train Loss: 2.3012, Test Loss: 2.3013
Epoch 90/400, Train Loss: 2.3013, Test Loss: 2.3008
Epoch 91/400



0,1
accuracy/test,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy/train,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
batch_loss,▅▇▄▃▃▇▃▇▆▅▆▆▅▅▆▆▅▆▆▇▇▆▆▂▄▃▆▄▄▇▅▁▅▄▅▅▇▅▅█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▅▅▅▇▂▄▄▇▇▄▄▄▅▅▅▅█▄█▄▆▆▆▆▅▆▄▄▆▆▁▃▇▆█▂▇▃▇▃
loss/train,▅▄▇▂▁▄▁█▄▃▂▁▅▁▄▃▂▅▃▅▆▂▃▄▄▄▅▃▆▄▃▂▃▅█▃▃▄▄▅

0,1
accuracy/test,0.1135
accuracy/train,0.11238
batch_loss,2.30775
epoch,399.0
loss/test,2.30079
loss/train,2.30129


Training completed for hidden size: 4


Epoch 1/400, Train Loss: 1.0635, Test Loss: 0.7412
Epoch 2/400, Train Loss: 0.6470, Test Loss: 0.5869
Epoch 3/400, Train Loss: 0.5792, Test Loss: 0.5527
Epoch 4/400, Train Loss: 0.5456, Test Loss: 0.5317
Epoch 5/400, Train Loss: 0.5293, Test Loss: 0.5041
Epoch 6/400, Train Loss: 0.5148, Test Loss: 0.5257
Epoch 7/400, Train Loss: 0.5060, Test Loss: 0.4908
Epoch 8/400, Train Loss: 0.4976, Test Loss: 0.4875
Epoch 9/400, Train Loss: 0.4926, Test Loss: 0.4805
Epoch 10/400, Train Loss: 0.4892, Test Loss: 0.4866
Epoch 11/400, Train Loss: 0.4837, Test Loss: 0.4881
Epoch 12/400, Train Loss: 0.4810, Test Loss: 0.4769
Epoch 13/400, Train Loss: 0.4791, Test Loss: 0.4689
Epoch 14/400, Train Loss: 0.4748, Test Loss: 0.4754
Epoch 15/400, Train Loss: 0.4731, Test Loss: 0.4862
Epoch 16/400, Train Loss: 0.4698, Test Loss: 0.4725
Epoch 17/400, Train Loss: 0.4674, Test Loss: 0.4838
Epoch 18/400, Train Loss: 0.4689, Test Loss: 0.4683
Epoch 19/400, Train Loss: 0.4690, Test Loss: 0.4772
Epoch 20/400, Train L

wandb: Network error (ConnectionError), entering retry loop.


Epoch 116/400, Train Loss: 0.4291, Test Loss: 0.4878
Epoch 117/400, Train Loss: 0.4290, Test Loss: 0.4641
Epoch 118/400, Train Loss: 0.4267, Test Loss: 0.4597
Epoch 119/400, Train Loss: 0.4286, Test Loss: 0.4654
Epoch 120/400, Train Loss: 0.4288, Test Loss: 0.4498
Epoch 121/400, Train Loss: 0.4268, Test Loss: 0.4487
Epoch 122/400, Train Loss: 0.4262, Test Loss: 0.4594
Epoch 123/400, Train Loss: 0.4272, Test Loss: 0.4623
Epoch 124/400, Train Loss: 0.4304, Test Loss: 0.4507
Epoch 125/400, Train Loss: 0.4282, Test Loss: 0.4552
Epoch 126/400, Train Loss: 0.4285, Test Loss: 0.4870
Epoch 127/400, Train Loss: 0.4293, Test Loss: 0.4569
Epoch 128/400, Train Loss: 0.4269, Test Loss: 0.4693
Epoch 129/400, Train Loss: 0.4272, Test Loss: 0.4469
Epoch 130/400, Train Loss: 0.4258, Test Loss: 0.4587
Epoch 131/400, Train Loss: 0.4258, Test Loss: 0.4586
Epoch 132/400, Train Loss: 0.4257, Test Loss: 0.4568
Epoch 133/400, Train Loss: 0.4235, Test Loss: 0.4679
Epoch 134/400, Train Loss: 0.4278, Test Loss: 

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


0,1
accuracy/test,▁▅▆▇▅▆▅▇▇▇▆▇▇▆█▆▇▄▇▇▇▇▇▇▇█▇▇▇▇▆█▇▇▇▇██▇█
accuracy/train,▁▅▆▆▇▇▇▇▇▇▇▇▇█▇██▇▇▇▇███████████████████
batch_loss,▇▃▄▄▄▁▃▆▁▃▃▆▂▆▁▄█▃▄▆▃▄█▄▄▃▅▅▅▆▂▂▇▅▃█▃▄▃▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,█▃▂▂▃▂▂▂▂▂▃▂▁▃▂▂▁▄▂▂▂▁▁▁▁▁▂▁▂▁▂▁▂▂▁▁▁▂▂▁
loss/train,█▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.8723
accuracy/train,0.88072
batch_loss,0.37154
epoch,399.0
loss/test,0.45131
loss/train,0.41348


Training completed for hidden size: 8


Epoch 1/400, Train Loss: 0.9031, Test Loss: 0.5339
Epoch 2/400, Train Loss: 0.5101, Test Loss: 0.4500
Epoch 3/400, Train Loss: 0.4586, Test Loss: 0.4303
Epoch 4/400, Train Loss: 0.4289, Test Loss: 0.4056
Epoch 5/400, Train Loss: 0.4097, Test Loss: 0.4107
Epoch 6/400, Train Loss: 0.3931, Test Loss: 0.3766
Epoch 7/400, Train Loss: 0.3800, Test Loss: 0.3651
Epoch 8/400, Train Loss: 0.3715, Test Loss: 0.3738
Epoch 9/400, Train Loss: 0.3599, Test Loss: 0.3633
Epoch 10/400, Train Loss: 0.3554, Test Loss: 0.3489
Epoch 11/400, Train Loss: 0.3479, Test Loss: 0.3679
Epoch 12/400, Train Loss: 0.3436, Test Loss: 0.3510
Epoch 13/400, Train Loss: 0.3403, Test Loss: 0.3506
Epoch 14/400, Train Loss: 0.3355, Test Loss: 0.3553
Epoch 15/400, Train Loss: 0.3345, Test Loss: 0.3474
Epoch 16/400, Train Loss: 0.3315, Test Loss: 0.3441
Epoch 17/400, Train Loss: 0.3312, Test Loss: 0.3299
Epoch 18/400, Train Loss: 0.3252, Test Loss: 0.3472
Epoch 19/400, Train Loss: 0.3244, Test Loss: 0.3521
Epoch 20/400, Train L

wandb: Network error (ConnectTimeout), entering retry loop.


Epoch 69/400, Train Loss: 0.2885, Test Loss: 0.3175
Epoch 70/400, Train Loss: 0.2901, Test Loss: 0.3317
Epoch 71/400, Train Loss: 0.2922, Test Loss: 0.3218
Epoch 72/400, Train Loss: 0.2877, Test Loss: 0.3273
Epoch 73/400, Train Loss: 0.2857, Test Loss: 0.3229
Epoch 74/400, Train Loss: 0.2879, Test Loss: 0.3349
Epoch 75/400, Train Loss: 0.2903, Test Loss: 0.3238
Epoch 76/400, Train Loss: 0.2900, Test Loss: 0.3235
Epoch 77/400, Train Loss: 0.2865, Test Loss: 0.3316
Epoch 78/400, Train Loss: 0.2878, Test Loss: 0.3248
Epoch 79/400, Train Loss: 0.2879, Test Loss: 0.3207
Epoch 80/400, Train Loss: 0.2840, Test Loss: 0.3363
Epoch 81/400, Train Loss: 0.2855, Test Loss: 0.3464
Epoch 82/400, Train Loss: 0.2856, Test Loss: 0.3171
Epoch 83/400, Train Loss: 0.2852, Test Loss: 0.3238
Epoch 84/400, Train Loss: 0.2834, Test Loss: 0.3130
Epoch 85/400, Train Loss: 0.2842, Test Loss: 0.3254
Epoch 86/400, Train Loss: 0.2851, Test Loss: 0.3229
Epoch 87/400, Train Loss: 0.2851, Test Loss: 0.3182
Epoch 88/400



0,1
accuracy/test,▁▄▆▆▇▇▇▇█▇▇▇▇▇▇▆▇▇█▆▇▇█▇▆▇▇▇██▇▇▆▇▆▇▇▆▇▇
accuracy/train,▁▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇███▇████████
batch_loss,▇█▆▇▅█▃▅▆▄▃▄▄▇▇▁▅▅▂▄▅▅█▅▄▅▂▃▆▇▄▃▂▃▄▅▅▄▄▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,█▄▃▂▂▂▂▂▁▂▂▂▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▃▂▃▂▂▂▂▂
loss/train,█▅▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9085
accuracy/train,0.9258
batch_loss,0.11748
epoch,399.0
loss/test,0.32227
loss/train,0.25145


Training completed for hidden size: 16


Epoch 1/400, Train Loss: 0.6912, Test Loss: 0.3607
Epoch 2/400, Train Loss: 0.3552, Test Loss: 0.3194
Epoch 3/400, Train Loss: 0.3243, Test Loss: 0.3016
Epoch 4/400, Train Loss: 0.3058, Test Loss: 0.3072
Epoch 5/400, Train Loss: 0.2972, Test Loss: 0.2838
Epoch 6/400, Train Loss: 0.2870, Test Loss: 0.2852
Epoch 7/400, Train Loss: 0.2788, Test Loss: 0.2819
Epoch 8/400, Train Loss: 0.2744, Test Loss: 0.2836
Epoch 9/400, Train Loss: 0.2684, Test Loss: 0.2786
Epoch 10/400, Train Loss: 0.2667, Test Loss: 0.2712
Epoch 11/400, Train Loss: 0.2585, Test Loss: 0.2659
Epoch 12/400, Train Loss: 0.2547, Test Loss: 0.2694
Epoch 13/400, Train Loss: 0.2533, Test Loss: 0.2696
Epoch 14/400, Train Loss: 0.2497, Test Loss: 0.2715
Epoch 15/400, Train Loss: 0.2477, Test Loss: 0.2796
Epoch 16/400, Train Loss: 0.2441, Test Loss: 0.2527
Epoch 17/400, Train Loss: 0.2391, Test Loss: 0.2716
Epoch 18/400, Train Loss: 0.2393, Test Loss: 0.2576
Epoch 19/400, Train Loss: 0.2362, Test Loss: 0.2493
Epoch 20/400, Train L

wandb: Network error (ConnectTimeout), entering retry loop.


Epoch 160/400, Train Loss: 0.1799, Test Loss: 0.2618
Epoch 161/400, Train Loss: 0.1753, Test Loss: 0.2569
Epoch 162/400, Train Loss: 0.1733, Test Loss: 0.2790
Epoch 163/400, Train Loss: 0.1766, Test Loss: 0.2754
Epoch 164/400, Train Loss: 0.1787, Test Loss: 0.2670
Epoch 165/400, Train Loss: 0.1745, Test Loss: 0.2833
Epoch 166/400, Train Loss: 0.1764, Test Loss: 0.2606
Epoch 167/400, Train Loss: 0.1737, Test Loss: 0.2763
Epoch 168/400, Train Loss: 0.1781, Test Loss: 0.2693
Epoch 169/400, Train Loss: 0.1734, Test Loss: 0.2783
Epoch 170/400, Train Loss: 0.1761, Test Loss: 0.2645
Epoch 171/400, Train Loss: 0.1738, Test Loss: 0.2673
Epoch 172/400, Train Loss: 0.1755, Test Loss: 0.2634
Epoch 173/400, Train Loss: 0.1742, Test Loss: 0.2751
Epoch 174/400, Train Loss: 0.1744, Test Loss: 0.2740
Epoch 175/400, Train Loss: 0.1756, Test Loss: 0.2689
Epoch 176/400, Train Loss: 0.1779, Test Loss: 0.2838
Epoch 177/400, Train Loss: 0.1754, Test Loss: 0.2693
Epoch 178/400, Train Loss: 0.1735, Test Loss: 



0,1
accuracy/test,▁▅▅▇▆▇██▇█▇█▆▇▆▇▇▆▇▇▆▆▆▇▇▅▆▇▇▇▆▆▇▆▅▅▆▆▆▆
accuracy/train,▁▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██▇██▇███████
batch_loss,█▇▆▅▄▃▄▂▅▆▃▅▅▄▂▂▄▅▁▄▂▄▅▃▂▃▄▃▄▅▄▄▄▁▇▁▄▆▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,█▄▄▂▃▃▁▁▃▂▄▂▃▃▃▃▄▅▄▄▅▆▅▄▅▆▅▇▄▆▅▇▆▆██▇▆█▇
loss/train,█▆▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.9257
accuracy/train,0.9507
batch_loss,0.06031
epoch,399.0
loss/test,0.30324
loss/train,0.1606


Training completed for hidden size: 32


Epoch 1/400, Train Loss: 0.5138, Test Loss: 0.3291
Epoch 2/400, Train Loss: 0.2922, Test Loss: 0.2527
Epoch 3/400, Train Loss: 0.2437, Test Loss: 0.2143
Epoch 4/400, Train Loss: 0.2098, Test Loss: 0.2112
Epoch 5/400, Train Loss: 0.1815, Test Loss: 0.1751
Epoch 6/400, Train Loss: 0.1622, Test Loss: 0.1597
Epoch 7/400, Train Loss: 0.1455, Test Loss: 0.1496
Epoch 8/400, Train Loss: 0.1385, Test Loss: 0.1456
Epoch 9/400, Train Loss: 0.1270, Test Loss: 0.1358
Epoch 10/400, Train Loss: 0.1151, Test Loss: 0.1415
Epoch 11/400, Train Loss: 0.1128, Test Loss: 0.1367
Epoch 12/400, Train Loss: 0.1058, Test Loss: 0.1486
Epoch 13/400, Train Loss: 0.1024, Test Loss: 0.1330
Epoch 14/400, Train Loss: 0.0965, Test Loss: 0.1348
Epoch 15/400, Train Loss: 0.0944, Test Loss: 0.1314
Epoch 16/400, Train Loss: 0.0898, Test Loss: 0.1529
Epoch 17/400, Train Loss: 0.0848, Test Loss: 0.1248
Epoch 18/400, Train Loss: 0.0831, Test Loss: 0.1332
Epoch 19/400, Train Loss: 0.0791, Test Loss: 0.1387
Epoch 20/400, Train L



0,1
accuracy/test,▁▅▇▇▅▇▇▆▇▇▆▆▇▇▇▆▇▆▅▆▇▇▇▇▇▇▇▇█▇▆███▇▇██▇▄
accuracy/train,▁▄▅▆▆▆▇▇▇▇▇▇██▇▇█▇█▇█▇███████▇██████████
batch_loss,▇▄█▃▅▂▂▄▂▁▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▂▁▁▁▂▂▂▃▂▃▃▃▃▃▄▅▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▅▆█
loss/train,█▅▃▃▂▂▂▂▂▂▂▂▁▁▂▁▁▃▁▂▁▂▁▁▂▁▁▁▁▃▁▁▁▁▁▂▁▁▁▁

0,1
accuracy/test,0.9496
accuracy/train,0.99672
batch_loss,0.00638
epoch,399.0
loss/test,0.63521
loss/train,0.01057


Training completed for hidden size: 64


Epoch 1/400, Train Loss: 0.4593, Test Loss: 0.2533
Epoch 2/400, Train Loss: 0.2388, Test Loss: 0.1929
Epoch 3/400, Train Loss: 0.1813, Test Loss: 0.1684
Epoch 4/400, Train Loss: 0.1476, Test Loss: 0.1676
Epoch 5/400, Train Loss: 0.1281, Test Loss: 0.1515
Epoch 6/400, Train Loss: 0.1160, Test Loss: 0.1165
Epoch 7/400, Train Loss: 0.0971, Test Loss: 0.1191
Epoch 8/400, Train Loss: 0.0925, Test Loss: 0.1247
Epoch 9/400, Train Loss: 0.0815, Test Loss: 0.1129
Epoch 10/400, Train Loss: 0.0746, Test Loss: 0.1235
Epoch 11/400, Train Loss: 0.0728, Test Loss: 0.1086
Epoch 12/400, Train Loss: 0.0642, Test Loss: 0.1013
Epoch 13/400, Train Loss: 0.0581, Test Loss: 0.1046
Epoch 14/400, Train Loss: 0.0524, Test Loss: 0.1011
Epoch 15/400, Train Loss: 0.0502, Test Loss: 0.1085
Epoch 16/400, Train Loss: 0.0541, Test Loss: 0.1076
Epoch 17/400, Train Loss: 0.0477, Test Loss: 0.1207
Epoch 18/400, Train Loss: 0.0469, Test Loss: 0.1133
Epoch 19/400, Train Loss: 0.0441, Test Loss: 0.1075
Epoch 20/400, Train L



0,1
accuracy/test,▁▇▆▄▆▅▆▆▇▆▇▆▆▆▆██▇▆████▄█▆▇▇▇▇▇▆▇██████▆
accuracy/train,▁▅▆▆▇▇▇▇▇██▇█▇█████████▇█▇▇████████████▇
batch_loss,▆▃▄▁▃▁▁▂▂▁▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▂▁▂▂▂▃▃▃▃▃▃▄▄▄▅▄▄▅▅▄▄▄▄▇▅▆▇▆▆▇▇▇▆▆▇▆▇▆▆█
loss/train,█▄▃▂▂▂▁▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁▃▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▂

0,1
accuracy/test,0.9698
accuracy/train,0.99623
batch_loss,0.00023
epoch,399.0
loss/test,0.51566
loss/train,0.02435


Training completed for hidden size: 128


Epoch 1/400, Train Loss: 0.4057, Test Loss: 0.2001
Epoch 2/400, Train Loss: 0.1794, Test Loss: 0.1435
Epoch 3/400, Train Loss: 0.1321, Test Loss: 0.1180
Epoch 4/400, Train Loss: 0.1033, Test Loss: 0.1199
Epoch 5/400, Train Loss: 0.0877, Test Loss: 0.0997
Epoch 6/400, Train Loss: 0.0752, Test Loss: 0.0991
Epoch 7/400, Train Loss: 0.0673, Test Loss: 0.1271
Epoch 8/400, Train Loss: 0.0607, Test Loss: 0.0965
Epoch 9/400, Train Loss: 0.0507, Test Loss: 0.0953
Epoch 10/400, Train Loss: 0.0485, Test Loss: 0.1077
Epoch 11/400, Train Loss: 0.0479, Test Loss: 0.1378
Epoch 12/400, Train Loss: 0.0413, Test Loss: 0.1180
Epoch 13/400, Train Loss: 0.0372, Test Loss: 0.1196
Epoch 14/400, Train Loss: 0.0409, Test Loss: 0.1111
Epoch 15/400, Train Loss: 0.0313, Test Loss: 0.1135
Epoch 16/400, Train Loss: 0.0328, Test Loss: 0.1206
Epoch 17/400, Train Loss: 0.0302, Test Loss: 0.1140
Epoch 18/400, Train Loss: 0.0308, Test Loss: 0.1334
Epoch 19/400, Train Loss: 0.0272, Test Loss: 0.1242
Epoch 20/400, Train L

wandb: Network error (ConnectionError), entering retry loop.


Epoch 344/400, Train Loss: 0.0000, Test Loss: 0.5579
Epoch 345/400, Train Loss: 0.0000, Test Loss: 0.5576
Epoch 346/400, Train Loss: 0.0000, Test Loss: 0.5574
Epoch 347/400, Train Loss: 0.0000, Test Loss: 0.5571
Epoch 348/400, Train Loss: 0.0000, Test Loss: 0.5569
Epoch 349/400, Train Loss: 0.0000, Test Loss: 0.5567
Epoch 350/400, Train Loss: 0.0000, Test Loss: 0.5564
Epoch 351/400, Train Loss: 0.0000, Test Loss: 0.5731
Epoch 352/400, Train Loss: 0.0000, Test Loss: 0.5559
Epoch 353/400, Train Loss: 0.0000, Test Loss: 0.5557
Epoch 354/400, Train Loss: 0.0000, Test Loss: 0.5554
Epoch 355/400, Train Loss: 0.0000, Test Loss: 0.5552
Epoch 356/400, Train Loss: 0.0000, Test Loss: 0.5550
Epoch 357/400, Train Loss: 0.0000, Test Loss: 0.5547
Epoch 358/400, Train Loss: 0.0000, Test Loss: 0.5545
Epoch 359/400, Train Loss: 0.0000, Test Loss: 0.5542
Epoch 360/400, Train Loss: 0.0000, Test Loss: 0.5569
Epoch 361/400, Train Loss: 0.0000, Test Loss: 0.5537
Epoch 362/400, Train Loss: 0.0000, Test Loss: 



0,1
accuracy/test,▁▃▃▅▅▇▄▆▆▆▆▆▅▅▆▅▅▅▆▆▆▅▇▆▆▆▇▇▆█▆▆████████
accuracy/train,▁▅▆▆▇▇▇▇▇▇▇▇▇▇████████████▇█████████████
batch_loss,█▂▃▁▂▁▁▁▁▁▁▁▁▁▁▂▁▂▆▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
loss/test,▁▁▁▁▂▂▂▃▂▃▄▄▄▅▄▅▄▅▅▅▅▆▅▆▆▆▆▇▇▇▇█▆▆▆▆▆▆▆▆
loss/train,█▄▃▂▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▃▂▂▁▂▁▁▁▁▁▁▁▁▁

0,1
accuracy/test,0.982
accuracy/train,1.0
batch_loss,0.0
epoch,399.0
loss/test,0.55091
loss/train,0.0


Training completed for hidden size: 512
