TODO: test with batch sizes other than 1

# Preparation

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from IPython.display import clear_output
from sklearn.metrics import r2_score
from BinvoxDataset import CustomDataset
from Networks import ConvNetScalarLabel, count_parameters

In [2]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cuda:0


In [3]:
# Authenticate this kernel with Weights & Biases before any sweep or run is created.
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mchangli_824[0m ([33madditive-parts[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# Create dataset

In [4]:
def transform(voxel):
    """Convert one voxel grid into a float32 tensor with a leading axis of size 1.

    The grid is used as-is; the alternative that first passed the grid through
    ``condense_voxel_array(voxel, 64)`` is not applied here.
    """
    voxel_tensor = torch.tensor(voxel, dtype = torch.float32)
    return voxel_tensor.unsqueeze(0)

In [5]:
import json

# Paths and the label selection are read from config.json so they can change
# without editing the notebook. The context manager closes the file handle as
# soon as the JSON has been parsed instead of leaving it open.
with open('config.json', 'r') as config_file:
    configs = json.load(config_file)

input_folder_path = configs['input_folder_path']
label_file_path = configs['label_file_path']
label_type = configs['label_type']

In [6]:
# Both datasets read the same folder and label file; only max_count differs.
# max_count = None is for full training runs; a small max_count gives a quick
# subset for debugging and testing.
# NOTE(review): dataset_limited is used as the evaluation set in test() but is
# built from the same source as dataset - confirm whether its samples overlap
# the training data.
shared_dataset_args = dict(
    input_folder_path = input_folder_path,
    label_file_path = label_file_path,
    transform = transform,
    ram_limit = 1000,
    label_type = label_type,
)
dataset = CustomDataset(max_count = None, **shared_dataset_args)
dataset_limited = CustomDataset(max_count = 1000, **shared_dataset_args)

In [7]:
# Sanity check: number of samples available in the full dataset.
len(dataset)

22258

# Define Training Logic

In [8]:
def train_epoch(model, training_loader, optimizer, loss_fn):
    """Run one optimisation pass over ``training_loader``.

    Args:
        model: Network to optimise. Batches are moved to the global ``device``
            before the forward pass, so the model must be on that device too.
        training_loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimiser that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.

    Returns:
        Tuple ``(average loss per batch, summed loss over all batches)``.

    Side effects:
        Logs every batch loss to wandb under the key ``'batch loss'``.
    """
    # Make sure train-time behaviour (e.g. dropout, batch-norm updates) is active.
    model.train()

    cumulative_loss = 0.0
    for inputs, labels in training_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Make predictions
        outputs = model(inputs)

        # Give the labels exactly the shape of the predictions. A blanket
        # squeeze is not tied to the prediction shape: it can drop the batch
        # axis when a batch holds one sample, and a (B,) vs (B, 1) mismatch
        # makes the loss broadcast to (B, B) instead of comparing element-wise.
        if labels.numel() == outputs.numel():
            labels = labels.reshape(outputs.shape)
        else:
            # Element counts differ, so keep the previous squeeze behaviour.
            labels = torch.squeeze(labels)

        # Compute loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        batch_loss = loss.item()
        cumulative_loss += batch_loss
        wandb.log({'batch loss': batch_loss})
    return cumulative_loss / len(training_loader), cumulative_loss

In [9]:
def train(config, loss_fn):
    """Start a wandb run, build the model described by its config, and train it.

    Args:
        config: Configuration passed to ``wandb.init``; the values actually used
            are read back from ``wandb.config`` afterwards.
        loss_fn: Loss callable forwarded to ``train_epoch``.

    Returns:
        The trained ``ConvNetScalarLabel`` instance (on the global ``device``).

    Raises:
        ValueError: If ``config.activation_fn`` is not a supported activation name.
    """
    clear_output(wait = True)

    # initialize a wandb run
    wandb.init(config = config)

    # copy the config
    config = wandb.config

    print('config:', config)

    # get training loader
    # NOTE(review): batches are drawn in a fixed order (shuffle = False) - confirm
    # this is deliberate before changing it.
    training_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle = False)

    # Resolve the activation by name. An unknown name fails here with a clear
    # message instead of leaving activation_fn unbound further down.
    activation_factories = {
        'ReLU': nn.ReLU,
        'Sigmoid': nn.Sigmoid,
    }
    if config.activation_fn not in activation_factories:
        raise ValueError(
            f'Unsupported activation_fn {config.activation_fn!r}; '
            f'expected one of {sorted(activation_factories)}'
        )
    activation_fn = activation_factories[config.activation_fn]()

    # initialize model
    model = ConvNetScalarLabel(kernel_size = config.kernel_size, activation_fn = activation_fn).to(device)

    optimizer = torch.optim.SGD(model.parameters(), lr = config.learning_rate, momentum = 0.9)

    for epoch in range(config.epochs_choice):
        avg_loss_per_batch, cumulative_loss = train_epoch(model, training_loader, optimizer, loss_fn)
        wandb.log({'avg_loss_per_batch': avg_loss_per_batch, 'cumulative_loss': cumulative_loss})
        print(f'Loss for epoch {epoch}: {cumulative_loss}')

    return model

In [10]:
def test(config, model, loss_fn):
    """Evaluate ``model`` on ``dataset_limited``.

    Args:
        config: Not used directly; the active ``wandb.config`` is read instead.
            The parameter is kept so existing callers keep working.
        model: Trained network, already on the global ``device``.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar tensor.

    Returns:
        Tuple ``(average loss per batch, summed loss, R^2 score)``.
    """
    # copy the config
    config = wandb.config

    # get testing loader
    testing_loader = DataLoader(dataset_limited, batch_size = config.batch_size, shuffle = False)

    testing_loss = 0.0
    y_true = []
    y_pred = []

    # Evaluate in inference mode: eval() switches off train-only layer behaviour
    # and no_grad() avoids building autograd graphs that are never used.
    # The previous train/eval mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in testing_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)

                # Match the label shape to the predictions so the loss is
                # element-wise, consistent with how training aligns labels,
                # rather than silently broadcasting (B,) against (B, 1).
                if labels.shape != outputs.shape and labels.numel() == outputs.numel():
                    labels = labels.reshape(outputs.shape)

                testing_loss += loss_fn(outputs, labels).item()

                y_true.extend(labels.cpu().numpy().tolist())
                y_pred.extend(outputs.cpu().numpy().tolist())
    finally:
        model.train(was_training)
    return testing_loss / len(testing_loader), testing_loss, r2_score(y_true = y_true, y_pred = y_pred)

In [11]:
def evaluate(config = None):
    """Train one model with L1 loss, evaluate it, and log the test metrics to wandb.

    Args:
        config: Optional run configuration forwarded to ``train`` and ``test``.
    """
    criterion = nn.L1Loss()
    trained_model = train(config, criterion)
    avg_test_loss, total_test_loss, r2_value = test(config, trained_model, criterion)
    test_metrics = {
        'avg_loss_per_batch_test': avg_test_loss,
        'testing_loss': total_test_loss,
        'r2': r2_value,
    }
    wandb.log(test_metrics)

# Training settings

In [12]:
# Grid search: the sweep tries every combination of the listed parameter values.
sweep_config = {
    'method': 'grid',
}

# Metric the sweep is asked to minimise; 'testing_loss' is the key logged by evaluate().
metric = {
    'name': 'testing_loss',
    'goal': 'minimize',
}
sweep_config['metric'] = metric

# Full hyper-parameter grid for a complete search.
full_grid_parameters = {
    'kernel_size': {'values': [3, 4, 5]},
    'activation_fn': {'values': ['ReLU', 'Sigmoid']},
    'epochs_choice': {'values': [5, 10, 20]},
    'learning_rate': {'values': [1e-4, 1e-3, 1e-2]},
    'batch_size': {'values': [4]},
}

# Single-configuration grid for one end-to-end run.
single_run_parameters = {
    'kernel_size': {'values': [3]},
    'activation_fn': {'values': ['ReLU']},
    'epochs_choice': {'values': [20]},
    'learning_rate': {'values': [1e-4]},
    'batch_size': {'values': [4]},
}

# Choose the grid explicitly. Both grids used to be assigned to parameters_dict
# in turn, so the full grid was silently overwritten; switch to
# full_grid_parameters here to run the complete search.
parameters_dict = single_run_parameters

sweep_config['parameters'] = parameters_dict

# Start the sweep

In [13]:
# Register the sweep under the 'CNN_sweep_scalar' project; the returned id is
# what the agent is started with.
sweep_id = wandb.sweep(
    sweep_config,
    project = 'CNN_sweep_scalar',
)

Create sweep with ID: pv5wbs1h
Sweep URL: https://wandb.ai/additive-parts/CNN_sweep_scalar/sweeps/pv5wbs1h


In [None]:
# Launch an agent in this kernel; it runs evaluate() for the configurations
# handed out by the sweep.
wandb.agent(
    sweep_id = sweep_id,
    function = evaluate,
)

config: {'activation_fn': 'ReLU', 'batch_size': 4, 'epochs_choice': 20, 'kernel_size': 3, 'learning_rate': 0.0001}
Loading samples 0 through 999
