# Version 2: ConvNet + Cross Entropy

In [1]:
import torch, sys
import argparse
import torch.nn as nn
import torch.nn.functional as F

import torch.utils.data.dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

import wandb
from torch.utils.data import Dataset, DataLoader, Sampler
from training_config import doodles, reals, doodle_size, real_size, NUM_CLASSES
from utils import *  # bad practice, nvm
from losses import compute_contrastive_loss_from_feats

from torchvision import transforms
import random
import numpy as np
import cv2

In [2]:
wandb.init(project="cs4243-project", entity="rish-16")

[34m[1mwandb[0m: Currently logged in as: [33mrish-16[0m (use `wandb login --relogin` to force relogin)


In [3]:
def combined_dataset(datasets, size):
    combined_dataset = {}
    for name, dataset in datasets.items():
        for class_name, class_data in dataset.items():
            if class_name not in combined_dataset:
                combined_dataset[class_name] = []
            # resize data so they can be stacked
            resized = []
            for data in class_data:
                resized.append(cv2.resize(data, (size, size), interpolation=cv2.INTER_AREA))
            resized = np.stack(resized, axis=0)
            combined_dataset[class_name].append(resized)
    for class_name, lst_datasets in combined_dataset.items():
        combined_dataset[class_name] = np.concatenate(lst_datasets, axis=0)
    return combined_dataset


class ImageDataset(Dataset):
    DATASET_DIR = {True: 'dataset/dataset_train.npy', False: 'dataset/dataset_test.npy'}

    def __init__(self, doodles_list, real_list, doodle_size, real_size, train: bool):
        super(ImageDataset, self).__init__()

        dataset = np.load(self.DATASET_DIR[train], allow_pickle=True)[()]

        doodle_datasets = {name: data for name, data in dataset.items() if name in doodles_list}
        real_datasets = {name: data for name, data in dataset.items() if name in real_list}
        self.doodle_dict = combined_dataset(doodle_datasets, doodle_size)
        self.real_dict = combined_dataset(real_datasets, real_size)

        # sanity check
        assert set(self.doodle_dict.keys()) == set(self.real_dict.keys()), \
            f'doodle and real images label classes do not match'

        # process classes
        label_idx = {}
        for key in self.doodle_dict.keys():
            if key not in label_idx:
                label_idx[key] = len(label_idx)
        self.label_idx = label_idx

        # parse data and labels
        self.doodle_data, self.doodle_label = self._return_x_y_pairs(self.doodle_dict, label_idx)
        self.real_data, self.real_label = self._return_x_y_pairs(self.real_dict, label_idx)

        # data preprocessing
        self.doodle_preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(doodle_size),
            transforms.ToTensor(),
            transforms.Normalize((self.doodle_data/255).mean(), (self.doodle_data/255).std())   # IMPORTANT / 255
        ])

        self.real_preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(real_size),
            transforms.ToTensor(),
            transforms.Normalize((self.real_data/255).mean(axis=(0, 1, 2)), (self.real_data/255).std(axis=(0, 1, 2)))
        ])

        print(f'Train = {train}. Doodle list: {doodles_list}, \n real list: {real_list}. \n classes: {label_idx.keys()} \n'
              f'Doodle data size {len(self.doodle_data)}, real data size {len(self.real_data)}, '
              f'ratio {len(self.doodle_data)/len(self.real_data)}')

    def _return_x_y_pairs(self, data_dict, category_mapping):
        xs, ys = [], []
        for key in data_dict.keys():
            data = data_dict[key]
            labels = [category_mapping[key]] * len(data)
            xs.append(data)
            ys.extend(labels)
        return np.concatenate(xs, axis=0), np.array(ys)

    def __getitem__(self, idx):
        # naive sampling scheme - sample with replacement
        # sample label first so that doodle and real data belong to the same category
        label = random.choice(list(self.label_idx.keys()))
        doodle_data = self.doodle_preprocess(random.choice(self.doodle_dict[label]))
        real_data = self.real_preprocess(random.choice(self.real_dict[label]))
        numer_label = self.label_idx[label]
        return doodle_data, numer_label, real_data, numer_label

    def __len__(self):
        return max(len(self.doodle_data), len(self.real_data)) # could be arbitrary number

In [4]:
class V2ConvNet(nn.Module):
    def __init__(self, in_c, 
                 num_classes, 
                 channel_list=[64, 128, 192, 256, 512], 
                 pool_option=(1,1), 
                 hidden=256, 
                 dropout=0.2, 
                 add_layers=False):
        super().__init__()
        
        layer1 = nn.Conv2d(in_c, channel_list[0], kernel_size=3)
        layer2 = nn.Conv2d(channel_list[0], channel_list[0], kernel_size=3)
        layers = [layer1, layer2]
        
        for i in range(1, len(channel_list)):
            layers.append(
                nn.Conv2d(channel_list[i-1], channel_list[i], kernel_size=3, stride=2, padding=1, bias=True)
            )
            layers.append(
                nn.Conv2d(channel_list[i], channel_list[i], kernel_size=3, stride=2, padding=1, bias=True)
            )
            layers.append(
                nn.BatchNorm2d(channel_list[i])
            )
            layers.append(
                nn.Dropout(dropout)
            )
            layers.append(nn.ReLU())
            
        self.conv = nn.Sequential(*layers)
        
        self.flatten = nn.AdaptiveAvgPool2d(pool_option)
            
        self.fc = nn.Sequential(*[
            nn.Linear(pool_option[0] * pool_option[1] * channel_list[-1], hidden),
            nn.Linear(hidden, num_classes)
        ])

    def forward(self, x, return_feats=False):
        feats = self.conv(x)
        x = x.view(x.size(0), 512, -1).mean(2)
        x = self.fc(x)

        if return_feats:
            return x, feats

        return x

In [5]:
from torchinfo import summary

print (summary(V2ConvNet(3, 9, [32, 128, 512])))

Layer (type:depth-idx)                   Param #
V2ConvNet                                --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       896
│    └─Conv2d: 2-2                       9,248
│    └─Conv2d: 2-3                       36,992
│    └─Conv2d: 2-4                       147,584
│    └─BatchNorm2d: 2-5                  256
│    └─Dropout: 2-6                      --
│    └─ReLU: 2-7                         --
│    └─Conv2d: 2-8                       590,336
│    └─Conv2d: 2-9                       2,359,808
│    └─BatchNorm2d: 2-10                 1,024
│    └─Dropout: 2-11                     --
│    └─ReLU: 2-12                        --
├─AdaptiveAvgPool2d: 1-2                 --
├─Sequential: 1-3                        --
│    └─Linear: 2-13                      131,328
│    └─Linear: 2-14                      2,313
Total params: 3,279,785
Trainable params: 3,279,785
Non-trainable params: 0


In [6]:
x = torch.rand(100, 3, 64, 64)
net = V2ConvNet(3, 9, [128, 256, 512])
y = net(x)
print (y.shape)

torch.Size([100, 9])


In [7]:
fix_seed(0)  # zero seed by default
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

In [29]:
def train(model1, model2, train_loader, val_loader, tqdm_on, num_epochs, batch_size, learning_rate, c1, c2, t):
    # cuda side setup
    model1 = nn.DataParallel(model1).cuda()
    model2 = nn.DataParallel(model2).cuda()

    # training side
    optimizer = torch.optim.AdamW(params=list(model1.parameters()) + list(model2.parameters()),
                                  lr=learning_rate, weight_decay=3e-4)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    # training loop
    for epoch in range(num_epochs):
        loss1_model1 = AverageMeter()
        loss1_model2 = AverageMeter()
        loss2_model1 = AverageMeter()
        loss2_model2 = AverageMeter()
        loss3_combined = AverageMeter()
        acc_model1 = AverageMeter()
        acc_model2 = AverageMeter()

        model1.train()
        model2.train()
        pg = tqdm(train_loader, leave=False, total=len(train_loader), disable=not tqdm_on)
        total_loss = 0
        
        for i, (x1, y1, x2, y2) in enumerate(pg):
            # doodle, label, real, label
            x1, y1, x2, y2 = x1.cuda(), y1.cuda(), x2.cuda(), y2.cuda()

            # train model1 (doodle)
            pred1, feats1 = model1(x1, return_feats=True)
            loss_1 = criterion(pred1, y1)  # classification loss
            loss_2 = compute_contrastive_loss_from_feats(feats1, y1, t)
            loss1_model1.update(loss_1.item())
            loss2_model1.update(loss_2.item())
            loss_model1 = loss_1 + c1 * loss_2

            # train model2 (real)
            pred2, feats2 = model2(x2, return_feats=True)
            loss_1 = criterion(pred2, y2)  # classification loss
            loss_2 = compute_contrastive_loss_from_feats(feats2, y2, t)
            loss1_model2.update(loss_1.item())
            loss2_model2.update(loss_2.item())
            loss_model2 = loss_1 + c1 * loss_2

            # the third loss
            combined_feat = feats1 * feats2
            loss_3 = compute_contrastive_loss_from_feats(combined_feat, y1, t)
            loss3_combined.update(loss_3.item())

            loss = loss_model1 + loss_model2 + c2 * loss_3
            total_loss += loss.item()

            # statistics
            acc_model1.update(compute_accuracy(pred1, y1))
            acc_model2.update(compute_accuracy(pred2, y2))

            # optimization
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            # display
            pg.set_postfix({
                'acc 1': '{:.6f}'.format(acc_model1.avg),
                'acc 2': '{:.6f}'.format(acc_model2.avg),
                'l1m1': '{:.6f}'.format(loss1_model1.avg),
                'l2m1': '{:.6f}'.format(loss2_model1.avg),
                'l1m2': '{:.6f}'.format(loss1_model2.avg),
                'l2m2': '{:.6f}'.format(loss2_model2.avg),
                'train epoch': '{:03d}'.format(epoch)
            })

        print(
            f'train epoch {epoch}, acc 1={acc_model1.avg:.3f}, acc 2={acc_model2.avg:.3f}, l1m1={loss1_model1.avg:.3f},'
            f'l1m2={loss1_model2.avg:.3f}, l2m1={loss2_model1.avg:.3f}, l2m2={loss2_model2.avg:.3f}, '
            f'l3={loss3_combined.avg:.3f}')
        
        wandb.log({
            'train acc 1': float(acc_model1.avg),
            'train acc 2': float(acc_model2.avg),
            'train epoch': epoch,
            'l1m1': float(loss1_model1.avg),
            'l2m1': float(loss2_model1.avg),
            'l1m2': float(loss1_model2.avg),
            'l2m2': float(loss2_model2.avg),
            'total_loss': float(total_loss / base_bs)
        })

        # validation
        model1.eval(), model1.eval()
        acc_model1.reset(), acc_model2.reset()
        pg = tqdm(val_loader, leave=False, total=len(val_loader), disable=not tqdm_on)
        with torch.no_grad():
            for i, (x1, y1, x2, y2) in enumerate(pg):
                pred1, feats1 = model1(x1, return_feats=True)
                pred2, feats2 = model2(x2, return_feats=True)
                acc_model1.update(compute_accuracy(pred1, y1))
                acc_model2.update(compute_accuracy(pred2, y2))

                # display
                pg.set_postfix({
                    'acc 1': '{:.6f}'.format(acc_model1.avg),
                    'acc 2': '{:.6f}'.format(acc_model2.avg),
                    'val epoch': '{:03d}'.format(epoch)
                })

        print(f'validation epoch {epoch}, acc 1 (doodle) = {acc_model1.avg:.3f}, acc 2 (real) = {acc_model2.avg:.3f}')
        
        wandb.log({
            'val epoch': epoch,
            'val acc 1': float(acc_model1.avg),
            'val acc 2': float(acc_model2.avg),
        })

        scheduler.step()

    print(f'training finished')

    # save checkpoint
    # exp_dir = f'exp_data/{id}'
    # save_model(exp_dir, f'{id}_model1.pt', model1)
    # save_model(exp_dir, f'{id}_model2.pt', model2)

Error in callback <function _WandbInit._resume_backend at 0x7fc5df385ef0> (for pre_run_cell):


Exception: The wandb backend process has shutdown

Error in callback <function _WandbInit._pause_backend at 0x7fc5df385950> (for post_run_cell):


Exception: The wandb backend process has shutdown

In [21]:
sweep_config = {
    'method': 'bayes'
    }

metric = {
    'name': 'total_loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

In [22]:
parameters_dict = {
    'channel_combos': {
        'values': [
                [64, 128, 192, 256, 512],
                [64, 128, 256, 512],
                [128, 256, 512],
                [64, 256, 512],
                [32, 64, 192, 256],
                [32, 128, 512]
            ]
        },
    'hidden_dim': {
        "values": [64, 128, 256, 512]
    },
    'dropout': {
          'values': [0.3, 0.4, 0.5]
        }
    }

sweep_config['parameters'] = parameters_dict

In [23]:
parameters_dict.update({
    'epochs': {
        'value': 20
    }
})

In [24]:
parameters_dict.update({
    'learning_rate': {
        # a flat distribution between 0 and 0.1
        'distribution': 'uniform',
        'min': 0,
        'max': 0.05
    }
})

In [25]:
def build_model(doodle_channels, real_channels, hidden, dropout):
    doodle_model = V2ConvNet(1, 9, channel_list=doodle_channels, hidden=hidden)
    real_model = V2ConvNet(3, 9, channel_list=real_channels, hidden=hidden)
    
    return doodle_model, real_model

In [30]:
def build_dataset(batch_size):
    train_set = ImageDataset(doodles, reals, doodle_size, real_size, train=True)
    val_set = ImageDataset(doodles, reals, doodle_size, real_size, train=False)
    
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, drop_last=True)
    
    return train_loader, val_loader

Error in callback <function _WandbInit._resume_backend at 0x7fc5df385ef0> (for pre_run_cell):


Exception: The wandb backend process has shutdown

Error in callback <function _WandbInit._pause_backend at 0x7fc5df385950> (for post_run_cell):


Exception: The wandb backend process has shutdown

In [26]:
def train_model(config=None):
    tqdm_on = True  # progress bar
    num_epochs, base_bs, base_lr = 20, 512, 2e-2
    c1, c2, t = 0, 0, 0.1 # contrastive learning. if you want vanilla (cross-entropy) training, set c1 and c2 to 0.
    
    with wandb.init(config=config):
        config = wandb.config
        
        doodle_model, real_model = build_model(
                                        config.channel_combos,
                                        config.channel_combos,
                                        config.hidden_dim,
                                        config.dropout
                                    )
        
        train_loader, val_loader = build_dataset(base_bs)

        train(
            doodle_model, 
            real_model, 
            train_loader, 
            val_loader
            tqdm_on, 
            num_epochs, 
            base_bs, 
            config.learning_rate, 
            c1, c2, t
        )

In [27]:
sweep_id = wandb.sweep(sweep_config, project="cs4243-project", entity="rish-16")

Create sweep with ID: nmt99lp3
Sweep URL: https://wandb.ai/rish-16/cs4243-project/sweeps/nmt99lp3


In [28]:
wandb.agent(sweep_id, train_model, count=20)

[34m[1mwandb[0m: Agent Starting Run: 41l7oovl with config:
[34m[1mwandb[0m: 	channel_combos: [32, 64, 192, 256]
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	learning_rate: 0.03246289196567496


Train = True. Doodle list: ['sketchy_doodle', 'tuberlin', 'google_doodles'], 
 real list: ['sketchy_real', 'google_real', 'cifar']. 
 classes: dict_keys(['airplane', 'car', 'cat', 'dog', 'frog', 'horse', 'truck', 'bird', 'ship']) 
Doodle data size 7022, real data size 46364, ratio 0.15145371408851696
Train = False. Doodle list: ['sketchy_doodle', 'tuberlin', 'google_doodles'], 
 real list: ['sketchy_real', 'google_real', 'cifar']. 
 classes: dict_keys(['airplane', 'car', 'cat', 'dog', 'frog', 'horse', 'truck', 'bird', 'ship']) 
Doodle data size 1764, real data size 9341, ratio 0.18884487742211756


                                                                                                                                                             

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: [32m[41mERROR[0m Run 41l7oovl errored: RuntimeError('Caught RuntimeError in replica 0 on device 0.\nOriginal Traceback (most recent call last):\n  File "/home/rishabh/miniconda3/envs/Rish4243/lib/python3.7/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker\n    output = module(*input, **kwargs)\n  File "/home/rishabh/miniconda3/envs/Rish4243/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl\n    return forward_call(*input, **kwargs)\n  File "/tmp/ipykernel_2545573/3997546909.py", line 42, in forward\n    x = self.fc(x)\n  File "/home/rishabh/miniconda3/envs/Rish4243/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl\n    return forward_call(*input, **kwargs)\n  File "/home/rishabh/miniconda3/envs/Rish4243/lib/python3.7/site-packages/torch/nn/modules/container.py", line 141, in forward\n    input = module(input)\n  File "/home/rishabh/miniconda3/envs/Rish4243/lib/python3.7/site-pac