In [None]:
# Mount Google Drive: the Kaggle API token is copied from it below and the
# training loop writes its checkpoints to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
! pip install --upgrade --force-reinstall --no-deps kaggle
! mkdir ~/.kaggle
! cp /content/drive/MyDrive/kaggle.json ~/.kaggle

Collecting kaggle
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l[K     |█████▋                          | 10 kB 32.2 MB/s eta 0:00:01[K     |███████████▏                    | 20 kB 9.0 MB/s eta 0:00:01[K     |████████████████▊               | 30 kB 7.9 MB/s eta 0:00:01[K     |██████████████████████▎         | 40 kB 3.6 MB/s eta 0:00:01[K     |███████████████████████████▉    | 51 kB 4.0 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 2.9 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73051 sha256=ccd676dbf6923f13e4534039220f50dc73217afca1cbc0d3b0d7a7a6ab4a0d83
  Stored in directory: /root/.cache/pip/wheels/62/d6/58/5853130f941e75b2177d281eb7e44b4a98ed46dd155f556dc5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12
    Unins

In [None]:
!kaggle competitions download -c 11-785-s22-hw2p2-classification
!kaggle competitions download -c 11-785-s22-hw2p2-verification

!unzip -q 11-785-s22-hw2p2-classification.zip
!unzip -q 11-785-s22-hw2p2-verification.zip

!ls

Downloading 11-785-s22-hw2p2-classification.zip to /content
100% 2.35G/2.35G [00:36<00:00, 93.0MB/s]
100% 2.35G/2.35G [00:36<00:00, 70.1MB/s]
Downloading 11-785-s22-hw2p2-verification.zip to /content
 92% 242M/263M [00:07<00:00, 39.0MB/s]
100% 263M/263M [00:07<00:00, 36.1MB/s]
11-785-s22-hw2p2-classification.zip   sample_data
11-785-s22-hw2p2-verification.zip     train_subset
classification			      verification
classification_sample_submission.csv  verification_sample_submission.csv
drive


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as ttf

import os
import os.path as osp

from tqdm import tqdm
from PIL import Image
from sklearn.metrics import roc_auc_score
import numpy as np

In [None]:
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv -> batch-norm -> ReLU units plus an identity skip.

    Both convolutions keep the channel count and (kernel 3, padding 1, stride 1)
    the spatial size, so the input can be added directly to the branch output.

    Args:
        channels: Number of input and output channels.
    """

    def __init__(self, channels):
        super().__init__()

        # The branch is the same conv/BN/ReLU unit repeated twice; the
        # convolutions are created without a bias term.
        units = []
        for _ in range(2):
            units.extend([
                nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(channels),
                nn.ReLU(),
            ])
        self.pointwise = nn.Sequential(*units)

    def forward(self, x):
        """Return ``x`` plus the output of the convolutional branch."""
        return x + self.pointwise(x)

In [None]:
class Downsample(nn.Module):
    """Transition layer between stages: 2x2 convolution with stride 2, then batch norm.

    The strided convolution reduces each spatial dimension to ``floor(size / 2)``
    and maps ``in_channels`` to ``out_channels``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels of the produced feature map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        strided_conv = nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2)
        norm = nn.BatchNorm2d(out_channels)
        self.downsample = nn.Sequential(strided_conv, norm)

    def forward(self, x):
        """Apply the strided convolution and batch norm to ``x``."""
        return self.downsample(x)

In [None]:
class ResNet(nn.Module):
    """ResNet-style image classifier built from ``ResBlock`` and ``Downsample``.

    The network is a stem (7x7 stride-2 conv, batch norm, ReLU, 3x3 stride-2
    max-pool), a sequence of stages, and a classification head (global average
    pool, flatten, linear).

    Args:
        classes: Number of outputs of the final linear layer.
        stages: Optional sequence of ``[channels, num_blocks]`` pairs, one per
            stage. Defaults to ``[64, 3], [128, 4], [256, 6], [512, 3]``. The
            stem width and the classifier input width are derived from the
            first and last stage, so they cannot drift out of sync with the
            stage table.

    Raises:
        ValueError: If ``stages`` is empty.
    """

    def __init__(self, classes = 7000, stages = None):
        super().__init__()

        if stages is None:
            stages = [
                [64, 3],
                [128, 4],
                [256, 6],
                [512, 3],
            ]
        if len(stages) == 0:
            raise ValueError("stages must contain at least one [channels, num_blocks] pair")

        self.classes = classes
        # Copy so that later edits to the caller's list do not affect the model.
        self.stages = [list(stage) for stage in stages]

        # The first stage consumes the stem output directly, so widths must match.
        stem_channels = self.stages[0][0]
        self.stem = nn.Sequential(
            nn.Conv2d(3, stem_channels, kernel_size = 7, padding = 3, bias = False, stride = 2),
            nn.BatchNorm2d(stem_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.layers = nn.Sequential(*self.make_layers())

        # The head sees the channel count of the last stage.
        final_channels = self.stages[-1][0]
        self.cls_layer = nn.Sequential(
            nn.AdaptiveAvgPool2d((1,1)),
            nn.Flatten(),
            nn.Linear(final_channels, classes),
        )

    def make_layers(self):
        """Return the flat list of modules for all stages.

        Each stage contributes ``num_blocks`` residual blocks at its width;
        every stage except the last is followed by a ``Downsample`` that
        switches to the width of the next stage.
        """
        layers = []
        last_idx = len(self.stages) - 1

        for idx, (in_channels, num_blocks) in enumerate(self.stages):
            for __ in range(num_blocks):
                layers.append(ResBlock(channels = in_channels))

            if idx != last_idx:
                out_channels = self.stages[idx+1][0]
                layers.append(Downsample(in_channels = in_channels, out_channels = out_channels))

        return layers

    def forward(self, x, return_feats = False):
        """Run the network on a batch of images.

        Args:
            x: Input batch with 3 channels.
            return_feats: When true, return the output of the last stage
                *before* pooling (a 4-D feature map) instead of the logits.

        Returns:
            The class logits, or the un-pooled feature map when
            ``return_feats`` is true.
        """
        out = self.stem(x)
        out = self.layers(out)
        if return_feats:
            return out

        out = self.cls_layer(out)
        return out

In [None]:
# Training hyper-parameters (read by the optimiser/scheduler and training cells).
batch_size = 128
lr = 0.1
epochs = 60

# Dataset locations inside the extracted classification archive.
DATA_DIR = "/content"
TRAIN_DIR = osp.join(DATA_DIR, "classification/classification/train")
VAL_DIR = osp.join(DATA_DIR, "classification/classification/dev")
TEST_DIR = osp.join(DATA_DIR, "classification/classification/test")

# Per-channel mean / std passed to Normalize in both pipelines.
NORM_MEAN = (0.51301944, 0.40335497, 0.35214797)
NORM_STD = (0.30744416, 0.2702129 , 0.25891313)

# Training pipeline: light augmentation, then tensor conversion and normalisation.
train_transforms = ttf.Compose([
                    ttf.RandomHorizontalFlip(),
                    ttf.ColorJitter(),
                    # ttf.RandomPerspective(0.3, 0.4),
                    ttf.ToTensor(),
                    ttf.Normalize(NORM_MEAN, NORM_STD),
                    ])

# Evaluation pipeline: no augmentation, same normalisation.
val_transforms = ttf.Compose([ttf.ToTensor(),
                  ttf.Normalize(NORM_MEAN, NORM_STD)])

val_dataset = torchvision.datasets.ImageFolder(VAL_DIR,
                                               transform = val_transforms)
train_dataset = torchvision.datasets.ImageFolder(TRAIN_DIR,
                                                 transform = train_transforms)
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, drop_last=True, num_workers=2)

# drop_last must be False for evaluation: the validation accuracy is divided
# by len(val_dataset), so dropping the final partial batch would silently
# count those images as wrong.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        drop_last=False, num_workers=1)

In [None]:
# Build the network on the GPU. Without this line the cell only works when a
# `model` object happens to be left over in the kernel, and fails with a
# NameError after Restart & Run All.
model = ResNet().cuda()

# Count only parameters that receive gradients, matching the variable name.
num_trainable_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Number of Params: {}".format(num_trainable_parameters))

criterion = torch.nn.CrossEntropyLoss(label_smoothing = 0.25)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.95, weight_decay = 1.5e-4, nesterov = True)
# The scheduler is stepped once per batch in the training loop, so the cosine
# period is expressed in batches, not epochs.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(len(train_loader) * epochs))

# Loss scaler used together with autocast in the training loop.
scaler = torch.cuda.amp.GradScaler()

Number of Params: 26940952


In [None]:
# Create the checkpoint directory up front so a missing folder fails
# immediately rather than at the first save, five epochs into training.
ckpt_dir = '/content/drive/MyDrive/HW2P2/models-res-res'
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(1,epochs+1):
    # Quality of life tip: leave=False and position=0 are needed to make tqdm usable in jupyter
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')
    model.train()
    num_correct = 0
    num_seen = 0      # samples actually processed; the denominator for accuracy
    total_loss = 0

    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        # Mixed-precision forward pass.
        with torch.cuda.amp.autocast():
            outputs = model(x)
            loss = criterion(outputs, y)

        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        num_seen += y.size(0)
        total_loss += float(loss)

        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / num_seen),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))

        # Scaled backward pass and optimiser step; the learning rate follows a
        # per-batch cosine schedule.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        batch_bar.update()

    batch_bar.close()

    print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch,
        epochs,
        100 * num_correct / max(num_seen, 1),
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))

    # Validate and checkpoint every fifth epoch.
    if epoch % 5 == 0:
        model.eval()
        batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
        num_correct = 0
        num_seen = 0
        with torch.no_grad():
            for i, (x, y) in enumerate(val_loader):

                x = x.cuda()
                y = y.cuda()

                outputs = model(x)

                num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
                num_seen += y.size(0)
                batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / num_seen))

                batch_bar.update()

        batch_bar.close()
        # Divide by the number of images actually evaluated: this stays
        # correct whether or not the loader drops a final partial batch.
        print("Validation: {:.04f}%".format(100 * num_correct / max(num_seen, 1)))

        # NOTE(review): this pickles the whole module; loading it later needs
        # the same class definitions to be importable.
        ss = osp.join(ckpt_dir, 'convii'+str(epoch))
        torch.save(model, ss)



Epoch 1/60: Train Acc 0.0465%, Train Loss 8.7470, Learning Rate 0.0999




Epoch 2/60: Train Acc 0.6353%, Train Loss 8.1413, Learning Rate 0.0997




Epoch 3/60: Train Acc 4.5523%, Train Loss 7.3326, Learning Rate 0.0994




Epoch 4/60: Train Acc 15.2544%, Train Loss 6.5016, Learning Rate 0.0989




Epoch 5/60: Train Acc 30.9874%, Train Loss 5.7678, Learning Rate 0.0983




Validation: 27.9657%




Epoch 6/60: Train Acc 46.7534%, Train Loss 5.1763, Learning Rate 0.0976




Epoch 7/60: Train Acc 59.3936%, Train Loss 4.7347, Learning Rate 0.0967




Epoch 8/60: Train Acc 68.2149%, Train Loss 4.4223, Learning Rate 0.0957




Epoch 9/60: Train Acc 74.5858%, Train Loss 4.1894, Learning Rate 0.0946




Epoch 10/60: Train Acc 79.3341%, Train Loss 4.0152, Learning Rate 0.0933




Validation: 60.3000%




Epoch 11/60: Train Acc 82.6172%, Train Loss 3.8844, Learning Rate 0.0919




Epoch 12/60: Train Acc 85.4925%, Train Loss 3.7749, Learning Rate 0.0905




Epoch 13/60: Train Acc 87.7161%, Train Loss 3.6840, Learning Rate 0.0889




Epoch 14/60: Train Acc 89.6220%, Train Loss 3.6088, Learning Rate 0.0872




Epoch 15/60: Train Acc 91.0192%, Train Loss 3.5478, Learning Rate 0.0854




Validation: 67.4629%




Epoch 16/60: Train Acc 92.5009%, Train Loss 3.4915, Learning Rate 0.0835




Epoch 17/60: Train Acc 93.5382%, Train Loss 3.4457, Learning Rate 0.0815




Epoch 18/60: Train Acc 94.6057%, Train Loss 3.4023, Learning Rate 0.0794




Epoch 19/60: Train Acc 95.4685%, Train Loss 3.3644, Learning Rate 0.0772




Epoch 20/60: Train Acc 96.3220%, Train Loss 3.3302, Learning Rate 0.0750




Validation: 73.7914%




Epoch 21/60: Train Acc 96.9065%, Train Loss 3.3021, Learning Rate 0.0727




Epoch 22/60: Train Acc 97.3436%, Train Loss 3.2784, Learning Rate 0.0703




Epoch 23/60: Train Acc 97.8094%, Train Loss 3.2539, Learning Rate 0.0679




Epoch 24/60: Train Acc 98.1406%, Train Loss 3.2315, Learning Rate 0.0655




Epoch 25/60: Train Acc 98.5334%, Train Loss 3.2121, Learning Rate 0.0629




Validation: 78.6086%




Epoch 26/60: Train Acc 98.7745%, Train Loss 3.1926, Learning Rate 0.0604




Epoch 27/60: Train Acc 98.9218%, Train Loss 3.1775, Learning Rate 0.0578




Epoch 28/60: Train Acc 99.1551%, Train Loss 3.1600, Learning Rate 0.0552




Epoch 29/60: Train Acc 99.3304%, Train Loss 3.1434, Learning Rate 0.0526




Epoch 30/60: Train Acc 99.4362%, Train Loss 3.1292, Learning Rate 0.0500




Validation: 79.9343%




Epoch 31/60: Train Acc 99.5686%, Train Loss 3.1135, Learning Rate 0.0474




Epoch 32/60: Train Acc 99.6294%, Train Loss 3.0998, Learning Rate 0.0448




Epoch 33/60: Train Acc 99.7067%, Train Loss 3.0863, Learning Rate 0.0422




Epoch 34/60: Train Acc 99.7918%, Train Loss 3.0708, Learning Rate 0.0396




Epoch 35/60: Train Acc 99.8505%, Train Loss 3.0577, Learning Rate 0.0371




Validation: 81.3857%




Epoch 36/60: Train Acc 99.8877%, Train Loss 3.0446, Learning Rate 0.0345




Epoch 37/60: Train Acc 99.9113%, Train Loss 3.0310, Learning Rate 0.0321




Epoch 38/60: Train Acc 99.9356%, Train Loss 3.0188, Learning Rate 0.0297




Epoch 39/60: Train Acc 99.9549%, Train Loss 3.0084, Learning Rate 0.0273




Epoch 40/60: Train Acc 99.9649%, Train Loss 2.9965, Learning Rate 0.0250




Validation: 84.3514%




Epoch 41/60: Train Acc 99.9800%, Train Loss 2.9850, Learning Rate 0.0228




Epoch 42/60: Train Acc 99.9921%, Train Loss 2.9752, Learning Rate 0.0206




Epoch 43/60: Train Acc 99.9907%, Train Loss 2.9652, Learning Rate 0.0185




Epoch 44/60: Train Acc 99.9893%, Train Loss 2.9564, Learning Rate 0.0165




Epoch 45/60: Train Acc 99.9943%, Train Loss 2.9481, Learning Rate 0.0146




Validation: 86.0229%




Epoch 46/60: Train Acc 99.9957%, Train Loss 2.9410, Learning Rate 0.0128




Epoch 47/60: Train Acc 99.9971%, Train Loss 2.9349, Learning Rate 0.0111




Epoch 48/60: Train Acc 99.9979%, Train Loss 2.9298, Learning Rate 0.0095




Epoch 49/60: Train Acc 99.9971%, Train Loss 2.9246, Learning Rate 0.0081




Epoch 50/60: Train Acc 99.9979%, Train Loss 2.9206, Learning Rate 0.0067




Validation: 86.9543%




Epoch 51/60: Train Acc 99.9986%, Train Loss 2.9170, Learning Rate 0.0054




Epoch 52/60: Train Acc 99.9993%, Train Loss 2.9140, Learning Rate 0.0043




Epoch 53/60: Train Acc 100.0000%, Train Loss 2.9116, Learning Rate 0.0033




Epoch 54/60: Train Acc 99.9993%, Train Loss 2.9097, Learning Rate 0.0024




Epoch 55/60: Train Acc 99.9993%, Train Loss 2.9079, Learning Rate 0.0017




Validation: 87.2857%




Epoch 56/60: Train Acc 99.9993%, Train Loss 2.9069, Learning Rate 0.0011




Epoch 57/60: Train Acc 100.0000%, Train Loss 2.9058, Learning Rate 0.0006




Epoch 58/60: Train Acc 100.0000%, Train Loss 2.9053, Learning Rate 0.0003




Epoch 59/60: Train Acc 100.0000%, Train Loss 2.9048, Learning Rate 0.0001




Epoch 60/60: Train Acc 100.0000%, Train Loss 2.9045, Learning Rate 0.0000




Validation: 87.2857%


In [None]:
class ClassificationTestSet(Dataset):
    """Unlabelled image folder that yields transformed images in sorted file-name order.

    Args:
        data_dir: Directory that directly contains the image files.
        transforms: Callable applied to each loaded PIL image.
    """

    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # Sorted so that dataset index i always refers to the same file.
        self.img_paths = [osp.join(self.data_dir, fname)
                          for fname in sorted(os.listdir(self.data_dir))]

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return the transformed image at position ``idx``."""
        # Force three channels so a grayscale or RGBA file cannot break the
        # 3-channel Normalize / first convolution. convert() returns a loaded
        # copy, so the file handle can be closed straight away.
        with Image.open(self.img_paths[idx]) as img:
            rgb_img = img.convert('RGB')
        return self.transforms(rgb_img)

In [None]:
# Test images go through the evaluation transforms. Order is kept fixed
# (no shuffling, nothing dropped) because predictions are matched to
# submission rows by position.
test_dataset = ClassificationTestSet(data_dir=TEST_DIR, transforms=val_transforms)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=100,
    num_workers=1,
    shuffle=False,
    drop_last=False,
)

In [None]:
# Predict a class index for every test image, in dataset order.
model.eval()
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, position=0, leave=False, desc='Test')

batch_predictions = []
with torch.no_grad():
    for x in test_loader:
        logits = model(x.cuda())
        batch_predictions.append(torch.argmax(logits, dim=1).cpu().numpy())
        batch_bar.update()

batch_bar.close()

# Concatenate once at the end: np.append inside the loop re-copies the whole
# array every batch and turns the integer class indices into float64.
res = np.concatenate(batch_predictions) if batch_predictions else np.array([], dtype=np.int64)



In [None]:
# Write the Kaggle submission: one "<file name>,<predicted class index>" row
# per test image.
res = np.asarray(res).astype(int)

# Catch stale kernel state (predictions from a different loader) before
# producing a misaligned file.
assert len(res) == len(test_dataset), "number of predictions does not match number of test images"

with open("classification_early_submission.csv", "w") as f:
    f.write("id,label\n")
    # Take the id from the actual file that produced each prediction rather
    # than assuming files are named 000000.jpg, 000001.jpg, ... without gaps.
    for img_path, label in zip(test_dataset.img_paths, res):
        f.write("{},{}\n".format(osp.basename(img_path), label))

In [None]:
# Upload the classification predictions written by the previous cell.
! kaggle competitions submit -c 11-785-s22-hw2p2-classification -f classification_early_submission.csv -m "NA"

100% 541k/541k [00:05<00:00, 94.6kB/s]
Successfully submitted to Face Recognition

In [None]:
class VerificationDataset(Dataset):
    """Image folder that yields ``(transformed image, file name relative to data_dir)``.

    Args:
        data_dir: Directory that directly contains the image files.
        transforms: Callable applied to each loaded PIL image.
    """

    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # Sorted list of full paths to every entry in data_dir.
        self.img_paths = [osp.join(self.data_dir, fname)
                          for fname in sorted(os.listdir(self.data_dir))]

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return the transformed image at ``idx`` together with its relative path."""
        img_path = self.img_paths[idx]
        # Force three channels so a grayscale or RGBA file cannot break the
        # 3-channel Normalize / first convolution. convert() returns a loaded
        # copy, so the file handle can be closed straight away.
        with Image.open(img_path) as img:
            rgb_img = img.convert('RGB')
        # The relative path is what callers use to build lookup keys.
        return self.transforms(rgb_img), osp.relpath(img_path, self.data_dir)

In [None]:
# Display the evaluation transform pipeline that the verification datasets reuse.
val_transforms

Compose(
    ToTensor()
    Normalize(mean=(0.51301944, 0.40335497, 0.35214797), std=(0.30744416, 0.2702129, 0.25891313))
)

In [None]:
# Dev images for the verification task, loaded in sorted order with the
# evaluation transforms.
VERI_DEV_DIR = osp.join(DATA_DIR, "verification/verification/dev")
val_veri_dataset = VerificationDataset(data_dir=VERI_DEV_DIR, transforms=val_transforms)
val_ver_loader = torch.utils.data.DataLoader(
    dataset=val_veri_dataset,
    batch_size=batch_size,
    num_workers=1,
    shuffle=False,
)

In [None]:
# Embed every dev verification image with the trained network.
# return_feats=True yields the un-pooled output of the last stage, so each
# stored value is a feature map rather than a logit vector. Keys are
# "dev/<file name>", which is how the verification csv is used to look
# them up in the following cell.
model.eval()

feats_dict = dict()
with torch.no_grad():
    for imgs, path_names in tqdm(val_ver_loader, total=len(val_ver_loader), position=0, leave=False):
        feats = model(imgs.cuda(), return_feats = True)
        feats_dict.update(
            ('dev/'+img_name, embedding)
            for img_name, embedding in zip(path_names, feats)
        )



In [None]:
# Score every dev pair with cosine similarity between the stored embeddings.
# dim=0 compares along the first axis of each embedding; for un-pooled
# feature maps that is the channel axis, giving one similarity map per pair
# (reduced to a single score per pair in a later cell).
similarity_metric = torch.nn.CosineSimilarity(dim=0)

val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_dev.csv")

# Read the pair list inside a context manager so the file handle is closed.
with open(val_veri_csv) as csv_file:
    pair_lines = csv_file.read().splitlines()[1:]  # skip header

# Each row is "<image path 1>,<image path 2>,<ground-truth 0/1>".
pred_similarities = []
gt_similarities = []
for line in tqdm(pair_lines, position=0, leave=False):
    img_path1, img_path2, gt = line.split(",")

    similarity = similarity_metric(feats_dict[img_path1],feats_dict[img_path2])
    pred_similarities.append(similarity.cpu().numpy())
    gt_similarities.append(int(gt))

pred_similarities = np.array(pred_similarities)
gt_similarities = np.array(gt_similarities)



In [None]:
# Inspect the raw pairwise similarities produced by the previous cell.
pred_similarities

array([[[ 0.82377017,  0.5729863 ,  0.42320836, ...,  0.5230511 ,
          0.58104306,  0.8321384 ],
        [ 0.3145008 ,  0.27459118,  0.3023045 , ...,  0.24750963,
          0.13598429,  0.16847019],
        [ 0.2768411 ,  0.26482818,  0.35657415, ...,  0.3624979 ,
          0.22425668,  0.10787322],
        ...,
        [ 0.44831637,  0.43629894,  0.5518107 , ...,  0.52631074,
          0.39577594,  0.29823503],
        [ 0.57992506,  0.35480937,  0.46189806, ...,  0.43099782,
          0.34787092,  0.40202567],
        [ 0.87842506,  0.7209258 ,  0.6057954 , ...,  0.5370739 ,
          0.6628217 ,  0.8686696 ]],

       [[ 0.9188603 ,  0.60921067,  0.30519232, ...,  0.2804889 ,
          0.37876323,  0.65714866],
        [ 0.5792971 ,  0.06475962,  0.08167127, ...,  0.07043648,
          0.14538717,  0.20882432],
        [ 0.4572583 ,  0.0798768 ,  0.09816156, ...,  0.01824662,
          0.04520112,  0.08696   ],
        ...,
        [ 0.5709415 ,  0.13493557,  0.15733431, ...,  

In [None]:
# Check the array shape before it is reduced to one score per pair.
pred_similarities.shape

(166800,)

In [None]:
# Collapse each pair's similarity map to a single score (mean over axes 1
# and 2), then report the ROC AUC of those scores against the ground truth.
pred_similarities_avg = pred_similarities.mean(axis = (1, 2))
dev_auc = roc_auc_score(gt_similarities, pred_similarities_avg)
print("AUC:", dev_auc)

AUC: 0.9045100957770504


In [None]:
# Test images for the verification task, loaded in sorted order with the
# evaluation transforms.
VERI_TEST_DIR = osp.join(DATA_DIR, "verification/verification/test")
test_veri_dataset = VerificationDataset(data_dir=VERI_TEST_DIR, transforms=val_transforms)
test_ver_loader = torch.utils.data.DataLoader(
    dataset=test_veri_dataset,
    batch_size=batch_size,
    num_workers=1,
    shuffle=False,
)

In [None]:
# Embed every test verification image with the same features that were
# validated on the dev set.
#
# The dev pipeline calls the model with return_feats=True, takes the cosine
# similarity along the channel axis at every spatial location, and averages
# the resulting map. Previously this cell called model(imgs) and therefore
# stored classifier logits, so the submitted scores came from a different
# representation than the one the dev AUC was measured on.
#
# To keep the downstream cell (CosineSimilarity(dim=0) on two stored
# embeddings) producing one scalar per pair, each feature map is
# L2-normalised along the channel axis and then flattened. For such vectors
# the cosine similarity equals the mean of the per-location channel-wise
# cosine similarities, i.e. the same score as the dev pipeline.
model.eval()

feats_dict = dict()
with torch.no_grad():
    for imgs, path_names in tqdm(test_ver_loader, total=len(test_ver_loader), position=0, leave=False):
        feature_maps = model(imgs.cuda(), return_feats = True)
        embeddings = F.normalize(feature_maps, dim=1).flatten(start_dim=1)
        for img_name, embedding in zip(path_names, embeddings):
            # Keys are "test/<file name>", matching how the csv is used for lookup.
            feats_dict['test/'+img_name] = embedding



In [None]:
# Score every test pair with cosine similarity between the stored embeddings.
# The metric is (re)created here so the cell does not depend on the dev
# scoring cell having been run first.
similarity_metric = torch.nn.CosineSimilarity(dim=0)
test_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_test.csv")

# Read the pair list inside a context manager so the file handle is closed.
with open(test_veri_csv) as csv_file:
    pair_lines = csv_file.read().splitlines()[1:]  # skip header

# Each row is "<image path 1>,<image path 2>".
pred_similarities = []
for line in tqdm(pair_lines, position=0, leave=False):
    img_path1, img_path2 = line.split(",")

    similarity = similarity_metric(feats_dict[img_path1],feats_dict[img_path2])
    # mean() is a no-op for a scalar similarity; if the embeddings are
    # multi-dimensional feature maps it reduces the similarity map to the one
    # score per pair that the submission file needs.
    pred_similarities.append(similarity.mean().cpu().numpy())

pred_similarities = np.array(pred_similarities)



In [None]:
# Write the verification submission: a header, then one "<row index>,<score>"
# line per scored pair, in csv order.
with open("verification_early_submission.csv", "w+") as f:
    f.write("id,match\n")
    f.writelines(
        "{},{}\n".format(row_id, score)
        for row_id, score in enumerate(pred_similarities)
    )

In [None]:
# Upload the verification scores written by the previous cell.
! kaggle competitions submit -c 11-785-s22-hw2p2-verification -f verification_early_submission.csv -m "NA"

100% 16.9M/16.9M [00:00<00:00, 45.9MB/s]
Successfully submitted to Face Verification