In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
! pip install --upgrade --force-reinstall --no-deps kaggle
! mkdir ~/.kaggle
! cp /content/drive/MyDrive/kaggle.json ~/.kaggle

Collecting kaggle
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l[K     |█████▋                          | 10 kB 32.9 MB/s eta 0:00:01[K     |███████████▏                    | 20 kB 8.7 MB/s eta 0:00:01[K     |████████████████▊               | 30 kB 7.7 MB/s eta 0:00:01[K     |██████████████████████▎         | 40 kB 3.6 MB/s eta 0:00:01[K     |███████████████████████████▉    | 51 kB 4.0 MB/s eta 0:00:01[K     |████████████████████████████████| 58 kB 3.0 MB/s 
[?25hBuilding wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73051 sha256=3eb02c5fe2f957b7d47ed04d33c989ffdddbe1f52f0832aa25c76974af975c0c
  Stored in directory: /root/.cache/pip/wheels/62/d6/58/5853130f941e75b2177d281eb7e44b4a98ed46dd155f556dc5
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.5.12
    Unins

In [3]:
!kaggle competitions download -c 11-785-s22-hw2p2-classification
!kaggle competitions download -c 11-785-s22-hw2p2-verification

!unzip -q 11-785-s22-hw2p2-classification.zip
!unzip -q 11-785-s22-hw2p2-verification.zip

!ls

Downloading 11-785-s22-hw2p2-classification.zip to /content
100% 2.34G/2.35G [01:09<00:00, 56.4MB/s]
100% 2.35G/2.35G [01:09<00:00, 36.4MB/s]
Downloading 11-785-s22-hw2p2-verification.zip to /content
 94% 246M/263M [00:10<00:00, 24.7MB/s]
100% 263M/263M [00:10<00:00, 26.6MB/s]
11-785-s22-hw2p2-classification.zip   sample_data
11-785-s22-hw2p2-verification.zip     train_subset
classification			      verification
classification_sample_submission.csv  verification_sample_submission.csv
drive


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as ttf

import os
import os.path as osp

from tqdm import tqdm
from PIL import Image
from sklearn.metrics import roc_auc_score
import numpy as np

In [10]:
class NextBlock(nn.Module):

    def __init__(self, in_channels, expansion_ratio = 4):

        super().__init__()
        
        hidden_dim = in_channels * expansion_ratio

        self.depthWise = nn.Sequential (
            nn.Conv2d(in_channels, in_channels, kernel_size = 7, padding = 3, bias = False, groups = in_channels),
            nn.BatchNorm2d(in_channels)
        )

        self.pointwise = nn.Sequential(
            nn.Conv2d(in_channels, hidden_dim, kernel_size = 1, padding = 0, bias = False),
            nn.BatchNorm2d(hidden_dim),
            nn.GELU()
        )

        self.bottleneck = nn.Sequential(
            nn.Conv2d(hidden_dim, in_channels, kernel_size = 1, padding = 0, bias = False),
            nn.BatchNorm2d(in_channels)
        )

        
    def forward(self, x):
        out = self.depthWise(x)
        out = self.pointwise(out)
        out = self.bottleneck(out)
        return x + out

In [11]:
class Downsample(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.downsample = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size = 2, stride = 2)
        )

    def forward(self, x):
        out = self.downsample(x)
        return out 

In [12]:
class ConvNext(nn.Module):
    
    def __init__(self, classes = 7000):
        super().__init__()

        self.classes = classes
        self.stem = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size = 4, padding = 0, bias = False, stride = 4),
            nn.BatchNorm2d(96)
        )

        self.stages = [
            [96, 4, 3],
            [192, 4, 3],
            [384, 4, 9],
            [768, 4, 3],
        ]

        layers = self.make_layers()
        self.layers = nn.Sequential(*layers)

        final_channels = 768

        self.final_block = nn.Sequential(
            nn.Conv2d(final_channels, final_channels, kernel_size=1, padding=0, stride=1, bias=False),
            nn.BatchNorm2d(final_channels),
            nn.GELU()
        )

        self.cls_layer = nn.Sequential(
            nn.AdaptiveAvgPool2d((1,1)),
            nn.Flatten(),
            nn.Dropout(0.1),
            nn.Linear(final_channels, classes)
        )

    def make_layers(self):
        layers = []

        for idx, curr_stage in enumerate(self.stages):

            in_channels, exp_ratio, num_blocks = curr_stage
            for __ in range(num_blocks):
                layers.append(NextBlock(
                    in_channels = in_channels,
                    expansion_ratio=exp_ratio))
 
            if(idx != len(self.stages)-1):
                out_channels = self.stages[idx+1][0]
                layers.append(Downsample(in_channels = in_channels, out_channels = out_channels))
            
            
        return layers


    def forward(self, x, return_feats = False):
        out = self.stem(x)
        out = self.layers(out)
        out = self.final_block(out)
        if return_feats:
            return out

        out = self.cls_layer(out)
        return out

In [None]:
# from torchsummary import summary
# model = ConvNext(7000).cuda()
# summary(model, (3, 224, 224))
# del model
# model = torch.load('/content/drive/MyDrive/HW2P2/models-conv/conv5')

In [7]:
batch_size = 128
lr = 0.1
epochs = 55

DATA_DIR = "/content"
TRAIN_DIR = osp.join(DATA_DIR, "classification/classification/train")
VAL_DIR = osp.join(DATA_DIR, "classification/classification/dev")
TEST_DIR = osp.join(DATA_DIR, "classification/classification/test")


train_transforms = ttf.Compose([ttf.RandAugment(),
                    ttf.RandomHorizontalFlip(),
                    ttf.ColorJitter(),
                    ttf.RandomRotation(15),
                    ttf.RandomPerspective(0.3, 0.4),
                    ttf.ToTensor(),
                    ttf.RandomErasing(0.25),
                    ttf.Normalize((0.51301944, 0.40335497, 0.35214797), (0.30744416, 0.2702129 , 0.25891313)), 
                    ])

val_transforms = ttf.Compose([ttf.ToTensor(),
                  ttf.Normalize((0.51301944, 0.40335497, 0.35214797), (0.30744416, 0.2702129 , 0.25891313))])

val_dataset = torchvision.datasets.ImageFolder(VAL_DIR,
                                               transform = val_transforms)
train_dataset = torchvision.datasets.ImageFolder(TRAIN_DIR,
                                                 transform = train_transforms)
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, drop_last=True, num_workers=2)

val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                        drop_last=True, num_workers=1)

In [None]:
num_trainable_parameters = 0
for p in model.parameters():
    num_trainable_parameters += p.numel()
print("Number of Params: {}".format(num_trainable_parameters))

criterion = torch.nn.CrossEntropyLoss(label_smoothing = 0.1)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4, nesterov = True)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones = [2,5,9,13,16,19,20,22,25,27,29] ,gamma = 0.5)

scaler = torch.cuda.amp.GradScaler()

Number of Params: 33811384


In [None]:
for epoch in range(1,epochs+1):
    # Quality of life tip: leave=False and position=0 are needed to make tqdm usable in jupyter
    batch_bar = tqdm(total=len(train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train') 
    model.train()
    num_correct = 0
    total_loss = 0

    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()

        x = x.cuda()
        y = y.cuda()

        with torch.cuda.amp.autocast():     
            outputs = model(x)
            loss = criterion(outputs, y)

        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        total_loss += float(loss)

        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * num_correct / ((i + 1) * batch_size)),
            loss="{:.04f}".format(float(total_loss / (i + 1))),
            num_correct=num_correct,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        
        scaler.scale(loss).backward() 
        scaler.step(optimizer) 
        scaler.update() 

        batch_bar.update()
    batch_bar.close() 
    
    print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
        epoch,
        epochs,
        100 * num_correct / (len(train_loader) * batch_size),
        float(total_loss / len(train_loader)),
        float(optimizer.param_groups[0]['lr'])))

    model.eval()
    batch_bar = tqdm(total=len(val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
    num_correct = 0
    for i, (x, y) in enumerate(val_loader):

        x = x.cuda()
        y = y.cuda()

        with torch.no_grad():
            outputs = model(x)

        num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
        batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / ((i + 1) * batch_size)))

        batch_bar.update()
        
    batch_bar.close()
    print("Validation: {:.04f}%".format(100 * num_correct / len(val_dataset)))
    ss = '/content/drive/MyDrive/HW2P2/models-conv/conv'+str(epoch)
    torch.save(model, ss)
    
    scheduler.step(100 * num_correct / len(val_dataset))



Epoch 1/30: Train Acc 96.4283%, Train Loss 1.6322, Learning Rate 0.0000




Validation: 85.3114%




Epoch 2/30: Train Acc 96.6284%, Train Loss 1.6273, Learning Rate 0.0000




Validation: 85.2743%




Epoch 3/30: Train Acc 96.5026%, Train Loss 1.6306, Learning Rate 0.0000




Validation: 85.3457%




Epoch 4/30: Train Acc 96.5312%, Train Loss 1.6295, Learning Rate 0.0000




Validation: 85.3314%


Train:   5%|▍         | 54/1093 [00:57<18:01,  1.04s/it, acc=96.4915%, loss=1.6268, lr=0.0000, num_correct=6793]

KeyboardInterrupt: ignored

In [5]:
class VerificationDataset(Dataset):
    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # This one-liner basically generates a sorted list of full paths to each image in data_dir
        self.img_paths = list(map(lambda fname: osp.join(self.data_dir, fname), sorted(os.listdir(self.data_dir))))

    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, idx):
        # We return the image, as well as the path to that image (relative path)
        return self.transforms(Image.open(self.img_paths[idx])), osp.relpath(self.img_paths[idx], self.data_dir)

In [8]:
val_veri_dataset = VerificationDataset(osp.join(DATA_DIR, "verification/verification/dev"), val_transforms)
val_ver_loader = torch.utils.data.DataLoader(val_veri_dataset, batch_size=batch_size, 
                                             shuffle=False, num_workers=1)

In [13]:
model = torch.load('/content/drive/MyDrive/HW2P2/models-conv/conv5')

In [14]:
model.eval()
activation = nn.GELU()
feats_dict = dict()
for batch_idx, (imgs, path_names) in tqdm(enumerate(val_ver_loader), total=len(val_ver_loader), position=0, leave=False):
    imgs = imgs.cuda()
    with torch.no_grad():
        # Note that we return the feats here, not the final outputs
        # Feel free to try the final outputs too!
        feats = model(imgs, return_feats = False)
        out = activation(feats)
        for img_name, embedding in zip(path_names, out):
            feats_dict['dev/'+img_name] = embedding
    # TODO: Now we have features and the image path names. What to do with them?
    # Hint: use the feats_dict somehow.



In [15]:
# We use cosine similarity between feature embeddings.
# TODO: Find the relevant function in pytorch and read its documentation.
similarity_metric = torch.nn.CosineSimilarity(dim=0)

val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_dev.csv")


# Now, loop through the csv and compare each pair, getting the similarity between them
pred_similarities = []
gt_similarities = []
for line in tqdm(open(val_veri_csv).read().splitlines()[1:], position=0, leave=False): # skip header
    img_path1, img_path2, gt = line.split(",")

    # TODO: Use the similarity metric
    # How to use these img_paths? What to do with the features?
    similarity = similarity_metric(feats_dict[img_path1],feats_dict[img_path2])
    # print(similarity)
    pred_similarities.append(similarity.cpu().numpy())
    gt_similarities.append(int(gt))

pred_similarities = np.array(pred_similarities)
gt_similarities = np.array(gt_similarities)
# pred_similarities_avg = np.mean(pred_similarities, axis = (1,2))



In [17]:
print("AUC:", roc_auc_score(gt_similarities, pred_similarities))

AUC: 0.9685092547024112


In [None]:
test_veri_dataset = VerificationDataset(osp.join(DATA_DIR, "verification/verification/test"), val_transforms)
test_ver_loader = torch.utils.data.DataLoader(test_veri_dataset, batch_size=batch_size, 
                                              shuffle=False, num_workers=1)

In [None]:
model.eval()

feats_dict = dict()
activation = nn.GELU()
for batch_idx, (imgs, path_names) in tqdm(enumerate(test_ver_loader), total=len(test_ver_loader), position=0, leave=False):
    imgs = imgs.cuda()

    with torch.no_grad():
        # Note that we return the feats here, not the final outputs
        # Feel free to try to final outputs too!
        feats = model(imgs, return_feats = False) 
        out = activation(feats)
        for img_name, embedding in zip(path_names, out):
            feats_dict['test/'+img_name] = embedding
    
    # TODO: Now we have features and the image path names. What to do with them?
    # Hint: use the feats_dict somehow.



In [None]:
# We use cosine similarity between feature embeddings.
# TODO: Find the relevant function in pytorch and read its documentation.
# similarity_metric = 
val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_test.csv")


# Now, loop through the csv and compare each pair, getting the similarity between them
pred_similarities = []
for line in tqdm(open(val_veri_csv).read().splitlines()[1:], position=0, leave=False): # skip header
    img_path1, img_path2 = line.split(",")

    similarity = similarity_metric(feats_dict[img_path1],feats_dict[img_path2])
    pred_similarities.append(similarity.cpu().numpy())
    
pred_similarities = np.array(pred_similarities)



In [None]:
with open("verification_early_submission.csv", "w+") as f:
    f.write("id,match\n")
    for i in range(len(pred_similarities)):
        f.write("{},{}\n".format(i, pred_similarities[i]))

In [None]:
! kaggle competitions submit -c 11-785-s22-hw2p2-verification -f verification_early_submission.csv -m "NA"

100% 16.9M/16.9M [00:00<00:00, 45.9MB/s]
Successfully submitted to Face Verification