TODO:
- Work on Data Augmentation
- Clean up Submission CSV code

## Mount Drive and Download Data

In [70]:
# Colab environment bootstrap: install google-drive-ocamlfuse, mount Google Drive
# at /content/cmudrive, configure Kaggle credentials, and log in to Weights & Biases.
from IPython.display import clear_output 
# Install the FUSE-based Drive client from the alessandro-strada PPA.
! apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE(review): with `2>&1 > /dev/null` only stdout is discarded; stderr is still
# shown because it is duplicated before stdout is redirected.
! add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
! apt-get update -qq 2>&1 > /dev/null
! apt-get -y install -qq google-drive-ocamlfuse fuse

# Authenticate the Colab user and fetch the application-default credentials.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass

# First invocation prints the authorisation URL (filtered with grep); the
# verification code is then read without echo and piped into a second invocation.
# NOTE(review): the client secret and the verification code are interpolated
# into shell command lines - confirm this is acceptable for this environment.
! google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
vcode = getpass.getpass()
! echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# Create the mount point and mount Drive on it.
% cd /content
! mkdir cmudrive
% cd ..
! google-drive-ocamlfuse /content/cmudrive
! pip install kaggle wandb torch-summary
# Kaggle API token is copied from Drive; chmod 600 restricts it to the owner.
! mkdir ~/.kaggle
! cp /content/cmudrive/IDL/kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! pip install --upgrade --force-reinstall --no-deps kaggle 
# Point the Kaggle CLI's download path at /content.
! kaggle config set -n path -v /content

! wandb login
! pip install --upgrade --force-reinstall --no-deps albumentations
# Hide the (long) installation output once everything has run.
clear_output()

In [71]:
# Download both competition archives with the Kaggle CLI.
! kaggle competitions download -c 11-785-s22-hw2p2-classification
! kaggle competitions download -c 11-785-s22-hw2p2-verification

# Extract the archives quietly into /content; later cells read from
# /content/classification/... and /content/verification/...
! unzip -q /content/competitions/11-785-s22-hw2p2-classification/11-785-s22-hw2p2-classification.zip -d /content
! unzip -q /content/competitions/11-785-s22-hw2p2-verification/11-785-s22-hw2p2-verification.zip -d /content

# Hide the download/unzip output.
clear_output()

## Dependencies

In [173]:
import datetime
import os
import os.path as osp

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as ttf
from PIL import Image
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from tqdm import tqdm

In [191]:
import yaml
import wandb
import torch
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import ExponentialLR, ReduceLROnPlateau
import albumentations as A
from albumentations.pytorch import ToTensorV2
# from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# torch.autograd.set_detect_anomaly(False)
# torch.autograd.profiler.profile(False)
# torch.autograd.profiler.emit_nvtx(False)

ImportError: ignored

# Classification

## FaceNet

In [175]:
class FaceNet(nn.Module):
    """
    Face classifier: a convolutional backbone followed by one linear layer.

    The backbone is selected by ``config['backbone']``:

    * ``'simple'`` - a plain CNN assembled from ``config['arch']``. Every entry
      contributes Conv2d -> BatchNorm2d -> ReLU and, when its ``"pool"`` entry
      is not None, an adaptive max or average pooling layer. The reference
      "very low" architecture is four conv layers with 64/128/256/512
      channels (first layer kernel 7, stride 4; the rest kernel 3, stride 2)
      ending in a 1x1 average pool.
    * ``'resnet_18'`` / ``'resnet_34'`` - the pretrained torchvision ResNet
      with its last two children removed, followed by a 1x1 adaptive average
      pool.

    In both cases the backbone ends with ``nn.Flatten`` so it emits one feature
    vector per image.

    Design rationale (from the assignment hand-out):

    * Each convolution downsamples. Downsampling 2x effectively doubles the
      receptive field; 32x total downsampling is standard for image models.
    * Every 2x downsample costs 4x less computation at equal width, so the
      channel count is doubled to keep computation roughly balanced and to
      retain, in the channel dimension, information lost spatially.

    Config keys read here:
        backbone (str): ``'simple'``, ``'resnet_18'`` or ``'resnet_34'``.
        arch (dict): layer specs, only for the ``'simple'`` backbone. Each value
            is ``{"conv": [in, out, kernel, stride, padding], "pool": None or
            {"max": bool, "output": size}}``.
        num_classes (int, optional): classifier width, defaults to 7000.

    Raises:
        ValueError: if ``config['backbone']`` is not one of the supported names.
    """
    def list_to_kwarg(self, inc, outc, kernel, s, p):
        """Turn a positional ``[in, out, kernel, stride, padding]`` spec into ``nn.Conv2d`` kwargs."""
        return {
            "in_channels": inc,
            "out_channels": outc,
            "kernel_size": kernel,
            "stride": s,
            "padding": p,
        }

    def _build_simple_backbone(self, arch, default_feat_dim=512):
        """Build the plain CNN described by ``arch`` and return ``(module, feature_dim)``."""
        layers = []
        # 512 was the previously hard-coded classifier input; it is kept only as
        # the fallback for an empty ``arch``.
        feat_dim = default_feat_dim
        for l_params in arch.values():
            conv_params = self.list_to_kwarg(*l_params["conv"])
            layers.append(nn.Conv2d(**conv_params))
            layers.append(nn.BatchNorm2d(conv_params["out_channels"]))
            layers.append(nn.ReLU())

            # Track the flattened feature size produced by the most recent layer.
            # Without a trailing pool the spatial size depends on the input, so
            # this assumes the map is 1x1 at that point - TODO confirm for
            # architectures that do not end in a pool.
            feat_dim = conv_params["out_channels"]

            pool_cfg = l_params.get("pool")
            if pool_cfg is not None:
                pool_cls = nn.AdaptiveMaxPool2d if pool_cfg["max"] else nn.AdaptiveAvgPool2d
                layers.append(pool_cls(pool_cfg["output"]))

                out_size = pool_cfg["output"]
                if isinstance(out_size, int):
                    out_size = (out_size, out_size)
                if all(isinstance(dim, int) for dim in out_size):
                    feat_dim *= out_size[0] * out_size[1]

        layers.append(nn.Flatten())
        return nn.Sequential(*layers), feat_dim

    def __init__(self, config):
        super().__init__()

        num_classes = config.get('num_classes', 7000)
        backbone_name = config['backbone']

        if backbone_name == 'simple':
            # The first conv of the reference architecture uses stride 4: with
            # 224x224 inputs, 4x4 patches are just low-level detail.
            self.backbone, feat_dim = self._build_simple_backbone(config['arch'])
        elif backbone_name in ('resnet_18', 'resnet_34'):
            if backbone_name == 'resnet_18':
                resnet_net = models.resnet18(pretrained=True)
            else:
                resnet_net = models.resnet34(pretrained=True)
            feat_dim = 512

            # Drop the last two children of the torchvision model and attach
            # our own 1x1 pooling + flatten.
            layers = list(resnet_net.children())[:-2]
            layers.append(nn.AdaptiveAvgPool2d((1, 1)))
            layers.append(nn.Flatten())
            self.backbone = nn.Sequential(*layers)
        else:
            # Previously this fell through to an unbound ``resnet_net``.
            raise ValueError(
                "Unknown backbone {!r}; expected 'simple', 'resnet_18' or 'resnet_34'".format(backbone_name))

        self.backbone.out_channels = feat_dim
        self.cls_layer = nn.Linear(feat_dim, num_classes)

    def forward(self, x, return_feats=False):
        """
        Run the network on a batch of images.

        Args:
            x: input batch passed straight to the backbone.
            return_feats: when True, return the backbone's feature vectors (the
                "embedding" used for the verification task) instead of the
                classification logits.

        Returns:
            The flattened backbone features if ``return_feats`` is True,
            otherwise the output of the classification layer.
        """
        feats = self.backbone(x)
        if return_feats:
            # Skip the classifier entirely; its output would be discarded.
            return feats
        return self.cls_layer(feats)

## FaceNet Training

In [194]:
from albumentations.augmentations.transforms import HorizontalFlip
class AlbumentationsTransform:
    """
    Adapter that lets an albumentations pipeline act as a torchvision ``transform``.

    torchvision datasets call ``transform(img)`` with one positional image,
    whereas albumentations pipelines must be called as ``pipeline(image=array)``
    and return a dict. This wrapper converts between the two conventions.
    """

    def __init__(self, pipeline):
        self.pipeline = pipeline

    def __call__(self, img):
        # np.array() makes a writable array copy of the incoming image.
        return self.pipeline(image=np.array(img))["image"]


class FaceNetSetup:
    """
    Builds and runs a FaceNet classification experiment.

    ``setup()`` creates the data loaders, model, optimizer, scheduler and output
    directory; ``train()`` runs the epoch loop with mixed precision, validating
    and checkpointing after every epoch; ``save_model()`` stores the final
    weights.

    Args:
        config (dict): experiment configuration. Keys read by this class:
            ``log``, ``subset``, ``transforms``, ``batch_size``, ``epochs``,
            ``optim``, ``randomize`` and ``''`` (experiment label).
        save_path (str): directory under which a per-model folder is created.
    """

    def __init__(self, config, save_path):
        self.config = config
        self.log = config['log']

        if config['subset']:
            train_path = r"train_subset/train_subset"
        else:
            train_path = r"classification/classification/train"

        self.SAVE_DIR = save_path
        self.DATA_DIR = r"/content"
        self.TRAIN_DIR = osp.join(self.DATA_DIR, train_path)
        self.VAL_DIR = osp.join(self.DATA_DIR, r"classification/classification/dev")

    def __check_model_params(self):
        """Print the model's parameter count and assert it is at most 35M."""
        num_parameters = sum(p.numel() for p in self.model.parameters())
        print("Number of Params: {}".format(num_parameters))
        assert num_parameters <= 35000000

    def __gen_model_name(self):
        """
        Derive a run name from the config.

        Scalar entries are appended as ``<abbr><value>_`` (``abbr`` is the first
        letter of the key, or the whole key when it has at most two characters)
        until the first dict-valued entry, whose ``lr`` ends the name.
        """
        save_name = ''
        if not self.config['subset']:
            save_name += "Full_"
        for key, val in self.config.items():
            abbr = key[0] if len(key) > 2 else key
            if isinstance(val, dict):
                save_name += 'lr' + str(val["lr"])
                break
            save_name += abbr + str(val) + '_'
        if self.config['randomize']:
            save_name = save_name + "-v" + str(np.random.randint(10, 1000))
        print("\nModel Name: ", save_name)

        return save_name

    def __save_model_params(self):
        """
        Create the model directory and write the configuration into it.

        Returns:
            The model directory path, or None when the directory already exists
            and the user declines to replace it.
        """
        save_path = os.path.join(self.SAVE_DIR, self.model_name)
        try:
            # makedirs also creates SAVE_DIR when it is missing, and still raises
            # FileExistsError when the model directory itself already exists.
            os.makedirs(save_path)
        except FileExistsError:
            d = input("Model name already exists. Delete existing model? (y/n)")
            if d == 'y':
                import shutil
                shutil.rmtree(save_path)
                os.makedirs(save_path)
            else:
                return None

        os.makedirs(os.path.join(save_path, 'Checkpoints'), exist_ok=True)
        # Saving Model Configuration
        with open(os.path.join(save_path, 'model_config.yaml'), 'w') as metadata:
            yaml.dump({'Experiment': self.config.get('', '')}, metadata, indent=4, default_flow_style=False)
            yaml.dump(self.config, metadata, indent=4, default_flow_style=False)
        print("Model saved at: ", save_path)
        return save_path

    def __dataloaders(self):
        """
        Create the train/validation datasets and loaders.

        With ``config['transforms']`` set, training uses random geometric and
        colour augmentation followed by ImageNet normalisation; validation uses
        a centre crop and the same normalisation. Otherwise images
        are only scaled to float [0, 1] and converted to tensors.
        """
        imagenet_mean = (0.485, 0.456, 0.406)
        imagenet_std = (0.229, 0.224, 0.225)

        if self.config["transforms"] is not None:
            train_pipeline = A.Compose([
                A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.4),
                A.RandomCrop(height=128, width=128),
                A.HorizontalFlip(p=0.5),
                A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.4),
                A.RandomBrightnessContrast(p=0.5),
                A.Normalize(mean=imagenet_mean, std=imagenet_std),
                ToTensorV2(),
            ])
            # No random flip here: validation must be deterministic.
            val_pipeline = A.Compose([
                A.CenterCrop(height=128, width=128),
                A.Normalize(mean=imagenet_mean, std=imagenet_std),
                ToTensorV2(),
            ])
        else:
            # ToTensorV2 alone keeps the uint8 dtype; ToFloat scales to float32
            # in [0, 1] so the tensors can be fed to the convolutions.
            train_pipeline = A.Compose([A.ToFloat(max_value=255), ToTensorV2()])
            val_pipeline = A.Compose([A.ToFloat(max_value=255), ToTensorV2()])

        # Both branches populate the same attribute names used below.
        self.train_transform = AlbumentationsTransform(train_pipeline)
        self.val_transform = AlbumentationsTransform(val_pipeline)

        self.train_dataset = torchvision.datasets.ImageFolder(self.TRAIN_DIR, transform=self.train_transform)
        self.val_dataset = torchvision.datasets.ImageFolder(self.VAL_DIR, transform=self.val_transform)

        self.train_loader = DataLoader(self.train_dataset, batch_size=self.config['batch_size'], shuffle=True,
                                       drop_last=True, num_workers=2, pin_memory=True)
        # drop_last=False: validation accuracy is divided by the full dataset
        # size, so no sample may be skipped.
        self.val_loader = DataLoader(self.val_dataset, batch_size=self.config['batch_size'], shuffle=False,
                                     drop_last=False, num_workers=1)

    def setup(self):
        """Create loaders, model, output directory, loss, optimizer, scheduler and AMP scaler."""
        self.__dataloaders()

        # Model
        self.model = FaceNet(self.config)
        self.model.cuda()
        self.__check_model_params()
        self.model_name = self.__gen_model_name()
        self.model_path = self.__save_model_params()
        if self.model_path is None:
            print("No model directory selected; checkpoints and final weights will not be written.")

        if self.log:
            wandb.init(project="hw2-preliminary", entity="nefario7", config=self.config)

        # Loss
        self.criterion = nn.CrossEntropyLoss()

        # Optimizer
        self.optimizer = optim.SGD(self.model.parameters(), **self.config['optim'])

        # Scheduler: stepped once per batch, so T_max is the total number of
        # optimizer steps over the whole run.
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=(len(self.train_loader) * self.config['epochs']))

        # Gradient scaler for FP16 mixed-precision training.
        self.scaler = torch.cuda.amp.GradScaler()

    def train(self):
        """
        Train for ``config['epochs']`` epochs, validating and checkpointing after each.

        Returns:
            The trained model.
        """
        epochs = self.config['epochs']
        batch_size = self.config['batch_size']
        # Guards the per-epoch averages when the loader yields no full batch.
        num_batches = max(len(self.train_loader), 1)
        if self.log:
            # NOTE(review): log_freq is passed the batch size, as in the original
            # call; confirm that is the intended logging interval.
            wandb.watch(self.model, criterion=self.criterion, log="all", log_freq=batch_size, idx=None, log_graph=True)
        for epoch in range(epochs):
            print("\n-------------------------------------------------------------")
            # validate() leaves the model in eval mode; switch back every epoch.
            self.model.train()
            # leave=False and position=0 keep tqdm usable in jupyter
            batch_bar = tqdm(total=len(self.train_loader), dynamic_ncols=True, leave=False, position=0, desc='Train')

            num_correct = 0
            total_loss = 0

            for i, (x, y) in enumerate(self.train_loader):
                self.optimizer.zero_grad()

                x = x.cuda()
                y = y.cuda()

                # Forward pass and loss under autocast for FP16.
                with torch.cuda.amp.autocast():
                    outputs = self.model(x)
                    loss = self.criterion(outputs, y)

                # Update # correct & loss as we go. The train loader drops the
                # last partial batch, so every batch has exactly batch_size items.
                num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
                total_loss += float(loss)
                train_loss = float(total_loss / (i + 1))
                train_acc = 100 * num_correct / ((i + 1) * batch_size)
                current_lr = float(self.optimizer.param_groups[0]['lr'])

                if self.log and i % 10 == 0:
                    wandb.log({
                        "Training Accuracy": train_acc,
                        "Training Loss": float(train_loss),
                        "Num Correct": num_correct,
                        "Learning Rate": current_lr
                    })

                batch_bar.set_postfix(
                    acc="{:.04f}%".format(train_acc),
                    loss="{:.04f}".format(train_loss),
                    num_correct=num_correct,
                    lr="{:.04f}".format(current_lr))

                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()  # FP16-specific: adjusts the loss scale

                self.scheduler.step()  # once per batch, matching T_max above
                batch_bar.update()

            batch_bar.close()
            print("Epoch {}/{}: Train Acc {:.04f}%, Train Loss {:.04f}, Learning Rate {:.04f}".format(
                epoch + 1,
                epochs,
                100 * num_correct / (num_batches * batch_size),
                float(total_loss / num_batches),
                float(self.optimizer.param_groups[0]['lr'])))

            # Epoch Validation
            self.validate(self.model)

            # Save Checkpoint after epoch
            self.save_checkpoint(epoch, self.model, self.optimizer, total_loss / num_batches)

        if self.log:
            wandb.finish()

        return self.model

    def validate(self, val_model):
        """
        Evaluate ``val_model`` on the validation loader.

        The model is put in eval mode and left there.

        Returns:
            Validation accuracy in percent.
        """
        val_model.eval()
        batch_bar = tqdm(total=len(self.val_loader), dynamic_ncols=True, position=0, leave=False, desc='Val')
        num_correct = 0
        num_seen = 0
        for x, y in self.val_loader:
            x = x.cuda()
            y = y.cuda()

            with torch.no_grad():
                outputs = val_model(x)

            num_correct += int((torch.argmax(outputs, axis=1) == y).sum())
            # Count actual samples: the final batch may be smaller than batch_size.
            num_seen += int(y.size(0))
            batch_bar.set_postfix(acc="{:.04f}%".format(100 * num_correct / num_seen))

            batch_bar.update()

        batch_bar.close()
        val_acc = 100 * num_correct / max(len(self.val_dataset), 1)
        print("\nValidation: {:.04f}%".format(val_acc))
        return val_acc

    def save_checkpoint(self, epoch, model, optimizer, loss):
        """Write model/optimizer state for ``epoch`` into ``<model_path>/Checkpoints``; skipped when no directory exists."""
        if getattr(self, 'model_path', None) is None:
            print("Checkpoint not saved: no model directory.")
            return
        print("Saving Checkpoint!")
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss
            }, os.path.join(self.model_path, 'Checkpoints', 'chkpt_' + str(epoch) + '.pth'))

    def save_model(self, onnx=False):
        """
        Save the model weights to ``<model_path>/model.pth``.

        Args:
            onnx: when True, also export ``model.onnx`` next to the weights and,
                if wandb logging is enabled, upload it.
        """
        if getattr(self, 'model_path', None) is None:
            print("Model not saved: no model directory.")
            return

        name = os.path.join(self.model_path, "model.pth")
        torch.save(self.model.state_dict(), name)
        if onnx:
            # splitext only strips the final extension; run names contain dots
            # (e.g. "lr0.1"), which split('.') would truncate.
            onnx_path = os.path.splitext(name)[0] + '.onnx'
            # torch.onnx.export needs an example input to trace the model.
            # assumes 3x224x224 inputs, as stated in the FaceNet notes - TODO confirm
            device = next(self.model.parameters()).device
            dummy_input = torch.randn(1, 3, 224, 224, device=device)
            torch.onnx.export(self.model, dummy_input, onnx_path)
            if self.log:
                wandb.save(onnx_path)

        print("Model saved at : ", self.model_path)


## Run

In [195]:
"""
The well-accepted SGD batch_size & lr combination for CNN classification is 256 batch size for 0.1 learning rate.
When changing batch size for SGD, follow the linear scaling rule - halving batch size -> halve learning rate, etc.
This is less theoretically supported for Adam, but in my experience, it's a decent ballpark estimate.
Just for the early submission. We'd want you to train like 50 epochs for your main submissions.
"""
# Experiment configuration consumed by FaceNetSetup (data, optimisation, naming)
# and FaceNet (backbone, arch).
# NOTE(review): by the linear-scaling rule quoted above, batch size 1024 would
# pair with lr 0.4 rather than 0.1 - confirm the values below are intentional.
config = {
    '': 'Test',                # experiment label; written as 'Experiment' in model_config.yaml
    'batch_size': 1024,
    'transforms': None,        # None selects the no-augmentation branch in FaceNetSetup
    'epochs': 2,
    'backbone': 'simple',      # 'simple' builds the CNN from 'arch'; 'resnet_18'/'resnet_34' are the alternatives
    'dropout': None,           # only contributes to the generated run name in the code shown
    'optimizer': 'SGD',        # only contributes to the generated run name; FaceNetSetup always builds optim.SGD
    'optim':{'lr': 0.1, 'momentum':0.9, 'weight_decay':1e-4},  # passed as keyword arguments to optim.SGD
    'arch': {   #Order (In, Out, Kernel, Stride, Padding)
        1: {"conv": [3, 64, (7, 7), 4, 2], "pool": {'max': True, "output": (56, 56)}},
        2: {"conv": [64, 128, (3, 3), 2, 1], "pool": {'max': True, "output": (28, 28)}},
        3: {"conv": [128, 256, (3, 3), 2, 1], "pool": {'max': True, "output": (14, 14)}},
        4: {"conv": [256, 512, (3, 3), 2, 1], "pool": {'max': False, "output": (1, 1)}}
    },
    'scheduler': 'CosineAnnealingLR',  # not read by the code shown; the scheduler is fixed in FaceNetSetup.setup
    'subset': True,            # True trains on train_subset instead of the full training set
    'save': True,              # not read by the code shown
    'log': False,              # True enables Weights & Biases logging
    'randomize': True,         # True appends a random "-v<N>" suffix to the run name
}

# Release cached GPU memory left over from earlier runs in this kernel.
torch.cuda.empty_cache()

# FaceNet
face = FaceNetSetup(config, save_path = r'/content/cmudrive/IDL/hw2-early')
face.setup()

# Model Training
facenet_model = face.train()

# Save Trained Model
face.save_model()

# Validation
# train() already validates after every epoch, so this repeats the final evaluation.
face.validate(facenet_model)

NameError: ignored

## Classification Task: Submit to Kaggle

In [114]:
class ClassificationTestSet(Dataset):
    """
    Unlabelled image dataset over every file in a single directory.

    Items are returned in sorted file-name order, which the submission code
    relies on when it derives image ids from the item index.

    Args:
        data_dir: directory containing the image files.
        transforms: callable applied to each loaded RGB PIL image.
    """

    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # Sorted list of full paths to each file in data_dir.
        self.img_paths = [osp.join(self.data_dir, fname) for fname in sorted(os.listdir(self.data_dir))]

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        # The context manager closes the file handle; convert("RGB") returns a
        # loaded copy, so the result stays valid after closing, and it matches
        # the RGB loading ImageFolder performs on the training path.
        with Image.open(self.img_paths[idx]) as img:
            return self.transforms(img.convert("RGB"))

In [None]:
class ClassificationSubmission():
    """
    Runs saved models on the test data and writes a Kaggle submission CSV.

    NOTE(review): ``Network`` and ``SubmissionItems`` are referenced below but
    are not defined in the visible notebook, and ``self.test_samples`` is never
    assigned in this class. The inference methods cannot run until those are
    supplied (or replaced with this notebook's FaceNet / ClassificationTestSet).

    Args:
        data_path: stored on the instance as ``data_path``; not otherwise used here.
        csv_path: stored on the instance as ``csv_path``; not otherwise used here.
    """

    def __init__(self, data_path, csv_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.drive_dir = r'/content/cmudrive/IDL'
        self.DATA_DIR = r"/content"
        self.TEST_DIR = osp.join(self.DATA_DIR, r"classification/classification/test")
        # Previously accepted and silently discarded; kept for callers.
        self.data_path = data_path
        self.csv_path = csv_path
        # Set by the inference methods, or lazily by generate_submission.
        self.timestamp = None

    def __get_labels(self, imodel, iargs):
        """Return the arg-max class prediction for every test item, in loader order."""
        imodel.eval()
        labels = []
        print(f"Context = {iargs['context']} | Batch Size = {iargs['batch_size']} | Arch = {iargs['arch']}")
        with torch.no_grad():
            # NOTE(review): self.test_samples and SubmissionItems are not defined
            # anywhere in the visible notebook - confirm where they come from.
            for i in range(len(self.test_samples)):
                X = self.test_samples[i]
                test_items = SubmissionItems(X, context=iargs['context'])
                test_loader = torch.utils.data.DataLoader(test_items, batch_size=iargs['batch_size'], num_workers=2, pin_memory=True, shuffle=False)

                for data in tqdm(test_loader):
                    data = data.float().to(self.device)
                    output = imodel(data)
                    y = torch.argmax(output, axis=1)
                    labels.extend(y.tolist())
        return labels

    def __load_model(self, model_name, model_type):
        """
        Load a saved model and its metadata from ``<drive_dir>/<model_type>/<model_name>``.

        Returns:
            ``(model, args)`` where ``args`` is the parsed metadata dict.
        """
        # NOTE(review): FaceNetSetup writes 'model_config.yaml', not
        # 'model_parameters.yaml' - confirm which file this should read.
        meta_path = os.path.join(self.drive_dir,  model_type, model_name, 'model_parameters.yaml')
        with open(meta_path, 'r') as meta:
            args = yaml.safe_load(meta)

        model_path = os.path.join(self.drive_dir, model_type, model_name, 'model.pth')
        # NOTE(review): Network is not defined in the visible notebook.
        model = Network(args["arch"], args['context'], args['drop']).to(self.device)
        # map_location lets weights saved on a GPU load on a CPU-only runtime.
        model.load_state_dict(torch.load(model_path, map_location=self.device))
        return model, args

    def simple_inference(self, model_name, model_type):
        """Predict test labels with a single saved model."""
        print("Running inference...")
        self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        model, args = self.__load_model(model_name, model_type)
        labels = self.__get_labels(model, args)

        return labels

    def ensemble_inference(self, model_names, model_type):
        """
        Predict with several saved models and combine them by per-item majority vote.

        Returns:
            ``(labels_df, ensembled_labels)``: a DataFrame with one column per
            model and one row per test item, and the list of per-row modes.
        """
        print("Running ensembled inference...")
        self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

        prelim_labels = []
        for name in model_names:
            print("\n\n\tModel : ", name)
            model, args = self.__load_model(name, model_type)
            prelim_labels.append(self.__get_labels(model, args))

        # The vote is unweighted. The hard-coded accuracy weights that used to
        # be computed here were never applied (and raised TypeError by dividing
        # a list by a float), so they have been removed.
        print("Combining predictions...")
        labels_df = pd.DataFrame(prelim_labels)
        labels_df = labels_df.transpose()
        ensembled_labels = labels_df.mode(axis=1, dropna=False).iloc[:, 0].tolist()

        return labels_df, ensembled_labels

    def generate_submission(self, save_path, labels):
        """
        Write ``labels`` as a Kaggle submission CSV.

        The file is ``<drive_dir>/<save_path><timestamp>/submission.csv`` with
        header ``id,label``; row ``i`` gets the id ``<i zero-padded to 6>.jpg``.

        Args:
            save_path: prefix of the submission directory name.
            labels: predicted labels in test-set order.

        Returns:
            Path of the written CSV.
        """
        if getattr(self, "timestamp", None) is None:
            self.timestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        sub_dir = os.path.join(self.drive_dir, save_path + self.timestamp)
        sub_path = os.path.join(sub_dir, 'submission.csv')

        # Write to the path that is reported and returned, using the labels that
        # were passed in rather than notebook globals.
        os.makedirs(sub_dir, exist_ok=True)
        with open(sub_path, "w") as f:
            f.write("id,label\n")
            for i, label in enumerate(labels):
                f.write("{},{}\n".format(str(i).zfill(6) + ".jpg", label))

        print(f"File saved at : {sub_path}")
        return sub_path

In [117]:
# Where the extracted Kaggle data lives, and the classification test images within it.
DATA_DIR = r"/content"
TEST_DIR = osp.join(DATA_DIR, r"classification/classification/test")

# Evaluation-time preprocessing: tensor conversion only. Kept as a list because
# the verification cells wrap it in ttf.Compose again.
val_transforms = [ttf.ToTensor()]
eval_pipeline = ttf.Compose(val_transforms)

test_dataset = ClassificationTestSet(TEST_DIR, eval_pipeline)

# Keep the dataset order (no shuffling, no dropped batch) so predictions line up
# with the sorted file names.
test_loader = DataLoader(
    test_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [120]:
# Predict a class index for every test image, in dataset order.
face.model.eval()

res = []
batch_bar = tqdm(total=len(test_loader), dynamic_ncols=True, position=0, leave=False, desc='Test')
with torch.no_grad():
    for x in test_loader:
        logits = face.model(x.cuda())
        predicted = torch.argmax(logits, axis=1)
        res.extend(predicted.tolist())
        batch_bar.update()
batch_bar.close()



In [121]:
# One "<zero-padded index>.jpg,<predicted label>" row per test image.
with open(r"/content/classification_early_submission.csv", "w+") as f:
    f.write("id,label\n")
    for idx in tqdm(range(len(test_dataset))):
        image_id = "{:06d}.jpg".format(idx)
        f.write("{},{}\n".format(image_id, res[idx]))

In [122]:
# Upload the classification CSV written by the previous cell; -m sets the submission message.
!kaggle competitions submit -c 11-785-s22-hw2p2-classification -f /content/classification_early_submission.csv -m "Early2"

100% 541k/541k [00:00<00:00, 2.65MB/s]
Successfully submitted to Face Recognition

# Verification

## Verification Task: Validation

There are 6K verification dev images, but 166K "pairs" for you to compare. So, it's much more efficient to compute the features for the 6K verification images, and just compare afterwards.

This will be done by creating a dictionary mapping the image file names to the features. Then, you'll use this dictionary to compute the similarities for each pair.

In [123]:
# Number of entries in the dev image directory.
!ls /content/verification/verification/dev | wc -l
# Number of lines in the dev pairs CSV (the first line is a header).
!cat /content/verification/verification/verification_dev.csv | wc -l

6000
166801


In [124]:
class VerificationDataset(Dataset):
    """
    Image dataset over every file in a directory that also yields each file's name.

    Args:
        data_dir: directory containing the image files.
        transforms: callable applied to each loaded RGB PIL image.
    """

    def __init__(self, data_dir, transforms):
        self.data_dir = data_dir
        self.transforms = transforms

        # Sorted list of full paths to each file in data_dir.
        self.img_paths = [osp.join(self.data_dir, fname) for fname in sorted(os.listdir(self.data_dir))]

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Return ``(transformed image, path relative to data_dir)``."""
        img_path = self.img_paths[idx]
        # The context manager closes the file handle; convert("RGB") returns a
        # loaded copy, so the result stays valid after closing.
        with Image.open(img_path) as img:
            image = self.transforms(img.convert("RGB"))
        return image, osp.relpath(img_path, self.data_dir)

In [125]:
# Dev images for the verification task, preprocessed like the classification test set.
val_veri_dir = osp.join(DATA_DIR, "verification/verification/dev")
val_veri_dataset = VerificationDataset(val_veri_dir, ttf.Compose(val_transforms))

val_ver_loader = torch.utils.data.DataLoader(
    val_veri_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=1,
)

In [129]:
# Compute one embedding per dev image and index it by file name, so that each
# of the many pairs can be scored with two dictionary look-ups.
face.model.eval()

feats_dict = {}
with torch.no_grad():
    for imgs, path_names in tqdm(val_ver_loader, total=len(val_ver_loader), position=0, leave=False):
        # return_feats=True yields the backbone features rather than class logits.
        features = face.model(imgs.cuda(), return_feats=True)
        feats_dict.update(zip(path_names, features))



In [158]:
# Score every dev pair with the cosine similarity of its two embeddings.
# dim=0 because each stored feature is a single 1-D vector.
cosine_sim = nn.CosineSimilarity(dim=0, eps=1e-8)
val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_dev.csv")

# Read the pair list inside a context manager so the file handle is closed.
with open(val_veri_csv) as pairs_file:
    pair_lines = pairs_file.read().splitlines()[1:]  # skip header

pred_similarities = []
gt_similarities = []
for line in tqdm(pair_lines, position=0, leave=False):
    if not line.strip():
        # Tolerate blank lines (e.g. a trailing newline) instead of failing to unpack.
        continue
    img_path1, img_path2, gt = line.split(",")
    # feats_dict is keyed by bare file name, so drop any directory prefix.
    img_path1 = img_path1.split('/')[-1]
    img_path2 = img_path2.split('/')[-1]
    feat1 = feats_dict[img_path1]
    feat2 = feats_dict[img_path2]

    sim_score = cosine_sim(feat1, feat2)
    pred_similarities.append(sim_score.item())
    gt_similarities.append(int(gt))

pred_similarities = np.array(pred_similarities)
gt_similarities = np.array(gt_similarities)

print("AUC:", roc_auc_score(gt_similarities, pred_similarities))



AUC: 0.9558763519432023


## Verification Task: Submit to Kaggle

In [159]:
# Test images for the verification task, preprocessed like the dev images.
test_veri_dir = osp.join(DATA_DIR, "verification/verification/test")
test_veri_dataset = VerificationDataset(test_veri_dir, ttf.Compose(val_transforms))

test_ver_loader = torch.utils.data.DataLoader(
    test_veri_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=1,
)

In [160]:
face.model.eval()

# Embed every test image once and index the embeddings by file name.
# NOTE: this replaces the dev-set feats_dict built earlier in the notebook.
feats_dict = dict()
for batch_idx, (imgs, path_names) in tqdm(enumerate(test_ver_loader), total=len(test_ver_loader), position=0, leave=False):
    imgs = imgs.cuda()

    with torch.no_grad():
        # return_feats=True yields the backbone features, not the class logits.
        feats = face.model(imgs, return_feats=True)
        for i, feat in enumerate(feats):
            feats_dict[path_names[i]] = feat

# Cosine similarity between two 1-D embeddings.
cosine_sim = nn.CosineSimilarity(dim=0, eps=1e-8)
# The variable keeps its original name although it now points at the test pairs.
val_veri_csv = osp.join(DATA_DIR, "verification/verification/verification_test.csv")

# Read the pair list inside a context manager so the file handle is closed.
with open(val_veri_csv) as pairs_file:
    pair_lines = pairs_file.read().splitlines()[1:]  # skip header

# Score each test pair; the CSV has no ground-truth column.
pred_similarities = []
for line in tqdm(pair_lines, position=0, leave=False):
    if not line.strip():
        # Tolerate blank lines (e.g. a trailing newline) instead of failing to unpack.
        continue
    img_path1, img_path2 = line.split(",")
    # feats_dict is keyed by bare file name, so drop any directory prefix.
    img_path1 = img_path1.split('/')[-1]
    img_path2 = img_path2.split('/')[-1]
    feat1 = feats_dict[img_path1]
    feat2 = feats_dict[img_path2]
    sim_score = cosine_sim(feat1, feat2)
    pred_similarities.append(sim_score.item())



In [162]:
# One "<pair index>,<similarity score>" row per test pair, in CSV order.
with open(r"/content/verification_early_submission.csv", "w+") as f:
    f.write("id,match\n")
    for pair_id, score in enumerate(pred_similarities):
        f.write("{},{}\n".format(pair_id, score))

In [163]:
# Upload the verification CSV written by the previous cell; -m sets the submission message.
!kaggle competitions submit -c 11-785-s22-hw2p2-verification -f /content/verification_early_submission.csv -m 'Early'

100% 16.6M/16.6M [00:00<00:00, 38.9MB/s]
Successfully submitted to Face Verification