In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
from torchvision import datasets, models, transforms

In [2]:
class CNN_model_classification(nn.Module):
    def __init__(self):
        super(CNN_model_classification, self).__init__()
        """
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(5,5))
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(5,5))
        self.bn2 = nn.BatchNorm2d(64)
        self.conv2_drop = nn.Dropout2d(p = 0.3)
        self.conv3_drop = nn.Dropout2d(p = 0.5)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=(3,3))
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=(3,3))
        self.bn4 = nn.BatchNorm2d(256)
        self.conv5 = nn.Conv2d(256, 512, kernel_size=(3,3))
        self.bn5 = nn.BatchNorm2d(512)
        self.conv6 = nn.Conv2d(512, 1024, kernel_size=(1,1))
        self.bn6 = nn.BatchNorm2d(1024)
        self.fc1 = nn.Linear(4096, 2300)
        """

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        self.pool_1 = nn.MaxPool2d(kernel_size=2)

        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        
        self.conv5 = nn.Conv2d(in_channels=24, out_channels=48, kernel_size=3, stride=1, padding=1)
        self.relu5 = nn.ReLU()

        self.conv6 = nn.Conv2d(in_channels=48, out_channels=96, kernel_size=3, stride=1, padding=1)
        self.relu6 = nn.ReLU()

        self.pool_2 = nn.MaxPool2d(kernel_size=2)

        self.conv7 = nn.Conv2d(in_channels=96, out_channels=186, kernel_size=3, stride=1, padding=1)
        self.relu7 = nn.ReLU()

        self.conv8 = nn.Conv2d(in_channels=186, out_channels=196, kernel_size=3, stride=1, padding=1)
        self.relu8 = nn.ReLU()

        self.fc = nn.Linear(in_features=8 * 8 * 196, out_features=2300)

    def forward(self, input):
        output = self.conv1(input)
        output = self.relu1(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.pool_1(output)

        output = self.conv3(output)
        output = self.relu3(output)
        
        output = self.conv5(output)
        output = self.relu5(output)

        output = self.conv6(output)
        output = self.relu6(output)

        output = self.pool_2(output)

        output = self.conv7(output)
        output = self.relu7(output)

        output = self.conv8(output)
        output = self.relu8(output)

        output = output.view(-1, 8 * 8 * 196)

        output = self.fc(output)
        output = F.log_softmax(output, dim=1)

        return output

In [3]:
class ResBlock(nn.Module):
    def __init__(self, channel_size, stride=1):
        super(ResBlock, self).__init__()
        self.block = nn.Sequential(nn.Conv2d(in_channels=channel_size, out_channels=channel_size, 
                                             kernel_size=3, stride=stride, padding=1, bias=False),
                                   nn.BatchNorm2d(num_features=channel_size),
                                   nn.ReLU(inplace=True),
                                   nn.Conv2d(in_channels=channel_size, out_channels=channel_size, 
                                             kernel_size=3, stride=stride, padding=1, bias=False),
                                   nn.BatchNorm2d(num_features=channel_size))
        self.logit_non_linear = nn.ReLU(inplace=True)

    def forward(self, x):
        output = x
        output = self.block(output)
        output = self.logit_non_linear(output + x)
        return output
class Network(nn.Module):
    def __init__(self, num_feats, hidden_sizes, num_classes, feat_dim=10):
        super(Network, self).__init__()
        
        self.hidden_sizes = [num_feats] + hidden_sizes + [num_classes]
        
        self.layers = []
        for idx, channel_size in enumerate(hidden_sizes):
            self.layers.append(nn.Conv2d(in_channels=self.hidden_sizes[idx], 
                                         out_channels=self.hidden_sizes[idx+1], 
                                         kernel_size=3, stride=2, bias=False))
            self.layers.append(nn.ReLU(inplace=True))
            self.layers.append(ResBlock(channel_size=channel_size))
            
        self.layers = nn.Sequential(*self.layers)
        self.linear_label = nn.Linear(self.hidden_sizes[-2], self.hidden_sizes[-1], bias=False)
        
        # For creating the embedding to be passed into the Center Loss criterion
        self.linear_closs = nn.Linear(self.hidden_sizes[-2], feat_dim, bias=False)
        self.relu_closs = nn.ReLU(inplace=True)
    
    def forward(self, x, evalMode=False):
        output = x
        output = self.layers(output)
            
        output = F.avg_pool2d(output, [output.size(2), output.size(3)], stride=1)
        output = output.reshape(output.shape[0], output.shape[1])
        
        label_output = self.linear_label(output)
        label_output = label_output/torch.norm(self.linear_label.weight, dim=1)
        
        # Create the feature embedding for the Center Loss
        closs_output = self.linear_closs(output)
        closs_output = self.relu_closs(closs_output)

        return closs_output, label_output

def init_weights(m):
    if type(m) == nn.Conv2d or type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)

In [4]:
class Unit(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Unit, self).__init__()

        self.conv = nn.Conv2d(in_channels=in_channels, kernel_size=3, out_channels=out_channels, stride=1, padding=1)
        self.bn = nn.BatchNorm2d(num_features=out_channels)
        self.relu = nn.ReLU()

    def forward(self, input):
        output = self.conv(input)
        output = self.bn(output)
        output = self.relu(output)

        return output

class CNNet(nn.Module):
    def __init__(self, num_class):
        super(CNNet, self).__init__()

        # Create 14 layers of the unit with max pooling in between
        self.unit1 = Unit(in_channels=3, out_channels=32)
        self.unit2 = Unit(in_channels=32, out_channels=32)
        self.unit3 = Unit(in_channels=32, out_channels=32)

        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.unit4 = Unit(in_channels=32, out_channels=64)
        self.unit5 = Unit(in_channels=64, out_channels=64)
        self.unit6 = Unit(in_channels=64, out_channels=64)
        self.unit7 = Unit(in_channels=64, out_channels=64)

        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.unit8 = Unit(in_channels=64, out_channels=128)
        self.unit9 = Unit(in_channels=128, out_channels=128)
        self.unit10 = Unit(in_channels=128, out_channels=128)
        self.unit11 = Unit(in_channels=128, out_channels=128)

        self.pool3 = nn.MaxPool2d(kernel_size=2)

        self.unit12 = Unit(in_channels=128, out_channels=128)
        self.unit13 = Unit(in_channels=128, out_channels=128)
        self.unit14 = Unit(in_channels=128, out_channels=128)

        self.avgpool = nn.AvgPool2d(kernel_size=4)

        # Add all the units into the Sequential layer in exact order
        self.net = nn.Sequential(self.unit1, self.unit2, self.unit3, self.pool1, self.unit4, self.unit5, self.unit6
                                 , self.unit7, self.pool2, self.unit8, self.unit9, self.unit10, self.unit11, self.pool3,
                                 self.unit12, self.unit13, self.unit14, self.avgpool)

        self.fc = nn.Linear(in_features=128, out_features=num_class)

    def forward(self, input):
        output = self.net(input)
        output = output.view(-1, 128)
        output = self.fc(output)
        return output

In [5]:
image_datasets = {}
dataloaders_dict ={}
data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                #transforms.RandomResizedCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ]),
            'val': transforms.Compose([
                #transforms.Resize(32),
                #transforms.CenterCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ]),
        }
image_datasets['train'] = datasets.ImageFolder('/home/d.milovanov/win_kaggle/hw3/medium/', data_transforms['train'])
image_datasets['val'] = datasets.ImageFolder('/home/d.milovanov/win_kaggle/hw3/validation_classification/',
                                             data_transforms['val'])
dataloaders_dict['train'] = torch.utils.data.DataLoader(image_datasets['train'],batch_size=128, shuffle=True)
dataloaders_dict['val'] = torch.utils.data.DataLoader(image_datasets['val'],batch_size=128, shuffle=True)

In [20]:
hidden_sizes = [256, 512, 1024, 2048]
num_feats = 3
num_classes = 2300

#model = CNNet(num_classes)
cuda = torch.cuda.is_available()
model = Network(num_feats, hidden_sizes, num_classes)
model.apply(init_weights)
#model = Net()
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=1e-2, 
                            #weight_decay=5e-5,
                            weight_decay=5e-4,
                            momentum=0.9)
device = torch.device("cuda" if cuda else "cpu")
def train_model(model, train_loader, criterion, optimizer):
    model.train()
    model.to(device)
    running_loss = 0.0
    start_time = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):     
        optimizer.zero_grad()
        data = data.float()
        data = data.to(device)
        target = target.long().to(device)

        outputs = model(data)[1]
    
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

    end_time = time.time()

    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    return running_loss

In [7]:
def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()
        model.to(device)

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(test_loader):
            data = data.float()
            data = data.to(device)
            target = target.long().to(device)

            outputs = model(data)[1]
            

            _, predicted = torch.max(F.softmax(outputs, dim=1), 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()
        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc

In [21]:
import time
n_epochs = 100
Train_loss = []
Test_loss = []
Test_acc = []
for i in range(n_epochs):
    print("=================={0}/{1} epoch=============".format(i, n_epochs))
    train_loss = train_model(model, dataloaders_dict['train'], criterion, optimizer)
    test_loss, test_acc = test_model(model, dataloaders_dict['val'], criterion)
    Train_loss.append(train_loss)
    Test_loss.append(test_loss)
    Test_acc.append(test_acc)
    print('='*20)

Training Loss:  5.011818241471817 Time:  2903.111112833023 s
Testing Loss:  3.717221584584978
Testing Accuracy:  26.211693110193433 %
Training Loss:  3.017333748925519 Time:  2902.2039868831635 s
Testing Loss:  2.642170230547587
Testing Accuracy:  44.7294066507281 %
Training Loss:  2.2844844922553262 Time:  2904.4578261375427 s
Testing Loss:  2.2464216815100775
Testing Accuracy:  52.35818300369485 %
Training Loss:  1.9038356153264289 Time:  2912.5522191524506 s
Testing Loss:  1.9915782511234283
Testing Accuracy:  58.20473810041295 %
Training Loss:  1.6751337393765728 Time:  2904.8406682014465 s
Testing Loss:  1.8156611257129245
Testing Accuracy:  61.44316452945012 %
Training Loss:  1.5153164523067986 Time:  2904.8722858428955 s
Testing Loss:  1.774281620979309
Testing Accuracy:  61.42143012388611 %
Training Loss:  1.3993486094742011 Time:  2903.9404439926147 s
Testing Loss:  1.769388539923562
Testing Accuracy:  61.48663334057814 %
Training Loss:  1.3037081196774285 Time:  2904.27258253

Training Loss:  0.6455427852260925 Time:  3016.3839313983917 s
Testing Loss:  1.2843398617373571
Testing Accuracy:  72.223429689198 %
Training Loss:  0.6408380454784238 Time:  3026.427872657776 s
Testing Loss:  1.271025280157725
Testing Accuracy:  72.04955444468594 %
Training Loss:  0.6381099373728483 Time:  3036.6465809345245 s
Testing Loss:  1.291968325773875
Testing Accuracy:  71.74527276678982 %
Training Loss:  0.6335622104740751 Time:  3010.1208102703094 s
Testing Loss:  1.3053388992945354
Testing Accuracy:  71.65833514453381 %
Training Loss:  0.6307973560864386 Time:  3038.750501871109 s
Testing Loss:  1.2489794724517398
Testing Accuracy:  72.65811780047817 %
Training Loss:  0.6259187701579234 Time:  3026.490130662918 s
Testing Loss:  1.2457501391569774
Testing Accuracy:  73.17974353401434 %
Training Loss:  0.621947800406576 Time:  3087.880307674408 s
Testing Loss:  1.2270137800110712
Testing Accuracy:  72.7885242338622 %
Training Loss:  0.6141053080015989 Time:  3086.69061040878

KeyboardInterrupt: 

In [22]:
idx_to_class = {val: key for key, val in image_datasets['train'].class_to_idx.items()}

In [23]:
from PIL import Image
from io import BytesIO
import pandas as pd
from torch.autograd import Variable
data = {"id": [], "label": []}
data_transform = transforms.Compose([
                #transforms.RandomResizedCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ])
for i in range(4600):
    model.eval()
    model.to('cuda')
    data["id"].append(i)
    path = "/home/d.milovanov/win_kaggle/hw3/test_classification/" + str(i) + ".jpg"
    with open(path, 'rb') as f:
        raw_image = Image.open(f)
        img = data_transform(raw_image)
        img = img.unsqueeze(0)
        
        img = Variable(img)
        img = img.cuda()
        outputs = model(img)
        _, predicted = torch.max(outputs[1].data, 1)
        data["label"].append(idx_to_class[predicted.item()])


In [24]:
sub = pd.DataFrame.from_dict(data)

In [25]:
sub.to_csv("sub_6.csv", index=False)

In [13]:
sub.tail()

Unnamed: 0,id,label
4595,4595,1949
4596,4596,1521
4597,4597,526
4598,4598,1178
4599,4599,2110


In [33]:
image_datasets_ver = {}
dataloaders_dict_ver ={}
data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                #transforms.RandomResizedCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ]),
            'val': transforms.Compose([
                #transforms.Resize(32),
                #transforms.CenterCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ]),
        }
image_datasets_ver['val'] = datasets.ImageFolder('/home/d.milovanov/win_kaggle/hw3/validation_verification/',
                                             data_transforms['val'])

dataloaders_dict_ver['val'] = torch.utils.data.DataLoader(image_datasets_ver['val'],batch_size=1, shuffle=True)

In [48]:
from sklearn.metrics.pairwise import cosine_similarity
with torch.no_grad():
    model.eval()
    model.to(device)

    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0
    count = 0
    outputs = []
    for batch_idx, (data, target) in enumerate(dataloaders_dict_ver['val']):
        print(target)
        data = data.float()
        data = data.to(device)
        target = target.long().to(device)

        outputs.append(model(data)[1])
        
        count += 1
        if count > 2:
            print(cosine_similarity(outputs[0].cpu().numpy(), outputs[1].cpu().numpy()))
            break


tensor([350])
tensor([24])
tensor([484])
[[0.3494428]]


In [92]:
from PIL import Image
from io import BytesIO
import pandas as pd
from torch.autograd import Variable
data = {"id": [], "label": []}
data_transform = transforms.Compose([
                #transforms.RandomResizedCrop(32),
                transforms.ToTensor(),
                #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
                transforms.Normalize([0.44, 0.28, 0.37], [0.26, 0.18, 0.24])
            ])
scores_pred = []
for i in range(sample.shape[0]):
    if i % 100000 == 0:
        print(i)
    model.eval()
    model.to('cuda')
    path_1 = sample.iloc[i].trial.split()[0]
    path_2 = sample.iloc[i].trial.split()[1]
    with open("/home/d.milovanov/win_kaggle/hw3/test_verification/" + path_1, 'rb') as f:
        raw_image = Image.open(f)
        img = data_transform(raw_image)
        img = img.unsqueeze(0)
        
        img = Variable(img)
        img = img.cuda()
        outputs_1 = model(img)[1]
    with open("/home/d.milovanov/win_kaggle/hw3/test_verification/" + path_2, 'rb') as f:
        raw_image = Image.open(f)
        img = data_transform(raw_image)
        img = img.unsqueeze(0)
        
        img = Variable(img)
        img = img.cuda()
        outputs_2 = model(img)[1]
    #print(cosine_similarity(outputs_1.detach().cpu().numpy(), outputs_2.detach().cpu().numpy()).reshape(1))
    scores_pred.append(cosine_similarity(outputs_1.detach().cpu().numpy(), outputs_2.detach().cpu().numpy()).reshape(1))

0
100000
200000
300000
400000
500000
600000
700000
800000


In [94]:
sample['score'] = scores_pred
sample.to_csv("ver_sub.csv", index=False)