In [None]:
#!pip install --no-deps '../input/evaluations/'

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import random
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import os
import multiprocessing
from scipy import stats
from torch.utils.data import Dataset,DataLoader
from time import perf_counter
import PIL
from PIL import Image, ImageDraw
#from evaluations.kaggle_2020 import global_average_precision_score

In [None]:
# ---- Notebook-wide configuration ----
# True when the KAGGLE_WORKING_DIR environment variable is set.
IN_KERNEL = 'KAGGLE_WORKING_DIR' in os.environ

# Data-loading / training knobs.
BATCH_SIZE = 64
NUM_WORKERS = multiprocessing.cpu_count()  # one loader worker per CPU core
NUM_EPOCHS = 1

# Compute device: first CUDA GPU when available, CPU otherwise.
GPU = torch.cuda.is_available()
DEVICE = torch.device("cuda:0" if GPU else "cpu")

# Image roots.
TRAIN_DIR = '../input/landmark-recognition-2020/train/'
TEST_DIR = '../input/landmark-recognition-2020/test/'

# Disabled settings kept for reference:
#MIN_SAMPLES_PER_CLASS = 150
#MAX_STEPS_PER_EPOCH = 15000
#LOG_FREQ = 10
#NUM_TOP_PREDICTS = 20

In [None]:
#Helper Functions
def getPath(FileName,train_dir):
    """Build the on-disk path of an image from its id.

    The image is located at ``<train_dir>/<c0>/<c1>/<c2>/<FileName>.jpg``
    where ``c0``, ``c1`` and ``c2`` are the first three characters of
    ``FileName``.

    Args:
        FileName: image id without extension (at least three characters).
        train_dir: root directory holding the sharded image folders.

    Returns:
        The joined path as a string.

    Raises:
        IndexError: if ``FileName`` has fewer than three characters.
    """
    shard_dirs = [FileName[level] for level in range(3)]
    return os.path.join(train_dir, *shard_dirs, FileName + ".jpg")
def imshow(img):
    """Show a 3-D image tensor with matplotlib.

    ``img`` must expose ``.numpy()``; its first axis is moved to the end
    before plotting (presumably channels-first -> channels-last).
    """
    #img = img / 2 + 0.5     # unnormalize
    channels_last = img.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.show()

def display_images(images, title=None):
    """Plot up to 25 training images in a 5x5 grid with their landmark ids.

    Args:
        images: iterable of image ids (strings). Only the first 25 are drawn;
            extra ids are ignored instead of indexing past the grid.
        title: optional figure title.

    Reads the module-level ``TRAIN_DIR`` (image root) and ``train`` (label
    table with ``id`` and ``landmark_id`` columns).
    """
    f, ax = plt.subplots(5,5, figsize=(18,22))
    if title:
        f.suptitle(title, fontsize = 30)

    # Blank every cell up front so unused cells do not show empty axes.
    for cell in ax.flat:
        cell.axis('off')

    # ax.flat walks the grid row by row; zip stops at 25 images.
    for cell, image_id in zip(ax.flat, images):
        image_path = os.path.join(TRAIN_DIR, f'{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg')
        # Context manager guarantees the file is closed even if drawing fails.
        with Image.open(image_path) as image:
            cell.imshow(image)

        bare_id = image_id.split('.')[0]
        landmark_id = train[train.id==bare_id].landmark_id.values[0]
        cell.set_title(f"ID: {bare_id}\nLandmark_id: {landmark_id}", fontsize="12")
    plt.show()
def getModel(Name, OutFeatures):
    """Create a torchvision classifier whose last layer has ``OutFeatures`` outputs.

    Args:
        Name: architecture name, either ``"AlexNet"`` or ``"VGG16"``.
        OutFeatures: number of output classes for the replaced final layer.

    Returns:
        The model, built with the torchvision constructor's default arguments,
        with ``classifier[6]`` swapped for a new ``nn.Linear(4096, OutFeatures)``.

    Raises:
        ValueError: if ``Name`` is not a supported architecture (previously
            this surfaced as an UnboundLocalError on ``return``).
    """
    if Name == "AlexNet":
        model = torchvision.models.alexnet()
    elif Name == "VGG16":
        model = torchvision.models.vgg16()
    else:
        raise ValueError(
            "Unknown model name %r; expected 'AlexNet' or 'VGG16'" % (Name,)
        )

    # Both architectures end in a 4096-input linear layer at classifier[6].
    model.classifier[6] = nn.Linear(4096,OutFeatures,bias=True)
    return model

In [None]:
# Load the training label table and the sample submission; the first column
# of the latter is used further down as the list of test image ids.
print("Reading Files...")
train, test = map(pd.read_csv, (
    '../input/landmark-recognition-2020/train.csv',
    '../input/landmark-recognition-2020/sample_submission.csv',
))
print("Reading Completed.")

In [None]:
# Map every image id to its landmark id.
# Reuse the `train` frame loaded above instead of parsing train.csv a second
# time; indexing by the first column matches read_csv(..., index_col=0).
df_train = train.set_index(train.columns[0])
Correct_Labels = df_train['landmark_id'].to_dict()
#Predicted_Labels = {key: (item, random.random()) for key, item in Correct_Labels.items()}
#global_average_precision_score(Correct_Labels, Predicted_Labels)

In [None]:
#print(Correct_Labels['0000059611c7d079'], Predicted_Labels['0000059611c7d079'])

In [None]:
print("Creating Dictionary & LandMarkIDs...")
Train_List, Test_List = train.values, test.values

# Group image ids (column 0) by landmark id (column 1); keys are the
# landmark ids converted to strings.
Train_Dict = {}
for Row in Train_List:
    Train_Dict.setdefault(str(Row[1]), []).append(Row[0])

# Sorted array of the distinct landmark ids.
LandMarkIDs = np.unique(Train_List[:,1])
print("Creation Successful.")

In [None]:
# Images_Per_Landmark is a pair: (sorted unique landmark ids, image count per id).
# Its first element doubles as the class-index -> landmark-id table elsewhere.
Images_Per_Landmark = np.unique(train['landmark_id'].tolist(),return_counts=True)
image_counts = Images_Per_Landmark[1]

print("Description of Dataset")
print("Number of Landmarks:",len(train.landmark_id.unique()))
print("#OfImages Per LandmarkID")
print("     Min:",min(image_counts))
print("     Max:",max(image_counts))
print("     Mean:",np.mean(image_counts))
print("     Median:",np.median(image_counts))
# stats.mode returns an array on older SciPy and a scalar on newer releases
# for 1-D input; atleast_1d makes the [0] lookup valid for both.
print("     Mode:",np.atleast_1d(stats.mode(image_counts)[0])[0])
print("#OfTestImages:",len(test))
print("\n")
print(train.head())
plt.hist(train.landmark_id,bins=1000)
plt.title('Histogram of number of images per Landmark ID')

In [None]:
#print("Displaying some Landmarks...")
#samples = train.sample(25).id.values
#display_images(samples)

In [None]:
class RetrievalData(Dataset):
    """Dataset over landmark images addressed by image id.

    Whether an item carries a label is decided from the directory string:
    a directory containing ``"train"`` yields ``(image, class_index)`` and one
    containing ``"test"`` yields ``(image, image_id)``.

    Args:
        Directory: image root passed to ``getPath``.
        FileNames: sequence of image ids (without extension).
        Transform: optional callable applied to the PIL image; ``None`` to
            return the PIL image itself.
        CorrectLabels: mapping image id -> landmark id (used in train mode).
        Images_Per_Landmark: pair whose first element is the array of unique
            landmark ids; the position of a landmark id in it is the class index.
    """
    def __init__(self, Directory, FileNames, Transform, CorrectLabels, Images_Per_Landmark):
        self.directory = Directory
        self.filenames = FileNames
        self.transform = Transform
        self.correctlabels = CorrectLabels
        self.images_per_landmark = Images_Per_Landmark

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.filenames)

    def _class_index(self, name):
        """Position of ``name``'s landmark id within the unique-landmark array."""
        landmark_id = self.correctlabels[name]
        return np.where(self.images_per_landmark[0]==landmark_id)[0][0]

    def __getitem__(self,index):
        """Return ``(image, class_index)`` in train mode, ``(image, image_id)`` in test mode.

        Raises:
            ValueError: if the directory names neither a train nor a test
                split (previously this silently returned ``None``).
        """
        name = self.filenames[index]

        # Resolve the split before touching the disk so a misconfigured
        # directory fails fast with a clear message.
        if "train" in self.directory:
            is_train = True
        elif "test" in self.directory:
            is_train = False
        else:
            raise ValueError(
                "Directory %r must contain 'train' or 'test'" % (self.directory,)
            )

        # convert() loads the pixels and returns an independent image, so the
        # file can be closed right away; forcing RGB keeps the channel count
        # uniform for batching (grayscale/CMYK files would otherwise differ).
        with Image.open(getPath(name, self.directory)) as handle:
            x = handle.convert("RGB")

        if self.transform is not None:
            x = self.transform(x)

        target = self._class_index(name) if is_train else name
        return x, target

In [None]:
# Visual sanity check: load every image of landmark '9' in batches of four,
# print the second element of each batch and show the images as a grid.
Transform = transformations = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])
Train_dataset = RetrievalData(
    TRAIN_DIR, Train_Dict['9'], Transform, Correct_Labels, Images_Per_Landmark
)
Train_loader = DataLoader(Train_dataset, batch_size=4, num_workers=4)

for image, Names in Train_loader:
    # TRAIN_DIR contains "train", so `Names` holds class indices here.
    print(' '.join(['%5s' % j for j in Names]))
    imshow(torchvision.utils.make_grid(image))

In [None]:
def train_model(model, criterion, optimizer, loader, epochs=10, max_steps_per_epoch=499):
    """Train ``model`` on ``loader`` and return it.

    Args:
        model: network to optimise; moved to the module-level ``DEVICE``.
        criterion: loss module called as ``criterion(pred, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        loader: iterable of ``(images, labels)`` batches.
        epochs: number of passes over ``loader``.
        max_steps_per_epoch: cap on optimisation steps per epoch; the default
            (499) keeps the cap the original loop applied. ``None`` disables it.

    Returns:
        The trained model (same object as ``model``).

    Notes:
        The original loop hit an unconditional ``break`` after the first
        epoch, so ``epochs`` was ignored and the loss was never recorded or
        printed. That is fixed here, and the reported loss is the mean over
        the steps actually run rather than over ``len(loader)``.
    """
    model.to(DEVICE)
    criterion.to(DEVICE)
    epoch_losses = []
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        steps_done = 0
        for i, (images, labels) in enumerate(loader):
            # Check the cap before the forward pass so no computation is wasted.
            if max_steps_per_epoch is not None and i >= max_steps_per_epoch:
                break
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            optimizer.zero_grad()
            loss = criterion(model(images), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            steps_done += 1
        # max(..., 1) guards against an empty loader.
        epoch_losses.append(running_loss / max(steps_done, 1))
        print("Epoch#"+str(epoch+1).zfill(2)+"/"+str(epochs),"Loss:",epoch_losses[epoch])
    return model

In [None]:
def validate_model(model,loader,FinalLayer):
    """Compute top-1 accuracy of ``model`` over ``loader``.

    Args:
        model: network to evaluate; moved to the module-level ``DEVICE``.
        loader: iterable of ``(images, labels)`` batches where ``labels`` are
            class indices.
        FinalLayer: unused; kept so existing calls keep working.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        the loader yields no samples.

    Notes:
        The original body called ``.to`` on the whole batch (a list), relied
        on a global ``criterion`` and returned nothing, so it could not run.
    """
    model.to(DEVICE)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in loader:
            images = images.to(DEVICE)
            labels = labels.to(DEVICE)
            pred = model(images)
            correct += (pred.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return correct / total if total else 0.0

In [None]:
def inference_model(model,loader,output_path="submission.csv"):
    """Run ``model`` over ``loader`` and write the top-1 prediction per image.

    Each output line is ``<image id>,<landmark id> <softmax probability>``
    after an ``id,landmarks`` header.

    Args:
        model: trained classifier; moved to the module-level ``DEVICE``.
        loader: iterable of ``(images, image_ids)`` batches.
        output_path: CSV file to write (defaults to the original
            ``submission.csv``).

    Class indices are translated to landmark ids through the module-level
    ``Images_Per_Landmark[0]``.
    """
    softmax = nn.Softmax(1)
    landmark_ids = Images_Per_Landmark[0]
    model.to(DEVICE)
    model.eval()
    with torch.no_grad(), open(output_path,"w") as File:
        File.write("id,landmarks\n")
        for images, FileNames in loader:
            pred = softmax(model(images.to(DEVICE))).cpu().numpy()
            # One vectorised argmax per batch instead of Python max() plus
            # np.where over every probability row.
            best = pred.argmax(axis=1)
            for j in range(len(FileNames)):
                Index = best[j]
                File.write(FileNames[j]+","+str(landmark_ids[Index])+ " "+ str(pred[j][Index])+"\n")

In [None]:
if __name__ == '__main__':
    # End-to-end run: build loaders, train, predict on the test set and time it.
    Start = perf_counter()
    print("Started...")
    Transform = transformations = transforms.Compose([
                                        transforms.Resize((256, 256)),
                                        transforms.ToTensor(),
                                     ])
    Train_dataset = RetrievalData(TRAIN_DIR, Train_List[:,0], Transform, Correct_Labels,Images_Per_Landmark)
    Train_loader = DataLoader(Train_dataset, batch_size=128,shuffle=True, num_workers=NUM_WORKERS)
    print("Minibatches in Train_Loader:",len(Train_loader))
    #Code for Training, Validation, Inference + Submission
    # Labels are positions in Images_Per_Landmark[0], so the output layer must
    # have exactly that many units; deriving it avoids the hard-coded 81313
    # drifting out of sync with the label table.
    NumClasses = len(Images_Per_Landmark[0])
    model = getModel("AlexNet",NumClasses)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.000001)

    model = train_model(model,criterion,optimizer,Train_loader,NUM_EPOCHS)

    # Test ids come from the first column of the sample submission.
    Test_dataset = RetrievalData(TEST_DIR,Test_List[:,0] , Transform, Correct_Labels, Images_Per_Landmark)
    Test_loader = DataLoader(Test_dataset, batch_size=128,shuffle=False, num_workers=NUM_WORKERS)
    inference_model(model,Test_loader)
    Finish = perf_counter()
    print("Ended.")
    print("Time Taken:",Finish-Start, "seconds")

In [None]:
# Read the submission file written by inference_model back in and display it
# as the cell output for a final visual check.
Submission = pd.read_csv("./submission.csv")
Submission