<a href="https://colab.research.google.com/github/rpmallya/mec-mini-projects/blob/master/resnet50_places365_cnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Run the entire places 365 validation suite on a pre-trained Resent model and reproduce the claimed accuracies as in the link below
https://github.com/CSAILVision/places365



##Link Google Drive


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


##Download and unzip the validation Suite


In [2]:
!cp -rf /content/drive/MyDrive/places365/val_256.tar  ./
!tar -xf val_256.tar

##Download text file that provides label for each image

In [3]:
!cp /content/drive/MyDrive/places365/places365_val.txt ./

In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import models,layers
from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import  confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import top_k_accuracy_score

In [5]:
val_flist="/content/places365_val.txt"
vdir="/content/val_256/"

In [6]:
vfile=open (val_flist, 'r')

##Generate a list of images and labels from validation data

In [7]:
golden_labels=[]
image_name=[]
for lines in vfile.readlines():
    s=lines.split()
    iname=s[0]
    ilabel=s[1]
    image_name.append(iname)
    golden_labels.append(int(ilabel))
print("No of images {}. No of labels {}".format(len(image_name), len(golden_labels)))

No of images 36500. No of labels 36500


In [8]:
import torch
from torch.autograd import Variable as V
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os
from PIL import Image

##This section of the code uses existing model to run inference. For this, we have used pytorch related code snippets from the link below (only runs inference on one random image)
###https://github.com/CSAILVision/places365/blob/master/run_placesCNN_basic.py

In [9]:
# th architecture to use
arch = 'resnet50'

In [10]:


# load the pre-trained weights
model_file = '%s_places365.pth.tar' % arch
if not os.access(model_file, os.W_OK):
    weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
    os.system('wget ' + weight_url)

model = models.__dict__[arch](num_classes=365)
checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
state_dict = {str.replace(k,'module.',''): v for k,v in checkpoint['state_dict'].items()}
model.load_state_dict(state_dict)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [11]:
# load the image transformer
centre_crop = trn.Compose([
        trn.Resize((256,256)),
        trn.CenterCrop(224),
        trn.ToTensor(),
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


In [12]:
predition_array=[]
predition_top=[]
for no,item in enumerate(image_name):
    fpath=vdir+item
    #open the image
    img = Image.open(fpath).convert('RGB') #ADD THIS AS SOME IMAGES ARE GRAY SCALE
    #print("openened {0}\n".format(item))
    input_img = V(centre_crop(img).unsqueeze(0))
    # forward pass
    logit = model.forward(input_img)
    #print(logit)
    h_x = F.softmax(logit, 1).data.squeeze()
    probs, idx = h_x.sort(0, True)
    #print(idx[0])
    #print(golden_labels[0])
    #for i in range(0, 5):
    #  print('{:.3f} -> {}'.format(probs[i], classes[idx[i]]))
    cur_pred_array=np.array(idx)
    predition_array.append(cur_pred_array) #entire prediction array
    predition_top.append(idx[0])#(np.argmax(cur_pred_array, axis=-1)) #only best class
    
    if (no % 1000==0):
        print(no)
    
print(f" Golden Entries {len(golden_labels)} Predicted Entries {len(predition_top)}")
    



0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
 Golden Entries 36500 Predicted Entries 36500


##Below calculates the accuracy

In [13]:
#calculate accuracy
print(accuracy_score(golden_labels,predition_top))
#print(top_k_accuracy_score(golden_labels,predition_array,k=5))
#print(top_k_accuracy_score(golden_labels,predition_array,k=10))

0.5476712328767124
