In [27]:
import functools
import os

import numpy as np
import pandas as pd
import torch
import torchvision
from PIL import Image
from torchvision import transforms

In [79]:
# Human-readable class names, one per line; the position in the list is the
# class index used to look up a prediction's label.
with open('imagenet_classes.txt') as f:
    classes = [line.strip() for line in f]

# Not read in this cell; kept in case another cell refers to it.
directory = os.fsencode('imagenet/')

# Collect every file below imagenet/ in a single pass. Paths are stored
# relative to imagenet/ so that a file found in a subdirectory can still be
# opened as 'imagenet/' + name (a top-level file keeps its bare file name).
image_names = [
    os.path.relpath(os.path.join(root, filename), "imagenet/")
    for root, dirs, files in os.walk("imagenet/")
    for filename in files
]

# Build the array once instead of re-copying it with np.append per file.
x = np.array(image_names)
df = pd.DataFrame(data=x, columns=["images"])
df.loc[0, "images"]

In [114]:
@functools.lru_cache(maxsize=None)
def _load_alexnet():
    """Build the pretrained AlexNet once and reuse it on every later call."""
    # NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour of
    # `weights=`; kept as-is to match whatever version this notebook targets.
    model = torchvision.models.alexnet(pretrained=True)
    model.eval()  # inference behaviour for layers such as dropout
    return model


def alexnet_model(img):
    """
    Classify one image from ``imagenet/`` with a pretrained AlexNet.

    Parameters
    ----------
    img : sequence or pandas.Series
        Row whose first element is the image file name relative to
        ``imagenet/``.

    Returns
    -------
    indices : numpy.ndarray
        Class indices ordered from the highest to the lowest score.
    percentage : numpy.ndarray
        Softmax probability of every class in percent, ordered by class
        index (i.e. not sorted).
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    # Use .iloc for a Series: plain integer keys on a label-indexed Series
    # are deprecated positional access. Lists/tuples keep working via [0].
    filename = img.iloc[0] if hasattr(img, "iloc") else img[0]

    # The context manager closes the file handle; convert("RGB") guarantees
    # three channels so grayscale/CMYK files match the 3-channel Normalize.
    with Image.open('imagenet/' + filename) as raw_image:
        img_t = transform(raw_image.convert("RGB"))
    batch_t = torch.unsqueeze(img_t, 0)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = _load_alexnet()(batch_t)
        percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
        _, indices = torch.sort(out, descending=True)
    return indices.numpy()[0], percentage.numpy()
    

In [115]:
@functools.lru_cache(maxsize=None)
def _load_resnet101():
    """Build the pretrained ResNet-101 once and reuse it on every later call."""
    # NOTE(review): torchvision >= 0.13 deprecates `pretrained=` in favour of
    # `weights=`; kept as-is to match whatever version this notebook targets.
    model = torchvision.models.resnet101(pretrained=True)
    model.eval()  # inference behaviour for layers such as batch norm
    return model


def resnet_model(img):
    """
    Classify one image from ``imagenet/`` with a pretrained ResNet-101.

    Parameters
    ----------
    img : sequence or pandas.Series
        Row whose first element is the image file name relative to
        ``imagenet/``.

    Returns
    -------
    indices : numpy.ndarray
        Class indices ordered from the highest to the lowest score.
    percentage : numpy.ndarray
        Softmax probability of every class in percent, ordered by class
        index (i.e. not sorted).
    """
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    # Use .iloc for a Series: plain integer keys on a label-indexed Series
    # are deprecated positional access. Lists/tuples keep working via [0].
    filename = img.iloc[0] if hasattr(img, "iloc") else img[0]

    # The context manager closes the file handle; convert("RGB") guarantees
    # three channels so grayscale/CMYK files match the 3-channel Normalize.
    with Image.open('imagenet/' + filename) as raw_image:
        img_t = transform(raw_image.convert("RGB"))
    batch_t = torch.unsqueeze(img_t, 0)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = _load_resnet101()(batch_t)
        _, indices = torch.sort(out, descending=True)
        percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
    return indices.numpy()[0], percentage.numpy()



In [147]:
def ensemble_predict(row, class_names=None):
    """
    Ensemble two classifiers by averaging their per-class percentages.

    Parameters
    ----------
    row : sequence or pandas.Series
        Read by position: element 2 holds the first model's per-class
        percentages and element 4 the second model's. Elements 0, 1 and 3
        are not read.
    class_names : sequence of str, optional
        Labels indexed by class id. When omitted, the notebook-level
        ``classes`` list is used.

    Returns
    -------
    indices : numpy.ndarray
        Class indices ordered from the highest to the lowest averaged score.
    all_percentages : numpy.ndarray
        Element-wise mean of the two percentage vectors, ordered by class
        index.
    prediction : str
        Label of the top-ranked class.

    Raises
    ------
    ValueError
        If the two percentage vectors do not have the same shape.
    """
    # Use .iloc for a Series: plain integer keys on a label-indexed Series
    # are deprecated positional access. Lists/tuples are indexed directly.
    fields = row.iloc if hasattr(row, "iloc") else row
    a_perc = np.asarray(fields[2])
    r_perc = np.asarray(fields[4])

    # Reject mismatched vectors instead of letting numpy broadcast them.
    if a_perc.shape != r_perc.shape:
        raise ValueError(
            "percentage vectors differ in shape: "
            f"{a_perc.shape} vs {r_perc.shape}"
        )

    all_percentages = (a_perc + r_perc) / 2
    indices = np.argsort(all_percentages)[::-1]
    labels = classes if class_names is None else class_names
    return indices, all_percentages, labels[indices[0]]

    

In [130]:
# Run both classifiers on the first three images and collect their outputs.

df_s = df.head(3)

# Each model returns (indices, percentage); "expand" turns that pair into
# two columns, which are then given model-specific names.
alexnet_preds = df_s.apply(alexnet_model, axis=1, result_type="expand")
alexnet_preds = alexnet_preds.rename(
    columns={0: "alexnet_indices", 1: "alexnet_percentage"}
)

resnet_preds = df_s.apply(resnet_model, axis=1, result_type="expand")
resnet_preds = resnet_preds.rename(
    columns={0: "resnet_indices", 1: "resnet_percentage"}
)

# TODO: pd.concat(axis=1) is a possible alternative to join here; it has not
# been benchmarked for this notebook.
all_preds = df_s.join([alexnet_preds, resnet_preds])
all_preds


Unnamed: 0,images,alexnet_indices,alexnet_percentage,resnet_indices,resnet_percentage
0,ILSVRC2012_val_00036091.JPEG,"[511, 436, 817, 581, 717, 751, 468, 627, 656, ...","[2.0327654e-08, 3.525203e-08, 3.58106e-07, 2.7...","[511, 436, 581, 817, 627, 468, 479, 717, 751, ...","[2.2426984e-06, 1.2366584e-07, 1.2042776e-07, ..."
1,ILSVRC2012_val_00018439.JPEG,"[74, 72, 73, 815, 303, 70, 75, 300, 77, 319, 3...","[2.1032622e-09, 8.0098125e-08, 2.9302602e-10, ...","[74, 72, 815, 73, 77, 75, 70, 78, 119, 998, 99...","[1.6091902e-07, 3.3655735e-07, 1.6260454e-07, ..."
2,ILSVRC2012_val_00033769.JPEG,"[125, 33, 32, 54, 119, 113, 34, 36, 60, 305, 5...","[5.6719487e-05, 1.0501685e-06, 1.9312533e-06, ...","[316, 125, 33, 113, 32, 988, 37, 34, 120, 47, ...","[2.4145097e-06, 5.8845744e-05, 6.1371406e-07, ..."


In [148]:
# Average the two models' outputs row by row and attach the result.
ensemble_predictions = all_preds.apply(
    ensemble_predict, axis=1, result_type='expand'
)
ensemble_predictions = ensemble_predictions.rename(
    columns={0: "indices", 1: "percentage", 2: "prediction"}
)
final_df = all_preds.join(ensemble_predictions)
final_df

Unnamed: 0,images,alexnet_indices,alexnet_percentage,resnet_indices,resnet_percentage,indices,percentage,prediction
0,ILSVRC2012_val_00036091.JPEG,"[511, 436, 817, 581, 717, 751, 468, 627, 656, ...","[2.0327654e-08, 3.525203e-08, 3.58106e-07, 2.7...","[511, 436, 581, 817, 627, 468, 479, 717, 751, ...","[2.2426984e-06, 1.2366584e-07, 1.2042776e-07, ...","[511, 436, 817, 581, 717, 751, 468, 627, 656, ...","[1.131513e-06, 7.9458935e-08, 2.3926688e-07, 5...",convertible
1,ILSVRC2012_val_00018439.JPEG,"[74, 72, 73, 815, 303, 70, 75, 300, 77, 319, 3...","[2.1032622e-09, 8.0098125e-08, 2.9302602e-10, ...","[74, 72, 815, 73, 77, 75, 70, 78, 119, 998, 99...","[1.6091902e-07, 3.3655735e-07, 1.6260454e-07, ...","[74, 72, 815, 73, 303, 70, 75, 300, 77, 319, 3...","[8.151114e-08, 2.0832773e-07, 8.144878e-08, 7....",garden_spider
2,ILSVRC2012_val_00033769.JPEG,"[125, 33, 32, 54, 119, 113, 34, 36, 60, 305, 5...","[5.6719487e-05, 1.0501685e-06, 1.9312533e-06, ...","[316, 125, 33, 113, 32, 988, 37, 34, 120, 47, ...","[2.4145097e-06, 5.8845744e-05, 6.1371406e-07, ...","[125, 316, 33, 32, 54, 113, 119, 34, 36, 305, ...","[2.9566998e-05, 2.9947956e-05, 1.2724837e-06, ...",hermit_crab
