## PyTorch - Pre Trained Models

In [1]:
import torchvision.models as models
import torch
from torchvision import transforms
from PIL import Image
from pathlib import Path
import numpy as np
from torch.autograd import Variable
import warnings
warnings.filterwarnings('ignore')
import requests
import os

C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
#constants
# Root folder of the local dataset; the cells below treat each immediate
# sub-directory as one custom class.
base_path = Path("C:/Users/shubham/Desktop/Masters/Pre-Trained-Models-Research-Experiments/dataset/image_classification/")
# JSON document whose keys are class indices (as strings) and whose values are label texts.
LABELS_URL = 'https://s3.amazonaws.com/outcome-blog/imagenet/labels.json'
# Bound the wait so the cell cannot hang forever, and fail loudly on an HTTP
# error instead of trying to JSON-decode an error page.
response = requests.get(LABELS_URL, timeout=30)
response.raise_for_status()
# JSON object keys are always strings; convert them to int so the map can be
# indexed with the integer class index a model predicts.
labels = {int(key): value for key, value in response.json().items()}

In [3]:
# The immediate sub-directories of base_path are the custom class names.
# Only the first os.walk() entry is needed, so take it with next() instead of
# materialising a walk over the whole dataset tree.
_top_level = next(os.walk(base_path), None)
if _top_level is None:
    # os.walk() silently yields nothing when the root cannot be listed.
    raise FileNotFoundError("dataset directory not found or unreadable: {0}".format(base_path))
# Skip the "mixed" folder; filtering (rather than list.remove) also works when
# that folder does not exist.
custom_labels = [name for name in _top_level[1] if name != "mixed"]

In [4]:
def load_per_label_imgs_generator():
    """Yield the images of one class folder at a time.

    Loading a single class per iteration (e.g. the whole ``airplane`` folder)
    keeps memory bounded compared with loading the full dataset at once.

    Reads the module-level ``custom_labels`` and ``base_path``.

    Yields:
        (label, [(image_name, pil_image)]) where every ``pil_image`` has been
        converted to RGB.

    Raises:
        FileNotFoundError: if a class folder listed in ``custom_labels`` does
            not exist under ``base_path``.
    """
    for label in custom_labels:
        print("reading class:" + label)
        label_dir = base_path/label

        # List only the entries directly inside the class folder; there is no
        # need to recurse through sub-folders to obtain the top-level files.
        with os.scandir(label_dir) as entries:
            label_image_names = [entry.name for entry in entries if not entry.is_dir()]

        per_label_images = []
        for image_name in label_image_names:
            # The context manager releases the underlying file handle as soon
            # as the pixel data has been copied out by convert().
            with Image.open(label_dir/image_name) as img:
                #TO-DO : pre run this step of converting to RGB and remove from here
                rgb_im = img.convert('RGB')
            per_label_images.append((image_name, rgb_im))
        yield (label, per_label_images)

def transform_images(imgs):
    """Pre-process PIL images into single-image batches for a model.

    Args:
        imgs: [(image_name, pil_image)]

    Returns:
        [(image_name, tensor)] where each tensor has a leading batch dimension
        of size 1, i.e. the format (batch_size, channel, height, width).
    """
    # Nothing to transform: skip building the pipeline altogether.
    if not imgs:
        return []

    # Build the pipeline once; it is identical for every image.
    # The mean/std values are presumably the ImageNet channel statistics the
    # torchvision pre-trained models were trained with -- confirm if changed.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ])

    results = []
    for img_tuple in imgs:
        # unsqueeze(0) adds the batch dimension expected by the models.
        # No Variable wrapper: plain tensors can be fed to a model directly.
        batched = preprocess(img_tuple[1]).unsqueeze(0)
        results.append((img_tuple[0], batched))
    return results



def evaluate_results(model, batch_imgs):
    """Run the model on each pre-processed image and report its top prediction.

    Reads the module-level ``labels`` map (class index -> label text).

    Args:
        model: a PyTorch model.
        batch_imgs: [(image_name, tensor)] where each tensor is a batch
            holding one image.

    Returns:
        [(image_name, pred_label, pred_percentage)] where ``pred_percentage``
        is the softmax probability of the predicted class multiplied by 100.
    """
    results = []
    # Switching to inference mode is needed only once, not once per image.
    model.eval()
    # Gradients are never used here; disabling autograd avoids building and
    # retaining a computation graph for every forward pass.
    with torch.no_grad():
        for batch_img in batch_imgs:
            out = model(batch_img[1])
            _, index = torch.max(out, 1)
            top_index = index[0].item()
            percentage = torch.nn.functional.softmax(out, dim=1)[0] * 100
            results.append((batch_img[0], labels[top_index], percentage[top_index].item()))
    return results


def run_per_label_model_inference(model, per_label_images_tuple):
    """Pre-process and classify all images belonging to one label.

    Args:
        model: any PyTorch pre-trained model.
        per_label_images_tuple: (label, [label_images]) -- the images of a
            single label.

    Returns:
        (label, pred_tuple_list) with pred_tuple_list being
        [(image_name, pred_label, pred_percentage)].
    """
    predictions = evaluate_results(
        model,
        transform_images(per_label_images_tuple[1]),
    )
    label = per_label_images_tuple[0]
    return (label, predictions)

#### Pre-trained models (PTMs)
- resnet18 = models.resnet18(pretrained=True)
- alexnet = models.alexnet(pretrained=True)
- squeezenet = models.squeezenet1_0(pretrained=True)
- vgg16 = models.vgg16(pretrained=True)
- densenet = models.densenet161(pretrained=True)
- inception = models.inception_v3(pretrained=True)
- googlenet = models.googlenet(pretrained=True)
- shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
- mobilenet = models.mobilenet_v2(pretrained=True)
- resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
- wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
- mnasnet = models.mnasnet1_0(pretrained=True)

In [5]:
# Map a short model name to an instantiated torchvision model loaded with
# pre-trained weights. Each pair is (dictionary key, torchvision constructor name);
# the models are created in the order listed.
model_dict = {
    name: getattr(models, constructor)(pretrained=True)
    for name, constructor in (
        ("resnet18", "resnet18"),
        ("alexnet", "alexnet"),
        ("squeezenet", "squeezenet1_0"),
        ("vgg16", "vgg16"),
        ("densenet", "densenet161"),
        ("inception", "inception_v3"),
        ("googlenet", "googlenet"),
        ("shufflenet", "shufflenet_v2_x1_0"),
        ("mobilenet", "mobilenet_v2"),
        ("resnext50_32x4d", "resnext50_32x4d"),
        ("wide_resnet50_2", "wide_resnet50_2"),
        ("mnasnet", "mnasnet1_0"),
    )
}

In [6]:
#for model in model_dict:
#    (label, label_prob) = run_model_inference(model_dict[model])
#    print("{0} predicted {2} with probability {1}".format(model, label_prob, label))

### Create ImageNet Labels Map

In [14]:
# all_label_result: [(label, [(image_name, pred_label, percentage)])]
all_label_result = []

# Build the network once; constructing it inside the loop would re-create the
# model and reload its pre-trained weights for every class folder.
resnet18_model = models.resnet18(pretrained=True)

for label_tuple in load_per_label_imgs_generator():
    res = run_per_label_model_inference(resnet18_model, label_tuple)
    all_label_result.append(res)

reading class:airplane
reading class:apple
reading class:backpack
reading class:banana
reading class:beverage
reading class:boat
reading class:bowl
reading class:cake
reading class:car
reading class:chair
reading class:chess-board
reading class:clock
reading class:coffee-mug
reading class:cycle
reading class:eagle
reading class:fabric
reading class:forests
reading class:fork
reading class:guitar
reading class:kangaroo
reading class:lakes
reading class:laptop
reading class:mountains
reading class:orange
reading class:owl
reading class:pen
reading class:person
reading class:polar bear
reading class:remote
reading class:rose
reading class:rose leaf
reading class:schools
reading class:shoes
reading class:skateboard
reading class:suitcase
reading class:table
reading class:towers
reading class:traffic light
reading class:tulip
reading class:umbrella
reading class:vegetable
reading class:wine glass


In [15]:
# For every custom class, collect the distinct labels predicted for its images.
# Each entry of all_label_result is (label, [(image_name, pred_label, percentage)]).
imagenet_label_map_dict = {
    label_result[0]: {prediction[1] for prediction in label_result[1]}
    for label_result in all_label_result
}

In [16]:
# Display the map from each custom class to the set of labels predicted for its images.
imagenet_label_map_dict

{'airplane': {'airliner', 'crane', 'warplane, military plane', 'wing'},
 'apple': {'Granny Smith',
  'bell pepper',
  'buckeye, horse chestnut, conker',
  'fig',
  'hip, rose hip, rosehip',
  'pomegranate',
  'vase'},
 'backpack': {'backpack, back pack, knapsack, packsack, rucksack, haversack'},
 'banana': {'banana'},
 'beverage': {'beaker',
  'beer glass',
  'bell pepper',
  'bucket, pail',
  'candle, taper, wax light',
  'cocktail shaker',
  'eggnog',
  'goblet',
  'guacamole',
  'paper towel',
  'perfume, essence',
  'pop bottle, soda bottle',
  'red wine',
  'strawberry',
  'trifle',
  'vase',
  'vending machine',
  'wine bottle'},
 'boat': {'canoe',
  'catamaran',
  'dock, dockage, docking facility',
  'gondola',
  'liner, ocean liner',
  'pirate, pirate ship',
  'speedboat',
  'trimaran'},
 'bowl': {'caldron, cauldron',
  'envelope',
  'mixing bowl',
  'mortar',
  'wok',
  'wooden spoon'},
 'cake': {'bagel, beigel',
  'bakery, bakeshop, bakehouse',
  'bonnet, poke bonnet',
  'can