# ResNet50 Feature Extractor

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torchvision
import string
import os
import copy
import shutil
import json
import pickle
from PIL import Image
from random import sample
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt


from sklearn.linear_model import LogisticRegression as lr 
from sklearn import metrics
import cv2
from random import sample

In [None]:
# Seed every random number generator this notebook relies on so that runs are
# reproducible. The k-shot sampling uses `sample` from the standard-library
# `random` module, which is NOT affected by the torch or numpy seeds, so it
# has to be seeded separately.
import random

random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

In [None]:
# Input-type experiments, keyed by experiment name. Each entry holds:
#   image_dir  - root directory of the images for that input type (the feature
#                extraction code appends 'train/' or 'test/' to it)
#   plot_color - matplotlib color code associated with the experiment
#   notes      - free-text description of the preprocessing
# NOTE(review): the notes of the last four experiments are identical to the
# "center_crop" notes and look copy-pasted -- confirm what each one should say.
experiments = {
    name : {"image_dir" : image_dir, "plot_color" : plot_color, "notes" : notes}
    for name, image_dir, plot_color, notes in [
        (
            "original",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/",
            "C0",
            "using the original CUB images, no center crop, then resize to 224",
        ),
        (
            "center_crop",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/squared_bounding_box/",
            "C1",
            "using the original CUB images, with a 256 center crop then resize to 224.",
        ),
        (
            "bounding_box",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/bounding_box/",
            "C2",
            "using the original CUB images, with a 256 center crop then resize to 224.",
        ),
        (
            "segmentation",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/segmentation/",
            "C3",
            "using the original CUB images, with a 256 center crop then resize to 224.",
        ),
        (
            "bounding_box_segmentation",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/bounding_box_segmentation/",
            "C4",
            "using the original CUB images, with a 256 center crop then resize to 224.",
        ),
        (
            "simple_rectification",
            "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/simple_rectification/",
            "C5",
            "using the original CUB images, with a 256 center crop then resize to 224.",
        ),
    ]
}

In [None]:
# Preparation: parse the CUB metadata files into lookup dictionaries.
#
# CUB class labels start at 1 in the files. 1 is subtracted while loading so
# that every label stored below is 0-based.

class_txt = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/images.txt"
image_class_labels = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/image_class_labels.txt"
train_test_split = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/train_test_split.txt"

# 0-based cub label -> list of all image ids with that label
# e.g. {0 : [1, 2, 3, 4, 5, 6, 7], 1 : [33, 34, 35]}
label_to_image_ids = {}
# image id -> 0-based cub label. Kept alongside the grouping above so that
# later lookups are a single dict access instead of a scan over every label.
image_id_to_label = {}
with open(image_class_labels) as f:
    for line in f:
        fields = line.split()
        if not fields:  # tolerate blank lines
            continue
        img_id = int(fields[0])
        label = int(fields[1]) - 1  # convert the 1-based CUB label to 0-based

        label_to_image_ids.setdefault(label, []).append(img_id)
        image_id_to_label[img_id] = label

# 0-based cub label -> list of image ids used for training
# e.g. {0 : [2, 4, 5, 7], 1 : [33, 35]}
label_to_training_image_ids = {}
with open(train_test_split) as f:
    for line in f:
        fields = line.split()
        if not fields:
            continue
        img_id = int(fields[0])
        is_training_image = int(fields[1])

        # A flag of 1 marks a training image; ids without a known label are skipped.
        if is_training_image == 1 and img_id in image_id_to_label:
            label = image_id_to_label[img_id]
            label_to_training_image_ids.setdefault(label, []).append(img_id)

# image id -> file path relative to an image directory
# e.g. {1 : "001.Black_footed_Albatross/Black_Footed_Albatross_0046_18.jpg", 2 : "001.Black_footed_Albatross/Black_Footed_Albatross_0009_34.jpg"}
cub_image_id_to_fp = {}
with open(class_txt) as f:
    for line in f:
        fields = line.split(maxsplit=1)
        if len(fields) < 2:
            continue
        cub_image_id_to_fp[int(fields[0])] = fields[1].strip()

def get_image_fp_for_image_dir(image_id, image_dir):
    """Build the path of a CUB image inside a particular image directory.

    The relative file path recorded for `image_id` in the module-level
    `cub_image_id_to_fp` mapping is joined onto `image_dir`.

    Raises a KeyError if `image_id` is not present in that mapping.
    """
    relative_fp = cub_image_id_to_fp[image_id]
    return os.path.join(image_dir, relative_fp)

In [None]:
# Model specifications, adapted from https://github.com/visipedia/newt/tree/main/benchmark
PYTORCH_PRETRAINED_MODELS_DIR = '/Users/madisonvanhorn/Documents/iNaturalist_2017/pretrained-models/cvpr21_newt_pretrained_models/pt/'
PYTORCH = "pytorch"

# Datasets a model can have been pretrained on
IMAGENET = "ImageNet"
INAT2021 = "iNat2021"
INAT2018 = "iNat2018"

# Objectives a model can have been trained with
SUPERVISED = "Supervised"
MOCO_V2 = "MOCO v2"
SWAV = "SwAV"
SIMCLR = "SimCLR"
SIMCLR_V2 = "SimCLR v2"

# Backbone architectures
RESNET50 = "ResNet50"

# One entry per available feature extractor, keyed by a short model name.
# "weights" is the checkpoint file path, or None when no checkpoint file is
# configured for the entry.
model_specs = {
    "imagenet" : dict(
        name="imagenet_supervised",
        display_name="ImageNet Supervised (pytorch)",
        color="black",
        format=PYTORCH,
        backbone=RESNET50,
        weights=None,
        training_dataset=IMAGENET,
        train_objective=SUPERVISED,
        pretrained_weights=None,
    ),
    "inat2021" : dict(
        name="inat2021_supervised",
        display_name="iNat2021 Supervised",
        color="C9",
        format=PYTORCH,
        backbone=RESNET50,
        weights=f"{PYTORCH_PRETRAINED_MODELS_DIR}inat2021_supervised_large.pth.tar",
        training_dataset=INAT2021,
        train_objective=SUPERVISED,
        pretrained_weights=IMAGENET,
    ),
    "inat2018" : dict(
        name="inat2018_supervised",
        display_name="iNat2018 Supervised",
        color="C7",
        format=PYTORCH,
        backbone=RESNET50,
        weights=f"{PYTORCH_PRETRAINED_MODELS_DIR}inat2018_supervised.pth.tar",
        training_dataset=INAT2018,
        train_objective=SUPERVISED,
        pretrained_weights=IMAGENET,
    ),
}

## Initialize Model
Initialize model and remove the last fully connected layer.

In [None]:
# Choose which feature extractor to use; must be one of the keys of `model_specs`.

model_name = 'inat2021'

model_type = model_specs[model_name]['name']
model_weights_fp = model_specs[model_name]['weights']

if model_weights_fp is None:
    # No checkpoint file is configured for this spec (the "imagenet" entry),
    # so use torchvision's own ImageNet-pretrained ResNet50 weights instead of
    # calling torch.load(None), which would fail.
    model = models.resnet50(pretrained=True)
else:
    model = models.resnet50(pretrained=False)

    # This model was actually trained with 10000 classes for the fc layer
    # but only 8142 (the number in inat2018) were actually updated
    model.fc = torch.nn.Linear(model.fc.in_features, 10000)
    checkpoint = torch.load(model_weights_fp, map_location="cpu")
    # strict=True makes this raise if the checkpoint does not match the model
    msg = model.load_state_dict(checkpoint['state_dict'], strict=True)

# Detect if we have a GPU available. The model is moved there by
# `model.to(device)` below; no separate `.cuda()` call is needed.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# strip the last (fully connected) layer so the model outputs features
model = torch.nn.Sequential(*list(model.children())[:-1])
model.to(device)

## Extract the test features for each input type

In [None]:
# Load in the test image ids and their 0-based labels
# e.g. {8 : 0, 9 : 0}
test_image_id_to_label = {}

# File listing, per image id, whether the image is a training (1) or test (0) image
train_test_split = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/train_test_split.txt"

# Invert label_to_image_ids once so each image's label is a single dict lookup
# rather than a scan over every label's image list. setdefault keeps the first
# label that lists an id.
_image_id_to_label = {}
for _label, _image_ids in label_to_image_ids.items():
    for _image_id in _image_ids:
        _image_id_to_label.setdefault(_image_id, _label)

with open(train_test_split) as f:
    for line in f:
        fields = line.split()
        if not fields:  # tolerate blank lines
            continue

        img_id = int(fields[0])
        is_training_image = int(fields[1])

        # A flag of 0 marks a test image: add it to the test dictionary
        if is_training_image == 0 and img_id in _image_id_to_label:
            test_image_id_to_label[img_id] = _image_id_to_label[img_id]

# The preprocessing is the same for every image, so build it once
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Inference only: put the model in evaluation mode once, up front
model.eval()

# Make sure the output directory exists before any (slow) extraction starts
features_dir = '/Users/madisonvanhorn/Documents/iNaturalist_2017/features_inat2021/'
os.makedirs(features_dir, exist_ok=True)

for experiment_name, experiment_settings in experiments.items():
    print(experiment_name)
    X_test = []
    y_test = []

    # Test images for this input type live in the 'test/' sub-directory
    img_directory = experiment_settings['image_dir'] +'test/'

    for test_image_id, test_label in test_image_id_to_label.items():
        # Get the file path to the image for this specific input type
        image_fp = get_image_fp_for_image_dir(test_image_id, img_directory)

        # convert() returns a new image, so the file can be closed right away
        with Image.open(image_fp) as raw_image:
            im = raw_image.convert('RGB')
        im = transform(im).to(device)

        # Extract feature: add a batch dimension, run the model, drop singleton dimensions
        with torch.no_grad():
            image_feature = model(im.unsqueeze(0)).squeeze()
            X_test.append(image_feature.cpu().numpy())
        y_test.append(test_label)

    # Keep the features in memory for the k-shot experiments
    experiment_settings['X_test'] = X_test
    experiment_settings['y_test'] = y_test

    #Save features
    testing_features = features_dir + 'test_features_' + experiment_name + '.npz'
    np.savez(testing_features, features = np.array(X_test), labels = np.array(y_test))


## Extract the train features for each input type

In [None]:
# Load in the train image ids and their 0-based labels
# e.g. {2 : 0, 4 : 0}

train_image_id_to_label = {}

# File listing, per image id, whether the image is a training (1) or test (0) image
train_test_split = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/train_test_split.txt"

# Invert label_to_training_image_ids once so each image's label is a single
# dict lookup rather than a scan over every label's image list. setdefault
# keeps the first label that lists an id.
_training_image_id_to_label = {}
for _label, _image_ids in label_to_training_image_ids.items():
    for _image_id in _image_ids:
        _training_image_id_to_label.setdefault(_image_id, _label)

with open(train_test_split) as f:
    for line in f:
        fields = line.split()
        if not fields:  # tolerate blank lines
            continue

        img_id = int(fields[0])
        is_training_image = int(fields[1])

        # A flag of 1 marks a training image: add it to the training dictionary
        if is_training_image == 1 and img_id in _training_image_id_to_label:
            train_image_id_to_label[img_id] = _training_image_id_to_label[img_id]

# The preprocessing is the same for every image, so build it once
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Inference only: put the model in evaluation mode once, up front
model.eval()

# Make sure the output directory exists before any (slow) extraction starts
features_dir = '/Users/madisonvanhorn/Documents/iNaturalist_2017/features_inat2021/'
os.makedirs(features_dir, exist_ok=True)

for experiment_name, experiment_settings in experiments.items():
    print('Experiment name: ' +str(experiment_name))
    # Keyed by image id (not lists) because the k-shot experiments look up
    # the features of individually sampled training images.
    X_train = {}
    y_train = {}

    # Training images for this input type live in the 'train/' sub-directory
    img_directory = experiment_settings['image_dir'] +'train/'

    for train_image_id, train_label in train_image_id_to_label.items():
        # Get the file path to the image for this specific input type
        image_fp = get_image_fp_for_image_dir(train_image_id, img_directory)

        # convert() returns a new image, so the file can be closed right away
        with Image.open(image_fp) as raw_image:
            im = raw_image.convert('RGB')
        im = transform(im).to(device)

        # Extract feature: add a batch dimension, run the model, drop singleton dimensions
        with torch.no_grad():
            image_feature = model(im.unsqueeze(0)).squeeze()
            X_train[train_image_id] = image_feature.cpu().numpy()
        y_train[train_image_id] = train_label

    experiment_settings['X_train'] = X_train
    experiment_settings['y_train'] = y_train

    #Save features
    # np.array() of a dict produces a 0-d object array that can only be stored
    # via pickle, so save aligned plain arrays instead: row i of `features`
    # and entry i of `labels` belong to `image_ids[i]`. This matches the
    # features/labels layout of the test feature files.
    train_image_ids = list(X_train)
    training_features = features_dir + 'train_features_' + experiment_name + '.npz'
    np.savez(
        training_features,
        image_ids = np.array(train_image_ids),
        features = np.array([X_train[image_id] for image_id in train_image_ids]),
        labels = np.array([y_train[image_id] for image_id in train_image_ids]),
    )


In [None]:
# Accuracy results keyed by k, the number of training images per class:
# e.g. {1 : {'original' : [0.04, 0.05, 0.05], 'center_crop' : [0.1, 0.09, 0.09], ..., 'selected_images' : [...]}, 3 : {'original' : [...]}}
k_shot_accuracy_results = {}
correct_img_classification = {}

# Outer loop over the k values of the k-shot learning
for number_of_training_images in [1, 3, 5, 10, 20, 30]:

    # One list of per-trial accuracies for every experiment
    results_for_k = {experiment_name : [] for experiment_name in experiments}
    # Records which randomly selected images (i.e. dataset) each trial used
    results_for_k['selected_images'] = []
    k_shot_accuracy_results[number_of_training_images] = results_for_k

    # Every setting is repeated for 10 trials
    for trial in range(10):

        # Draw this trial's dataset: up to k training image ids per class,
        # sampled without replacement (fewer if the class has fewer images).
        sampled_ids_by_label = {
            label : sample(
                label_to_training_image_ids[label],
                min(len(label_to_training_image_ids[label]), number_of_training_images),
            )
            for label in range(200)
        }
        results_for_k['selected_images'].append(sampled_ids_by_label)

        # Run every input setting on the same randomly selected dataset
        for experiment_name, experiment_settings in experiments.items():

            # Gather the stored features and labels of the sampled images
            features_by_image_id = experiment_settings['X_train']
            X_train = [
                features_by_image_id[image_id]
                for label_image_ids in sampled_ids_by_label.values()
                for image_id in label_image_ids
            ]
            y_train = [
                label
                for label, label_image_ids in sampled_ids_by_label.items()
                for _ in label_image_ids
            ]

            # Train linear model
            clf = lr(max_iter=1000, C=0.025)
            clf.fit(X_train, y_train)

            # Evaluate classifier on the full test set of this experiment
            y_pred = clf.predict(experiment_settings['X_test'])

            print(experiment_name)
            acc = metrics.accuracy_score(experiment_settings['y_test'], y_pred)
            print("Accuracy: ", acc)

            # Store as a plain float so the results can be written as JSON
            results_for_k[experiment_name].append(float(acc))


In [None]:
# Persist the outcome of this run.

# The whole `experiments` dictionary is pickled.
with open(
    '/Users/madisonvanhorn/Documents/iNaturalist_2017/features/experiment_features_inat2021.pkl',
    'wb',
) as pickle_file:
    pickle.dump(experiments, pickle_file)

# The k-shot accuracies are written as JSON.
# WATCH OUT: json complains about saving numpy data, so you might have to convert
with open(
    "/Users/madisonvanhorn/Documents/iNaturalist_2017/report/results_inat2021.json",
    "w",
) as json_file:
    json.dump(k_shot_accuracy_results, json_file)

## Display an example bird image as it appears in each experiment

In [None]:
# Show one randomly chosen training image as it appears in each input type.
main_dir = "/Users/madisonvanhorn/Documents/iNaturalist_2017/CUB_200_2011/CUB_200_2011/"
train_dir = main_dir + "train/"

# Only consider real class directories / image files: os.listdir can also
# return hidden entries such as '.DS_Store', which would crash the nested
# listdir or the image read. Sorting makes the seeded random choice
# independent of the file system's listing order.
class_dirs = sorted(
    entry for entry in os.listdir(train_dir)
    if not entry.startswith('.') and os.path.isdir(os.path.join(train_dir, entry))
)
img_dir=np.random.choice(class_dirs)
image_names = sorted(
    entry for entry in os.listdir(train_dir +img_dir) if not entry.startswith('.')
)
img_name = np.random.choice(image_names)

# Path of the same image under every input type
original = main_dir + 'train/' + img_dir + '/' + img_name
center_crop_dir = main_dir + 'squared_bounding_box/' +'train/' + img_dir + '/' + img_name
bounding_box_dir = main_dir + 'bounding_box/' + 'train/' + img_dir + '/' + img_name
bounding_box_seg_dir = main_dir + 'bounding_box_segmentation/' + 'train/' + img_dir + '/' + img_name
segmentation_dir = main_dir + 'segmentation/' + 'train/' + img_dir + '/' + img_name
orientation_rect = main_dir + 'simple_rectification/' + 'train/' + img_dir + '/' + img_name

# (panel title, image path), listed in row-major order of the 2x3 grid
panels = [
    ('Original', original),
    ('Bounding Box', bounding_box_dir),
    ('Segmentation', segmentation_dir),
    ('Simple Rectification', orientation_rect),
    ('Bounding Box Segmentation', bounding_box_seg_dir),
    ('Squared Bounding Box', center_crop_dir),
]

f, axarr = plt.subplots(2,3, figsize=(15,8))
for axis, (panel_title, panel_image_fp) in zip(axarr.flat, panels):
    bgr_image = cv2.imread(panel_image_fp)
    if bgr_image is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with a clear message instead of a cryptic cvtColor error.
        raise FileNotFoundError('Could not read image: ' + str(panel_image_fp))
    axis.title.set_text(panel_title)
    # OpenCV loads images as BGR; convert to RGB for matplotlib
    axis.imshow(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))


## Display plot showing the results from each K-Shot experiment with the various data augmentation experiments 

In [None]:
# Plot the mean test accuracy of every experiment against the number of
# training images per class.
x = [1, 3, 5, 10, 20, 30]

fig,ax = plt.subplots(figsize=(8, 6))

# (key in the results dictionary, legend label), in plotting order
for experiment_key, legend_label in [
    ('original', 'Original'),
    ('center_crop', 'Center Crop'),
    ('bounding_box', 'Bounding Box'),
    ('segmentation', 'Segmentation'),
    ('bounding_box_segmentation', 'Bounding Box Segmentation'),
    ('simple_rectification', 'Simple Rectification'),
]:
    # Average the per-trial accuracies for each number of training images
    y = [np.mean(k_shot_accuracy_results[k][experiment_key]) for k in x]
    plt.plot(x, y, marker="o", label=legend_label)

plt.xlabel('Number of Training Samples Used Per Class')
plt.ylabel('Accuracy')

plt.xticks(x)
plt.title('Average Test Set Accuracy for K-Shot Learning with iNat2021 Feature Extractor')
plt.legend()


## Display plot after collecting all three feature extractors' accuracies
This requires rerunning the code above once per feature extractor (changing `model_name` and the output paths) so that results for all three are available.

In [None]:
# Load the saved k-shot results of each of the three feature extractors.
# The paths are absolute (leading "/"), like every other path in this
# notebook; without it they would be resolved relative to the current
# working directory.
# NOTE: pickle.load must only be used on trusted files such as these
# self-generated results.
with open("/Users/madisonvanhorn/Documents/iNaturalist_2017/final-product/log_reg_results/results_iNat2021_logreg.pkl", "rb") as f:
    k_shot_accuracy_results_iNat2021 = pickle.load(f)
with open("/Users/madisonvanhorn/Documents/iNaturalist_2017/final-product/log_reg_results/results_iNat2018_logreg.pkl", "rb") as f:
    k_shot_accuracy_results_iNat2018 = pickle.load(f)
with open("/Users/madisonvanhorn/Documents/iNaturalist_2017/final-product/log_reg_results/results_imagenet_logreg.pkl", "rb") as f:
    k_shot_accuracy_results_imagenet = pickle.load(f)

In [None]:
# Compare the three feature extractors on the 'original' images: mean test
# accuracy against the number of training images per class.
x = [1, 3, 5, 10, 20, 30]

fig,ax = plt.subplots(figsize=(8, 6))

# (results of one feature extractor, legend label), in plotting order
for extractor_results, legend_label in [
    (k_shot_accuracy_results_iNat2021, 'iNat2021 Original'),
    (k_shot_accuracy_results_iNat2018, 'iNat2018 Original'),
    (k_shot_accuracy_results_imagenet, 'ImageNet Original'),
]:
    # Average the per-trial accuracies for each number of training images
    y = [np.mean(extractor_results[k]['original']) for k in x]
    plt.plot(x, y, marker="o", label=legend_label)

plt.xlabel('Number of Training Samples Used Per Class')
plt.ylabel('Accuracy')
plt.xticks(x)
plt.title('K-Shot Learning for Fine-Grained Classification on All Three Feature Extractors')
plt.legend()
