In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

import numpy as np

import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from PIL import Image 
from scipy import ndimage, misc
import matplotlib.pyplot as plt

from image_feature_extractor import Img2Vec

from sklearn.linear_model import LogisticRegression

import torch
from sklearn import preprocessing




# Download CIFAR 10 data form torchvision

In [2]:

train_dataset = torchvision.datasets.CIFAR10('~/datasets/cifar', train=True, download=True)
test_dataset = torchvision.datasets.CIFAR10('~/datasets/cifar', train=False, download=True)


Files already downloaded and verified
Files already downloaded and verified


In [3]:
np.expand_dims(test_dataset.data[0],0).shape

(1, 32, 32, 3)

# Useful functions

In [4]:
def apply_contrast(images_data, contrast_factor):
    to_tensor = transforms.ToTensor()   
    images_list = [to_tensor(im) for im in images_data]
    images = torch.stack(images_list)
    contrasted_images = F.adjust_contrast(images, contrast_factor)
    contrasted_images = np.array(np.stack([transforms.ToPILImage()(im) for im in contrasted_images]))
    return contrasted_images

def apply_rotation(images_data, angle):
    rotated_images = []
    for img in images_data:
        rotated_image = ndimage.rotate(img, angle, reshape=False)
        rotated_images.append(rotated_image)
    rotated_images = np.array(rotated_images)
    return rotated_images


# Generate a list of original and modified images: every original image has a duplicate which is 
# the modified original image.
def generate_duplicates_modified_original(image_set, mod_factor, mod_type="contrast"):
    original_labels = np.zeros(len(image_set.data))
    modified_labels = np.ones(len(image_set.data))
    
    modified_images = None
    
    if mod_type=="contrast":
        modified_images = apply_contrast(image_set.data, mod_factor) 
        
    elif mod_type=="rotation":
        modified_images = apply_rotation(image_set.data, mod_factor)

    original_and_modified_images = np.concatenate((image_set.data, modified_images), axis=0)
    modification_labels = np.concatenate((original_labels, modified_labels), axis=None)
    image_labels = np.concatenate((image_set.targets, image_set.targets), axis=None)        
    return modified_images, original_and_modified_images, modification_labels, image_labels


# used to binarize the target variable
def binarize(y):    
    y = np.copy(y) > 5
    return y.astype(int)

def correlate_label_with_img_modification(image_dataset, mod_factor, mod_percent= 0.1, 
                                          binarize_label=True, mod_type="contrast"):
    y = binarize(image_dataset.targets)
    mod_labels = np.logical_xor(y, np.random.binomial(1, mod_percent, size=len(y)))
    
    images_to_be_modified = image_dataset.data[mod_labels]
    images_to_be_left_alone = image_dataset.data[~mod_labels]
    
    all_img_labels = None
    
    if binarize_label:
        modified_imgs_labels = y[mod_labels]
        unmodified_imgs_labels = y[~mod_labels]
        all_img_labels = np.concatenate((modified_imgs_labels, unmodified_imgs_labels), axis=None)
    else:
        modified_imgs_labels = np.array(image_dataset.targets)[mod_labels]
        unmodified_imgs_labels = np.array(image_dataset.targets)[~mod_labels]
        all_img_labels = np.concatenate((modified_imgs_labels, unmodified_imgs_labels), axis=None)    
    
    all_images = None
    
    if mod_type == "contrast":
        modified_images = apply_contrast(images_to_be_modified, mod_factor)
        all_images = np.concatenate((modified_images, images_to_be_left_alone), axis=0)                    
        
    elif mod_type=="rotation":
        modified_images = apply_rotation(images_to_be_modified, mod_factor)
        all_images = np.concatenate((modified_images, images_to_be_left_alone), axis=0)
        
    return all_images, all_img_labels, mod_labels.astype(int)


# Function to extract image features    
def get_features(images, batch_size):
    Z_list = []
    img2vec = Img2Vec(model="resnet50")
#     img2vec = Img2Vec()
    for first in range(0, len(images), batch_size):
        images_subset = images[first:first+batch_size]
        Z_subset = img2vec.get_vec(images_subset)
        Z_list.append(Z_subset)
    Z = np.vstack(Z_list)
    return Z

# Accuracies for different variations of data distributions per class

In [5]:


contrast_factor = 0.3 # 0 gray - 1 original image - 2 increases the contrast by a factor of 2.
angle = 70
batch_size = 64

distributions = [0.5, 0.75, 0.90, 0.95, 0.99,1.0]

for dist_value in distributions:
    print(" Results for alpha value: ", dist_value)
    # Creating sets of images in a way that correlates the way images are modified with label
    train_imgs, train_labels2, train_mod_labels2 = correlate_label_with_img_modification(
        train_dataset, contrast_factor, mod_percent= dist_value, binarize_label=False, mod_type="contrast")
    
    test_imgs_indist, test_labels2_indist, test_mod_labels2_indist = correlate_label_with_img_modification(
        test_dataset, contrast_factor, mod_percent= dist_value, binarize_label=False, mod_type="contrast")

    test_imgs_ood, test_labels2_ood, test_mod_labels2_ood = correlate_label_with_img_modification(
        test_dataset, contrast_factor, mod_percent= 1-dist_value, binarize_label=False, mod_type="contrast")


    # Extract image features
    Z_train = get_features(train_imgs, batch_size)
    Z_test_indist= get_features(test_imgs_indist, batch_size)
    Z_test_ood= get_features(test_imgs_ood, batch_size)


    # Classification and recording prediction accuracy
    logistic_regression_model2 = LogisticRegression(
        multi_class='multinomial', solver='lbfgs', random_state=0).fit(Z_train, 
                                                                       train_labels2)
    accuracy0 = logistic_regression_model2.score(Z_train, train_labels2)
    accuracy1 = logistic_regression_model2.score(Z_test_indist, test_labels2_indist)
    accuracy2 = logistic_regression_model2.score(Z_test_ood, test_labels2_ood)
    print('LR Training Accuracy: ', accuracy0)
    print('LR Accuracy on test data - Z - In Dist: ', accuracy1)
    print('LR Accuracy on test data - Z - OOD: ', accuracy2)
    print("\n"*2)


 Results for alpha value:  0.5


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.88922
LR Accuracy on test data - Z - In Dist:  0.8604
LR Accuracy on test data - Z - OOD:  0.8609



 Results for alpha value:  0.75


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.89246
LR Accuracy on test data - Z - In Dist:  0.8616
LR Accuracy on test data - Z - OOD:  0.8342



 Results for alpha value:  0.9


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.90384
LR Accuracy on test data - Z - In Dist:  0.8821
LR Accuracy on test data - Z - OOD:  0.7535



 Results for alpha value:  0.95


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.91116
LR Accuracy on test data - Z - In Dist:  0.8882
LR Accuracy on test data - Z - OOD:  0.6686



 Results for alpha value:  0.99


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.92392
LR Accuracy on test data - Z - In Dist:  0.903
LR Accuracy on test data - Z - OOD:  0.4855



 Results for alpha value:  1.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LR Training Accuracy:  0.92894
LR Accuracy on test data - Z - In Dist:  0.9082
LR Accuracy on test data - Z - OOD:  0.1366



