In [1]:
import pickle
import collections
import time
import numpy as np

from keras.preprocessing import image
from PIL import Image
from keras.applications.inception_v3 import InceptionV3

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from skimage.feature import hog
from sklearn import svm
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix

%config IPCompleter.greedy=True

## Loading data and checking structure

In [4]:
#little snippet taken from cifar-10 website
def unpickle(file):
    """Load one pickled CIFAR-10 file and return the object stored in it.

    The file is read in binary mode and un-pickled with ``encoding='bytes'``,
    so string keys written by Python 2 come back as ``bytes`` (e.g. ``b'data'``).

    NOTE: pickle can execute arbitrary code while loading - only use this on
    files from a trusted source.
    """
    with open(file, 'rb') as handle:
        contents = pickle.load(handle, encoding='bytes')
        return contents

In [5]:
#checking labels
# Forward slash instead of a hard-coded backslash: the same relative path
# then resolves on Windows, Linux and macOS.
unpickle('data/batches.meta')

{b'num_cases_per_batch': 10000,
 b'label_names': [b'airplane',
  b'automobile',
  b'bird',
  b'cat',
  b'deer',
  b'dog',
  b'frog',
  b'horse',
  b'ship',
  b'truck'],
 b'num_vis': 3072}

In [6]:
#checking structure of one batch
# 'data\data_batch_1' relied on the invalid escape sequence '\d' (deprecated,
# warns on current Python) and only worked on Windows; a forward slash is
# portable and warning-free.
batch1 = unpickle('data/data_batch_1')
print(batch1.keys())
print(batch1[b'data'].shape)
print(len(batch1[b'labels']))
print(collections.Counter(batch1[b'labels']))

dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
(10000, 3072)
10000
Counter({2: 1032, 6: 1030, 8: 1025, 3: 1016, 0: 1005, 7: 1001, 4: 999, 9: 981, 1: 974, 5: 937})


In [7]:
#loading full dataset
# batch1 was loaded in the previous cell; batches 2-5 are read here.
# The pixel arrays are collected in a list and concatenated once, instead of
# re-copying the growing array with np.append on every iteration.
data_parts = [batch1[b'data']]
y = list(batch1[b'labels'])  # copy, so the list inside batch1 is never mutated

for i in range(2, 6):
    # forward slash: portable, and avoids the invalid '\d' escape sequence
    temp_dict = unpickle('data/data_batch_' + str(i))
    data_parts.append(temp_dict[b'data'])
    y.extend(temp_dict[b'labels'])

x = np.concatenate(data_parts, axis=0)
del data_parts  # drop the helper list; x now holds the full data

In [8]:
#checking distribution of labels over the full dataset
label_counts = collections.Counter(y)
print(label_counts)

Counter({6: 5000, 9: 5000, 4: 5000, 1: 5000, 2: 5000, 7: 5000, 8: 5000, 3: 5000, 5: 5000, 0: 5000})


## Randomly selecting images for modelling

In [None]:
#train and test sets
# Step 1: split off 10% of the full data as the small modelling set
# (x_small / y_small); the other 90% is kept only temporarily.
x_rest, x_small, y_rest, y_small = train_test_split(
    x, y, random_state=42, test_size=0.1)

# Step 2: from that remaining 90%, take 5% as the test set.
x_rest, x_test, y_rest, y_test = train_test_split(
    x_rest, y_rest, random_state=42, test_size=0.05)

#remove auxiliary data (the unused remainder and the full dataset)
del x_rest, y_rest
del x, y
x_small.shape

## Extracting features from training and test datasets

In [None]:
def preprocess_inception(df, model=None): #function for generating CNN features from image data
    """Turn flattened 32x32 RGB rows into InceptionV3 feature vectors.

    Parameters
    ----------
    df : array-like, shape (n_images, 3072)
        One flattened image per row, laid out as three consecutive 1024-value
        colour planes of a 32x32 picture. Raw 0-255 pixel values are expected
        (NOTE(review): the dtype of the data is not visible in this cell -
        confirm before feeding already-normalised data).
    model : object with a ``predict`` method, optional
        Feature extractor to reuse. When omitted, an ImageNet-weighted
        InceptionV3 without its top layer is built for 139x139x3 inputs.
        Passing the same model for the train and test calls avoids building
        the network twice.

    Returns
    -------
    numpy.ndarray, shape (n_images, n_features)
        One flat feature vector per image, ready for scikit-learn estimators.
        The original code assumed 3*3*2048 columns for the built-in model.
    """
    # Imported here so the function stays self-contained within this cell;
    # it comes from the same keras module that provides InceptionV3.
    from keras.applications.inception_v3 import preprocess_input

    time_start=time.time() #timer

    # (n, 3072) channel-planar rows -> (n, 32, 32, 3) images. Same mapping as
    # row.reshape(3, 1024).T.reshape(32, 32, 3), done for all rows at once.
    rows = np.asarray(df)
    images = rows.reshape(len(rows), 3, 32, 32).transpose(0, 2, 3, 1)
    print("Image data reshaped")

    #removing top layer; input shape fixed to 139x139x3 (not the network's
    #default size) to keep the resized images small
    if model is None:
        model = InceptionV3(weights='imagenet', include_top=False, input_shape=(139, 139, 3))
    print("Model created")

    # preserve_range keeps the 0-255 scale that preprocess_input expects.
    # Casting each image to float32 as it is produced avoids holding the
    # whole float64 stack in memory.
    df_train_resized = np.array([
        resize(img, (139, 139, 3), preserve_range=True).astype('float32')
        for img in images])
    # InceptionV3 weights were trained on inputs scaled to [-1, 1]; without
    # this step the network would see inputs in the wrong value range.
    df_train_resized = preprocess_input(df_train_resized)
    print("image resized to suit inception model")

    features_inception = model.predict(df_train_resized, verbose=1)
    print("features predicted")

    # Flatten everything but the batch axis; -1 replaces the hard-coded
    # 3*3*2048 so the reshape also holds for a user-supplied model.
    features_inception = np.asarray(features_inception).reshape((len(rows), -1))
    print("features reshaped to suit sklearn models")
    print("total time:  ", time.time()-time_start, " s")
    return features_inception
    