[View in Colaboratory](https://colab.research.google.com/github/pawelos88/cifar10/blob/master/Cifar10.ipynb)

### Download CIFAR10 dataset

In [9]:
from urllib.request import urlretrieve
from urllib.parse import urlparse
import os

# Source archive of the CIFAR-10 "python version" batches.
cifar10_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
# Local file name is the last path component of the URL.
cifar10_archive_file = os.path.basename(urlparse(cifar10_url).path)

# Skip the download when the archive is already present so that re-running the
# notebook does not fetch the file again.
if not os.path.exists(cifar10_archive_file):
  # Download to a temporary name and rename on success, so an interrupted
  # transfer never leaves a truncated file under the final name.
  cifar10_partial_file = cifar10_archive_file + ".part"
  urlretrieve(cifar10_url, cifar10_partial_file)
  os.replace(cifar10_partial_file, cifar10_archive_file)

('cifar-10-python.tar.gz', <http.client.HTTPMessage at 0x7faae2420198>)

### Extract archive file

In [0]:
import tarfile

def extract_archive_file(archive_file):
  """Extract every member of a tar archive into the current working directory.

  The archive comes from the network, so member names are validated first:
  any member whose path would resolve outside the current working directory
  (absolute paths, ``..`` components) aborts the extraction before anything
  is written. Only member names are checked; link targets are not inspected.

  Args:
    archive_file: path to a tar archive (compression is auto-detected by
      ``tarfile.open``).

  Raises:
    ValueError: if a member's path escapes the current working directory.
  """
  import os

  target_dir = os.path.realpath(os.getcwd())
  with tarfile.open(archive_file) as archive:
    for member in archive.getmembers():
      member_path = os.path.realpath(os.path.join(target_dir, member.name))
      # commonpath equals target_dir only when member_path lies inside it.
      if os.path.commonpath([target_dir, member_path]) != target_dir:
        raise ValueError("Unsafe path in archive: {}".format(member.name))
    archive.extractall()
    
# Unpack the downloaded CIFAR-10 archive into the working directory.
extract_archive_file(archive_file=cifar10_archive_file)

### Load data from archive file

In [0]:
import numpy as np
import os


# Directory holding the pickled batch files read by the loaders below
# (presumably the folder produced by extracting the archive — confirm).
cifar_path = "cifar-10-batches-py"

def load_data(cifar_path):
  """Load the training batches and the test batch found under ``cifar_path``.

  Returns:
    A pair ``((data, labels), (test_data, test_labels))``: the first element
    comes from ``load_batches`` and the second from ``load_test_batch``.
  """
  return load_batches(cifar_path), load_test_batch(cifar_path)

def unpickle(file):
  """Deserialize and return the pickled object stored in ``file``.

  The file is opened in binary mode and loaded with ``encoding='bytes'``.

  NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
  files from a trusted source.
  """
  import pickle
  with open(file, 'rb') as stream:
    return pickle.load(stream, encoding='bytes')

def load_batches(cifar_path):
  """Load ``data_batch_1`` .. ``data_batch_5`` and concatenate them.

  Returns:
    A pair ``(data, labels)`` of NumPy arrays, each the concatenation (in
    batch order) of the corresponding parts returned by ``load_batch``.
  """
  loaded = [ load_batch(cifar_path, "data_batch_{}".format(index)) for index in range(1, 6) ]
  # Transpose the list of (data, labels) pairs into (all data, all labels).
  data_parts, label_parts = zip(*loaded)
  return np.concatenate(data_parts), np.concatenate(label_parts)

def load_test_batch(cifar_path):
  """Load the ``test_batch`` file from ``cifar_path`` as a ``(data, labels)`` pair."""
  test_batch_name = "test_batch"
  return load_batch(cifar_path, test_batch_name)

def load_batch(cifar_path, batch_name):
  """Read one pickled batch file and return its ``(data, labels)`` entries.

  Args:
    cifar_path: directory containing the batch files.
    batch_name: file name of the batch inside ``cifar_path``.

  Returns:
    The values stored under the ``b'data'`` and ``b'labels'`` keys.
  """
  batch_file = os.path.join(cifar_path, batch_name)
  batch = unpickle(batch_file)
  return batch[b'data'], batch[b'labels']

# X, y: data and labels concatenated from the five training batches;
# X_test, y_test: data and labels from the separate test batch.
(X, y), (X_test, y_test) = load_data(cifar_path)


### Split data into train and dev sets
Use 45,000 examples as the train set and 5,000 as the dev set.

In [2]:
# `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Fixed seed so the train/dev split (and every score derived from it) is
# reproducible across kernel restarts.
SPLIT_SEED = 42

# Hold out 10% of the training data as the dev set.
X_train, X_dev, y_train, y_dev = train_test_split(X, y, test_size=0.1, random_state=SPLIT_SEED)




### Show random images from each category

In [0]:
def reshape(image):
  """Convert a flat 3072-value row into a 32x32x3 image array.

  The row is read as three consecutive 1024-value planes (red, green, blue),
  each reshaped to 32x32 and stacked along a new last axis.
  """
  planes = [ image[start:start + 1024].reshape(32, 32) for start in (0, 1024, 2048) ]
  return np.dstack(planes)

### Benchmark using the HOG feature extractor

In [0]:
from skimage.feature import hog
from skimage import color
from matplotlib.pyplot import imshow

def get_hog(image):
  """Return the HOG feature vector of an RGB image.

  The image is converted to greyscale first; the descriptor uses 8x8-pixel
  cells, 2x2-cell blocks and 'L2-Hys' block normalisation, flattened to 1-D.
  """
  hog_options = dict(pixels_per_cell=(8, 8), cells_per_block=(2, 2), block_norm='L2-Hys', feature_vector=True)
  return hog(color.rgb2gray(image), **hog_options)
  
# HOG descriptors for the dev and test images (one vector per image).
X_dev_hog = [ get_hog(image) for image in map(reshape, X_dev) ]
X_test_hog = [ get_hog(image) for image in map(reshape, X_test) ]

In [0]:
# HOG descriptors for the training images; kept in its own cell, separate
# from the dev/test extraction.
X_train_hog = [ get_hog(image) for image in map(reshape, X_train) ]

In [17]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Baseline: linear SVM trained on the HOG descriptors (fit returns the estimator).
svc_model = svm.SVC(kernel='linear', C=0.01).fit(X_train_hog, y_train)

# Score the baseline on the held-out test batch.
y_pred = svc_model.predict(X_test_hog)
accuracy_score(y_test, y_pred)

0.4999

The benchmark above gave 0.4999 accuracy on the test set.

### Extract features using ResNet50

In [4]:
from keras import applications
# ResNet50 with ImageNet weights, without the top classification layers
# (include_top=False); pooling='avg' applies global average pooling so each
# image yields a single feature vector.
resnet50 =  applications.resnet50.ResNet50(weights='imagenet', include_top=False, pooling='avg')

Using TensorFlow backend.


Prepare the data to be fed into ResNet50 by resizing every image to 224×224.

In [0]:
import cv2


# Spatial size (width, height) the images are upscaled to before being passed to ResNet50.
RESNET_INPUT_SIZE = (224, 224)

def resize_for_resnet(images, size=RESNET_INPUT_SIZE):
  """Reshape each flat image row to 32x32x3 and upscale it with bicubic interpolation.

  Args:
    images: iterable of flat image rows accepted by ``reshape``.
    size: target ``(width, height)`` passed to ``cv2.resize`` as ``dsize``.

  Returns:
    A list with one resized image array per input row.
  """
  return [ cv2.resize(reshape(x), dsize=size, interpolation=cv2.INTER_CUBIC) for x in images ]

# NOTE(review): every split is materialised at full resolution in memory, yet
# only the first 1000 training images are used for feature extraction below.
# NOTE(review): Keras ResNet50 ImageNet weights are normally paired with
# applications.resnet50.preprocess_input; it is not applied anywhere visible
# here — confirm whether that is intentional.
X_train_224 = resize_for_resnet(X_train)
X_dev_224 = resize_for_resnet(X_dev)
X_test_224 = resize_for_resnet(X_test)

Extract features

In [0]:
# Run the resized images through ResNet50 to obtain one feature vector each.
# Only the first 1000 training images are used for the training features.
X_train_features = resnet50.predict(np.array(X_train_224[:1000]))
X_test_features = resnet50.predict(np.array(X_test_224))
X_dev_features = resnet50.predict(np.array(X_dev_224))


First SVM classifier

In [16]:
from sklearn import svm
from sklearn.metrics import accuracy_score

# Linear SVM trained on the ResNet50 features of the first 1000 training
# images; the label slice must match the slice used when extracting features.
svc_model = svm.SVC(kernel='linear', C=10)
svc_model.fit(X_train_features, y_train[0:1000]) 


SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [17]:
# Evaluate the ResNet50-feature SVM on the dev set.
y_pred = svc_model.predict(X_dev_features)
accuracy_score(y_dev, y_pred)

0.7402