# Softmax Sklearn CIFAR10

In [1]:
# import the necessary packages
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets

import matplotlib.pyplot as plt
import numpy as np

In [2]:
from keras.datasets import cifar10

# Fetch the CIFAR-10 train/test splits and flatten every image into a
# single row of 3072 values so each split becomes a 2-D design matrix.
# No rescaling is applied in this cell: the pixel values are passed on
# exactly as cifar10.load_data() returns them.
print("[INFO] loading CIFAR-10 data...")
(trainX, trainY), (testX, testY) = cifar10.load_data()

trainX, testX = (
    split.reshape(len(split), 3072) for split in (trainX, testX)
)

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


[INFO] loading CIFAR-10 data...


In [3]:
# Bias trick: glue a constant column of ones onto the right-hand side of
# each design matrix, so the intercept is learned as part of the single
# weight matrix W instead of as a separate parameter.
# NOTE(review): this cell overwrites trainX/testX, so running it a second
# time without reloading the data appends yet another column.
trainX = np.concatenate([trainX, np.ones((len(trainX), 1))], axis=1)
testX = np.concatenate([testX, np.ones((len(testX), 1))], axis=1)

print(trainX.shape, testX.shape)

(50000, 3073) (10000, 3073)


In [4]:
from sklearn.linear_model import SGDClassifier

# Linear classifier trained with SGD on the logistic loss and an L2 penalty.
#
# Notes on the arguments:
# * loss="log" is the spelling accepted by the scikit-learn release this
#   notebook was run with; newer releases (>= 1.1) call it "log_loss".
# * alpha is the L2 regularisation strength.
# * eta0 is only consulted by the "constant", "invscaling" and "adaptive"
#   learning-rate schedules. With the default learning_rate="optimal" used
#   here it has no effect on training; it is kept so the call stays
#   backward compatible.
# * random_state pins the estimator's internal shuffling so that repeated
#   runs of the notebook can reproduce the same fit.
# NOTE(review): SGDClassifier handles multi-class problems one-vs-all, so
# this is per-class logistic regression rather than a joint softmax.
model = SGDClassifier(
    loss="log",
    penalty="l2",
    alpha=2.5e4,
    eta0=5e-7,
    n_jobs=-1,
    random_state=42,
)

In [5]:
# Human-readable CIFAR-10 class names; position i names integer label i.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# partial_fit must be told every label value that can ever occur, as a 1-D
# array of the same kind of values found in trainY (integer class ids).
# A one-hot matrix built from the class names is not that: it only happened
# to be accepted because scikit-learn reads an indicator matrix as
# "labels 0..n_columns-1".
class_labels = np.arange(len(classes))

batch_size = 200
num_iters = 1000
# Derive the sample count from the data instead of hard-coding 50000.
num_train = trainX.shape[0]

# Dedicated, seeded generator so the sequence of mini-batches is repeatable.
rng = np.random.RandomState(42)

for it in range(num_iters):
    # Draw one mini-batch of row indices (with replacement, the default).
    batch_idx = rng.choice(num_train, batch_size)
    X_batch = trainX[batch_idx]
    # ravel() flattens the labels to the 1-D shape partial_fit expects.
    y_batch = trainY[batch_idx].ravel()

    # One incremental SGD pass over this batch.
    model.partial_fit(X_batch, y_batch, classes=class_labels)

In [6]:
# Score the fitted model on the held-out test split (labels flattened to
# 1-D first) and report the mean accuracy as a percentage.
acc = model.score(testX, testY.reshape(-1))
accuracy_line = "[INFO] accuracy: {:.2f}%".format(acc * 100)
print(accuracy_line)

[INFO] accuracy: 33.54%


Well Done