In [4]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from skimage import io
from skimage.color import rgb2gray
from skimage.filters import roberts, sobel, scharr, prewitt
from sklearn.decomposition import PCA
# Directory prefixes (trailing slash included) that are concatenated directly
# with file names when the dataset is read.
train_set_dir = 'cifar-10/train/'  # holds the 'labels' file and the zero-padded '<index>.png' images read below
test_set_dir = 'cifar-10/test/'  # presumably the matching test split; not read by the cells visible here

In [5]:
# Parse the training labels file into a 1-D array (genfromtxt yields floats
# here) and echo the array together with its shape as the cell output.
labels_path = train_set_dir + 'labels'
img_labels = genfromtxt(labels_path)
img_labels, img_labels.shape

(array([ 6.,  9.,  9., ...,  9.,  1.,  1.]), (50000,))

In [6]:
# Load every training image as a flattened grayscale row vector, then reduce
# the pixel matrix to 40 whitened principal components stored in `img_data`.

# One image per label, so size the pixel matrix from the labels instead of
# repeating a literal sample count.
n_images = len(img_labels)
n_pixels = 1024  # length of one flattened grayscale image (presumably 32x32)

# Raw pixels get their own name so `img_data` only ever means "PCA features".
# Trade-off: the raw matrix stays in memory after this cell.
img_pixels = np.empty([n_images, n_pixels])

for i in range(n_images):
    filename = '{0:05d}'.format(i) + '.png'  # files are named 00000.png, 00001.png, ...
    img = rgb2gray(io.imread(train_set_dir + filename))  # read file, convert to grayscale
    img_pixels[i, :] = img.flatten()

# random_state is pinned because, for a matrix of this size, PCA's default
# solver choice may be a randomized SVD; without a seed the components (and
# every score computed from them) can change between runs.
#
# NOTE(review): the PCA is fitted on all samples before the K-fold split done
# further down, so the validation folds have already influenced the
# projection. Fit it on each training fold if unbiased validation scores are
# needed.
pca = PCA(n_components=40, whiten=True, random_state=0)
img_data = pca.fit_transform(img_pixels)
img_data

array([[ -7.04735316e-01,   6.61165182e-01,  -7.38088116e-01, ...,
         -1.08967377e+00,   1.99666413e-01,  -4.21821898e-01],
       [  3.82689127e-02,  -2.44954738e-01,  -1.68389032e+00, ...,
          3.78074581e+00,   5.63324492e-01,   8.52676735e-01],
       [  1.00255745e+00,  -2.53248381e+00,   6.40279535e-01, ...,
         -4.62366146e-01,  -1.80424411e-01,  -7.08352178e-01],
       ..., 
       [ -3.70438239e-02,  -2.41636500e+00,   2.76119701e-01, ...,
          2.93723411e-01,   1.11616737e+00,   4.44922312e-01],
       [  1.41274719e+00,  -6.91003242e-01,  -1.52481142e+00, ...,
          3.39348376e-01,   1.11697334e-01,  -6.78384810e-01],
       [  4.99252604e-01,  -2.71186007e-01,   1.23507790e+00, ...,
         -6.79991430e-01,   1.84416791e-03,   3.25099697e+00]])

In [7]:
# Build the K-fold splitter used for cross-validation (5 folds; shuffling is
# not requested) and echo the number of splits it will produce.
n_folds = 5
kf = KFold(n_splits=n_folds)
kf.get_n_splits(img_data)

5

In [9]:
# Cross-validate a multinomial logistic regression on the PCA features:
# for every fold produced by `kf`, fit on the training part and record the
# score on both the training and the validation part.

# Size the score arrays from the splitter itself so they cannot drift out of
# sync with the number of folds configured on `kf`.
n_splits = kf.get_n_splits()
train_score = np.zeros(n_splits)
val_score = np.zeros(n_splits)

# `count` is the 1-based fold number (used for printing); count-1 indexes the arrays.
for count, (train_index, val_index) in enumerate(kf.split(img_data), start=1):
    img_data_train, img_data_val = img_data[train_index], img_data[val_index]
    img_labels_train, img_labels_val = img_labels[train_index], img_labels[val_index]

    # 'sag' is a stochastic solver, so the seed is pinned to make the reported
    # scores reproducible across runs.
    # NOTE(review): newer scikit-learn releases deprecate `multi_class` --
    # confirm against the installed version before upgrading.
    regr = LogisticRegression(multi_class='multinomial', solver='sag', random_state=0)
    regr.fit(img_data_train, img_labels_train)

    train_score[count-1] = regr.score(img_data_train, img_labels_train)
    val_score[count-1] = regr.score(img_data_val, img_labels_val)
    print("Set %d -- Train Score: %.2f Validation score: %.2f"
          % (count, train_score[count-1], val_score[count-1]))
    

Set 1 -- Train Score: 0.30 Validation score: 0.30
Set 2 -- Train Score: 0.31 Validation score: 0.29
Set 3 -- Train Score: 0.30 Validation score: 0.31
Set 4 -- Train Score: 0.30 Validation score: 0.30
Set 5 -- Train Score: 0.30 Validation score: 0.30


In [10]:
# Summarise the cross-validation run: unweighted mean of the per-fold scores.
mean_train = train_score.mean()
mean_val = val_score.mean()
print("Mean Score Train: %.2f Mean Score Validation: %.2f" % (mean_train, mean_val))

Mean Score Train: 0.30 Mean Score Validation: 0.30
