In [1]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from skimage import io
from skimage.color import rgb2gray
from skimage.filters import roberts, sobel, scharr, prewitt
# Dataset locations, relative to the notebook's working directory.
# train_set_dir is joined to file names by plain string concatenation in the
# loading cells, so the trailing '/' is required.
# NOTE(review): directory names suggest a PNG export of CIFAR-10 — confirm.
train_set_dir = 'cifar-10/train/'
test_set_dir = 'cifar-10/test/'

In [2]:
# Read the training labels from the plain-text 'labels' file that sits next to
# the training images. genfromtxt parses the values as floats by default.
labels_path = train_set_dir + 'labels'
img_labels = genfromtxt(labels_path)

# Show the parsed labels together with the array shape as a quick sanity check.
(img_labels, img_labels.shape)

(array([ 6.,  9.,  9., ...,  9.,  1.,  1.]), (50000,))

In [3]:
# Feature matrix: one row per training image, one column per grayscale pixel.
# Each flattened image has to fit a 1024-wide row.
img_data = np.empty((50000, 1024))

for idx in range(50000):
    # Files are named by zero-padded index: 00000.png, 00001.png, ...
    png_path = train_set_dir + '%05d.png' % idx
    gray_img = rgb2gray(io.imread(png_path))  # read the file, convert to grayscale
    img_data[idx, :] = gray_img.flatten()

img_data

array([[ 0.24092   ,  0.17760941,  0.18848824, ...,  0.73579451,
         0.48063176,  0.38096275],
       [ 0.67777843,  0.52780549,  0.40613176, ...,  0.53159843,
         0.53525216,  0.53301216],
       [ 1.        ,  0.99215686,  0.99215686, ...,  0.32693451,
         0.32776784,  0.33168941],
       ..., 
       [ 0.59498902,  0.59467569,  0.59690784, ...,  0.19015686,
         0.15679725,  0.11110745],
       [ 0.81731725,  0.8052698 ,  0.80134824, ...,  0.72649412,
         0.74192039,  0.74389255],
       [ 0.90086667,  0.93140588,  0.92693373, ...,  0.69465333,
         0.64257176,  0.6386502 ]])

In [4]:
# Build a 5-fold cross-validation splitter. `shuffle` is left at its default
# (False), so the folds are consecutive index ranges in the order the images
# were loaded.
kf = KFold(n_splits=5)
# Display the number of folds the splitter will produce (sanity check only;
# the result is not stored).
kf.get_n_splits(img_data)

5

In [11]:
# K-fold cross-validation of a multinomial logistic regression on the
# flattened grayscale pixels. For every fold the model is refit from scratch
# and its accuracy (LogisticRegression.score) is recorded on both the training
# part and the held-out validation part.
n_folds = kf.get_n_splits()  # size the score arrays from the splitter, not a literal
train_score = np.zeros(n_folds)
val_score = np.zeros(n_folds)

count = 0  # 1-based fold number; remains 0 if the splitter yields no folds
for count, (train_index, val_index) in enumerate(kf.split(img_data), start=1):
    img_data_train, img_data_val = img_data[train_index], img_data[val_index]
    img_labels_train, img_labels_val = img_labels[train_index], img_labels[val_index]

    # The keyword is `solver` (not `solve`, which the constructor rejects
    # with a TypeError before any fitting happens).
    # NOTE: 'sag' is a stochastic solver and no random_state is set, so the
    # scores may vary slightly from run to run.
    regr = LogisticRegression(multi_class='multinomial', solver='sag')
    regr.fit(img_data_train, img_labels_train)

    train_score[count - 1] = regr.score(img_data_train, img_labels_train)
    val_score[count - 1] = regr.score(img_data_val, img_labels_val)
    print("Set %d -- Train Score: %.2f Validation score: %.2f"
          % (count, train_score[count - 1], val_score[count - 1]))
    



Set 1 -- Train Score: 0.34 Validation score: 0.29
Set 2 -- Train Score: 0.35 Validation score: 0.27
Set 3 -- Train Score: 0.35 Validation score: 0.29
Set 4 -- Train Score: 0.35 Validation score: 0.28
Set 5 -- Train Score: 0.35 Validation score: 0.28


In [12]:
# Summarize the cross-validation run: average accuracy over all folds for the
# training and validation parts.
mean_train = np.average(train_score)
mean_val = np.average(val_score)
print("Mean Score Train: %.2f Mean Score Validation: %.2f" % (mean_train, mean_val))

Mean Score Train: 0.35 Mean Score Validation: 0.28
