In [52]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from skimage import io
from skimage.color import rgb2gray
train_set_dir = 'cifar-10/train/'
test_set_dir = 'cifar-10/test/'

In [53]:
img_labels = genfromtxt(train_set_dir + 'labels')
img_labels, img_labels.shape

(array([ 6.,  9.,  9., ...,  9.,  1.,  1.]), (50000,))

In [54]:
img_data = np.empty([50000, 1024])

for i in range(0, 50000):
    filename = '{0:05d}'.format(i) + '.png'
    img = rgb2gray(io.imread(train_set_dir + filename)).flatten() #Reading file, converting to Grayscale and flattening img data
    img_data[i, :] = img

img_data

array([[ 0.24092   ,  0.17760941,  0.18848824, ...,  0.73579451,
         0.48063176,  0.38096275],
       [ 0.67777843,  0.52780549,  0.40613176, ...,  0.53159843,
         0.53525216,  0.53301216],
       [ 1.        ,  0.99215686,  0.99215686, ...,  0.32693451,
         0.32776784,  0.33168941],
       ..., 
       [ 0.59498902,  0.59467569,  0.59690784, ...,  0.19015686,
         0.15679725,  0.11110745],
       [ 0.81731725,  0.8052698 ,  0.80134824, ...,  0.72649412,
         0.74192039,  0.74389255],
       [ 0.90086667,  0.93140588,  0.92693373, ...,  0.69465333,
         0.64257176,  0.6386502 ]])

In [55]:
# Split the data using K-Folds, using 5 different sets
kf = KFold(n_splits=5)
kf.get_n_splits(img_data)

5

In [None]:
count = 0
train_score = np.zeros(5)
val_score = np.zeros(5)
for train_index, val_index in kf.split(img_data):
    img_data_train, img_data_val = img_data[train_index], img_data[val_index]
    img_labels_train, img_labels_val = img_labels[train_index], img_labels[val_index]
    
    regr = LogisticRegression(multi_class='ovr')
    regr.fit(img_data_train, img_labels_train)

    count += 1
    train_score[count-1] = regr.score(img_data_train, img_labels_train)
    val_score[count-1] = regr.score(img_data_val, img_labels_val)
    print("Set %d -- Train Score: %.2f Validation score: %.2f"
      % (count, train_score[count-1], val_score[count-1]))



Set 1 -- Train Score: 0.35 Validation score: 0.28
Set 2 -- Train Score: 0.36 Validation score: 0.27
Set 3 -- Train Score: 0.35 Validation score: 0.29


In [7]:
print("Mean Score Train: %.2f Mean Score Validation: %.2f" % (np.average(train_score), np.average(val_score)))

Mean Train MSE: 91.25 Mean Validation MSE: 91.30
Estimated intercept:  1951.10281144
Coefficients:  [  8.75639345e-01  -5.62540046e-02  -4.41323415e-02   4.11333022e-03
  -1.50999673e-02  -2.19989936e-01  -6.75886934e-03  -1.00055899e-01
  -6.53097961e-02   2.01988381e-02  -1.68867620e-01  -2.01718001e-03
   4.70676496e-02   3.59014415e-04  -4.47912002e-04   6.29820419e-04
   4.12670283e-04   1.39570601e-03   1.97328269e-03   2.20417993e-03
   7.48751599e-04  -6.44767497e-04   7.69927317e-03   2.79270644e-03
  -3.59534246e-03   3.38734605e-05   1.61345349e-03   5.32076408e-04
   9.88023250e-04  -1.74057071e-04  -1.41016329e-03  -1.38827440e-03
  -5.53457219e-03   2.20287341e-03   1.36076553e-03  -5.15449816e-03
  -2.57536356e-04   6.78848759e-04   1.37075992e-03  -1.71279897e-03
  -2.25715740e-03  -7.57222803e-04  -1.45860693e-03  -1.93512222e-03
  -3.27479436e-03   6.89072383e-03   4.70857954e-04  -2.02695788e-03
   3.21436507e-04   2.01635435e-03  -3.96283765e-05  -1.84255266e-03
   