In [5]:
import matplotlib.pyplot as plt
import numpy as np
from numpy import genfromtxt
import scipy
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from skimage import io, img_as_uint
from skimage.color import rgb2gray
from skimage.filters import roberts, threshold_otsu
# Locations of the CIFAR-10 train/test folders. The trailing slash is required:
# file paths are built further down by plain string concatenation.
train_set_dir, test_set_dir = 'cifar-10/train/', 'cifar-10/test/'

In [6]:
# Read the class label of every training image from the plain-text
# `labels` file that sits inside the training folder.
img_labels = np.genfromtxt(train_set_dir + 'labels')

# Show the labels together with their shape as a quick sanity check.
(img_labels, img_labels.shape)

(array([ 6.,  9.,  9., ...,  9.,  1.,  1.]), (50000,))

In [7]:
# Build the feature matrix: one row per training image, one column per value
# of the flattened grayscale image.
n_images = img_labels.shape[0]  # assumes one PNG per entry in the labels file
n_pixels = 1024                 # length of one flattened grayscale image
img_data = np.empty([n_images, n_pixels])

for i in range(n_images):
    # Image files are named by zero-padded index: 00000.png, 00001.png, ...
    filename = '{0:05d}.png'.format(i)

    # Read the file and convert it to grayscale.
    img = rgb2gray(io.imread(train_set_dir + filename))

    # Otsu's method picks a global threshold for this image; every pixel below
    # it is overwritten with 1, the remaining pixels keep their gray value.
    # NOTE(review): no edge operator is applied here (`roberts` is imported but
    # unused), and this is not a full binarisation -- confirm this is the
    # intended feature.
    val = threshold_otsu(img)
    mask = img < val
    img[mask] = 1

    img_data[i, :] = img.flatten()

# Display the shape so the cell output documents the size of the feature matrix.
img_data.shape

(50000, 1024)

In [8]:
# Cross-validation setup: partition the samples into 5 folds with K-Folds.
kf = KFold(5)

# Echo the number of splits the splitter will generate for this data set.
kf.get_n_splits(X=img_data)

5

In [None]:
# K-fold cross-validation of a one-vs-rest logistic regression.
# The score arrays are sized from the splitter itself, so changing `n_splits`
# on `kf` can never overflow them or leave stale zero entries behind.
n_folds = kf.get_n_splits(img_data)
train_score = np.zeros(n_folds)
val_score = np.zeros(n_folds)

for fold, (train_index, val_index) in enumerate(kf.split(img_data)):
    img_data_train, img_data_val = img_data[train_index], img_data[val_index]
    img_labels_train, img_labels_val = img_labels[train_index], img_labels[val_index]

    # A fresh model per fold so no fitted state leaks between folds.
    # NOTE(review): recent scikit-learn releases deprecate the `multi_class`
    # argument -- check against the installed version.
    regr = LogisticRegression(multi_class='ovr')
    regr.fit(img_data_train, img_labels_train)

    # `score` returns the mean accuracy on the given samples.
    train_score[fold] = regr.score(img_data_train, img_labels_train)
    val_score[fold] = regr.score(img_data_val, img_labels_val)
    print("Set %d -- Train Score: %.2f Validation score: %.2f"
          % (fold + 1, train_score[fold], val_score[fold]))



Set 1 -- Train Score: 0.31 Validation score: 0.22
Set 2 -- Train Score: 0.30 Validation score: 0.21
Set 3 -- Train Score: 0.30 Validation score: 0.21
Set 4 -- Train Score: 0.30 Validation score: 0.21


In [None]:
# Summarise the cross-validation run: mean of the per-fold train and
# validation scores collected in `train_score` and `val_score`.
mean_scores = (np.mean(train_score), np.mean(val_score))
print("Mean Score Train: %.2f Mean Score Validation: %.2f" % mean_scores)