In [2]:
import os

import cv2
import numpy as np
import pandas as pd
from keras.datasets import cifar10

In [3]:
# Load the CIFAR-10 train/test split as (images, labels) array pairs;
# their shapes are inspected in the cells that follow.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [4]:
x_train.shape

(50000, 32, 32, 3)

In [5]:
y_train.shape

(50000, 1)

In [6]:
x_test.shape

(10000, 32, 32, 3)

In [7]:
y_test.shape

(10000, 1)

In [8]:
# Flatten every training image into one row: (N, 32, 32, 3) -> (N, 3072).
n_train_images = x_train.shape[0]
train_features = x_train.reshape(n_train_images, -1)

In [9]:
train_features.shape

(50000, 3072)

In [10]:
# Flatten every test image into one row: (N, 32, 32, 3) -> (N, 3072).
n_test_images = x_test.shape[0]
test_features = x_test.reshape(n_test_images, -1)

In [11]:
test_features.shape

(10000, 3072)

In [12]:
# Wrap the flattened pixel matrices in DataFrames (one column per pixel value).
train_features, test_features = (
    pd.DataFrame(pixels) for pixels in (train_features, test_features)
)

In [13]:
# The label arrays are (N, 1), so each becomes a one-column frame whose
# column is named 0 until it is renamed further down.
train_labels, test_labels = (
    pd.DataFrame(labels) for labels in (y_train, y_test)
)

## Renaming Classes

Class names follow the class descriptions on the CIFAR-10 page: https://www.cs.toronto.edu/~kriz/cifar.html

In [14]:
train_labels.head()

Unnamed: 0,0
0,6
1,9
2,9
3,4
4,1


In [15]:
# Give the single label column (default name 0) a meaningful name.
train_labels = train_labels.rename({0: 'Class'}, axis='columns')

In [16]:
train_labels.head()

Unnamed: 0,Class
0,6
1,9
2,9
3,4
4,1


In [17]:
train_labels['Class'].unique()

array([6, 9, 4, 1, 2, 7, 8, 3, 5, 0], dtype=uint64)

In [18]:
# CIFAR-10 label id -> human-readable class name, following the class list at
# https://www.cs.toronto.edu/~kriz/cifar.html
CIFAR10_CLASS_NAMES = {
    0: 'airplane',
    1: 'automobile',
    2: 'bird',
    3: 'cat',
    4: 'deer',
    5: 'dog',
    6: 'frog',
    7: 'horse',
    8: 'ship',
    9: 'truck',
}

# One vectorised lookup instead of a per-row Python loop over an if/elif chain.
train_labels['Class Name'] = train_labels['Class'].map(CIFAR10_CLASS_NAMES)

# Ids missing from the mapping come back as NaN. Fail loudly rather than
# silently calling every unexpected id 'truck', as a bare `else` would.
assert train_labels['Class Name'].notna().all(), 'label id outside the CIFAR-10 range 0-9'

In [19]:
train_labels.head()

Unnamed: 0,Class,Class Name
0,6,frog
1,9,truck
2,9,truck
3,4,deer
4,1,automobile


In [20]:
train_labels['Class Name'].nunique()

10

In [21]:
train_labels['Class'].nunique()

10

In [22]:
test_labels.head()

Unnamed: 0,0
0,3
1,8
2,8
3,0
4,6


In [23]:
# Give the single label column (default name 0) a meaningful name.
test_labels = test_labels.rename({0: 'Class'}, axis='columns')

In [24]:
# CIFAR-10 label id -> human-readable class name, following the class list at
# https://www.cs.toronto.edu/~kriz/cifar.html
CIFAR10_CLASS_NAMES = {
    0: 'airplane',
    1: 'automobile',
    2: 'bird',
    3: 'cat',
    4: 'deer',
    5: 'dog',
    6: 'frog',
    7: 'horse',
    8: 'ship',
    9: 'truck',
}

# One vectorised lookup instead of a per-row Python loop over an if/elif chain.
test_labels['Class Name'] = test_labels['Class'].map(CIFAR10_CLASS_NAMES)

# Ids missing from the mapping come back as NaN. Fail loudly rather than
# silently calling every unexpected id 'truck', as a bare `else` would.
assert test_labels['Class Name'].notna().all(), 'label id outside the CIFAR-10 range 0-9'

In [25]:
test_labels['Class Name'].nunique()

10

In [26]:
test_labels['Class'].nunique()

10

## Final DataFrame Creation

In [27]:
# Place the label columns in front of the pixel columns, aligned on the row index.
train_parts = [train_labels, train_features]
test_parts = [test_labels, test_features]
train_df = pd.concat(train_parts, axis='columns')
test_df = pd.concat(test_parts, axis='columns')

In [28]:
print(train_df.shape)
print(test_df.shape)

(50000, 3074)
(10000, 3074)


In [29]:
# Directory the CSVs are written to and read back from. It defaults to the
# original location; set the CIFAR_DATA_DIR environment variable to run the
# notebook on another machine without editing this cell.
# os.path.join(..., '') guarantees the trailing separator that the
# `datadir + 'file.csv'` concatenations in later cells rely on.
datadir = os.path.join(
    os.environ.get('CIFAR_DATA_DIR', '/Users/sidv88/Documents/Data_Management_2/Data/'),
    '',
)

In [30]:
# Persist the combined label + pixel tables, without the row index.
for frame, csv_name in ((train_df, 'train_rgb.csv'), (test_df, 'test_rgb.csv')):
    frame.to_csv(datadir + csv_name, index=False)

## Converting to Gray

In [31]:
# Reload the RGB tables written above and confirm their shapes.
train_rgb, test_rgb = (
    pd.read_csv(datadir + csv_name)
    for csv_name in ('train_rgb.csv', 'test_rgb.csv')
)
for frame in (train_rgb, test_rgb):
    print(frame.shape)

(50000, 3074)
(10000, 3074)


In [32]:
train_rgb.head()

Unnamed: 0,Class,Class Name,0,1,2,3,4,5,6,7,...,3062,3063,3064,3065,3066,3067,3068,3069,3070,3071
0,6,frog,59,62,63,43,46,45,50,48,...,104,216,184,140,151,118,84,123,92,72
1,9,truck,154,177,187,126,137,136,105,104,...,136,143,133,139,143,134,142,143,133,144
2,9,truck,255,255,255,253,253,253,253,253,...,79,78,85,83,79,85,83,80,86,84
3,4,deer,28,25,10,37,34,19,38,35,...,38,54,47,28,63,56,37,72,65,46
4,1,automobile,170,180,198,168,178,196,177,185,...,78,75,79,82,71,75,78,73,77,80


In [33]:
test_rgb.head()

Unnamed: 0,Class,Class Name,0,1,2,3,4,5,6,7,...,3062,3063,3064,3065,3066,3067,3068,3069,3070,3071
0,3,cat,158,112,49,159,111,47,165,116,...,145,24,77,124,34,84,129,21,67,110
1,8,ship,235,235,235,231,231,231,232,232,...,163,168,183,178,180,195,191,186,200,199
2,8,ship,158,190,222,158,187,218,139,166,...,37,5,6,8,4,5,3,7,8,7
3,0,airplane,155,156,149,167,176,187,176,179,...,53,60,63,50,64,65,52,73,68,50
4,6,frog,65,68,50,70,81,64,48,64,...,147,143,179,136,154,185,146,128,156,117


In [34]:
# Columns 0-1 hold 'Class' and 'Class Name'; every column after them is pixel data.
label_positions = [0, 1]
x_train_rgb = train_rgb.iloc[:, len(label_positions):]
y_train_rgb = train_rgb.iloc[:, label_positions]
for part in (x_train_rgb, y_train_rgb):
    print(part.shape)

(50000, 3072)
(50000, 2)


In [36]:
# Columns 0-1 hold 'Class' and 'Class Name'; every column after them is pixel data.
label_positions = [0, 1]
x_test_rgb = test_rgb.iloc[:, len(label_positions):]
y_test_rgb = test_rgb.iloc[:, label_positions]
for part in (x_test_rgb, y_test_rgb):
    print(part.shape)

(10000, 3072)
(10000, 2)


In [37]:
# Turn each 3072-value row back into a 32x32 image with 3 colour channels.
image_shape = (32, 32, 3)
x_train_rgb = np.array(x_train_rgb).reshape(len(x_train_rgb), *image_shape)
x_test_rgb = np.array(x_test_rgb).reshape(len(x_test_rgb), *image_shape)

In [40]:
# Convert every training image to a single-channel grayscale image.
# The pixels come from x_train_rgb (rebuilt from train_rgb.csv) rather than
# from the Keras array loaded at the top of the notebook, so this stage depends
# only on the CSV it just read and mirrors the test-set conversion below.
# Each image is cast to uint8 before conversion, exactly as the test-set cell does.
x_train_gray = [
    cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2GRAY)
    for image in x_train_rgb
]

In [42]:
# Stack the grayscale images and flatten each one into a single row.
gray_stack = np.array(x_train_gray)
x_train_gray = pd.DataFrame(gray_stack.reshape(gray_stack.shape[0], -1))

In [49]:
# The grayscale set reuses the RGB labels; take them as a plain array.
y_train_gray = y_train_rgb.values

In [51]:
# Back to a DataFrame; the columns are now the defaults 0 and 1.
y_train_gray = pd.DataFrame(data=y_train_gray)

In [54]:
y_train_gray.head()

Unnamed: 0,0,1
0,6,frog
1,9,truck
2,9,truck
3,4,deer
4,1,automobile


In [55]:
# Restore the label column names in place of the defaults 0 and 1.
label_columns = {0: 'Class', 1: 'Class Name'}
y_train_gray = y_train_gray.rename(label_columns, axis='columns')

In [56]:
y_train_gray.head()

Unnamed: 0,Class,Class Name
0,6,frog
1,9,truck
2,9,truck
3,4,deer
4,1,automobile


In [57]:
# Labels first, then the 1024 grayscale pixel columns.
train_gray_parts = [y_train_gray, x_train_gray]
train_gray = pd.concat(train_gray_parts, axis='columns')
train_gray.head()

Unnamed: 0,Class,Class Name,0,1,2,3,4,5,6,7,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,6,frog,61,45,48,57,78,96,113,117,...,93,96,103,94,72,83,145,189,124,99
1,9,truck,171,134,103,101,130,164,187,195,...,23,46,66,91,115,130,134,137,138,137
2,9,truck,255,253,253,253,253,253,253,253,...,74,79,76,65,62,68,76,83,83,84
3,4,deer,24,33,34,37,39,36,37,22,...,81,65,81,67,75,75,58,47,56,65
4,1,automobile,179,177,185,192,194,192,194,193,...,88,84,81,78,79,75,74,78,74,76


In [62]:
# Convert every test image to a single-channel grayscale image,
# casting each one to uint8 before handing it to OpenCV.
x_test_gray = [
    cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2GRAY)
    for image in x_test_rgb
]
    


In [64]:
# Stack the grayscale images and flatten each one into a single row.
gray_stack = np.array(x_test_gray)
x_test_gray = pd.DataFrame(gray_stack.reshape(len(gray_stack), -1))
print(x_test_gray.shape)

# Rebuild the label frame from the RGB labels and restore its column names.
label_columns = {0: 'Class', 1: 'Class Name'}
y_test_gray = pd.DataFrame(np.array(y_test_rgb)).rename(columns=label_columns)
print(y_test_gray.shape)

# Labels first, then the grayscale pixel columns.
test_gray = pd.concat([y_test_gray, x_test_gray], axis='columns')
test_gray.head()

(10000, 1024)
(10000, 2)


Unnamed: 0,Class,Class Name,0,1,2,3,4,5,6,7,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,3,cat,119,118,123,125,119,115,121,119,...,86,79,101,123,138,126,89,67,74,58
1,8,ship,235,231,232,232,232,232,232,232,...,120,127,132,141,147,157,166,178,190,196
2,8,ship,184,182,161,153,188,203,207,212,...,33,34,33,31,31,42,30,6,4,8
3,0,airplane,155,175,180,193,185,171,172,172,...,62,65,80,113,113,79,62,61,63,67
4,6,frog,65,76,57,39,29,50,63,67,...,123,150,177,158,154,154,167,163,171,143


In [65]:
# Persist the grayscale tables, without the row index.
for frame, csv_name in ((train_gray, 'train_gray.csv'), (test_gray, 'test_gray.csv')):
    frame.to_csv(datadir + csv_name, index=False)