## Import libraries necessary for this project

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout

%matplotlib inline

Using TensorFlow backend.


### The dataset files are provided in .pkl format, so we use the pickle library to open them and then convert the loaded lists into NumPy arrays so the data is easier to work with.

In [2]:
import pickle
import numpy as np

def _read_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code while deserialising;
    # only open files that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Convert each loaded object to a NumPy array so it can be reshaped and
# scaled in the cells that follow.
train_image = np.array(_read_pickle('train_image.pkl'))
train_label = np.array(_read_pickle('train_label.pkl'))
test_image = np.array(_read_pickle('test_image.pkl'))

In [3]:
# Wrap the labels in a single-column frame (column 0) and count how many
# samples each class has, to see whether the classes are balanced.
dataframe = pd.DataFrame(train_label)
dataframe.loc[:, 0].value_counts()

3    2000
6    2000
2    2000
0    2000
Name: 0, dtype: int64

In [4]:
# Give every image an explicit single-channel axis so the arrays match the
# (28, 28, 1) input_shape of the first Conv2D layer; -1 lets NumPy infer the
# number of samples.
train_image = train_image.reshape(-1, 28, 28, 1)
test_image = test_image.reshape(-1, 28, 28, 1)

# Display the resulting shapes as a sanity check.
train_image.shape, test_image.shape, train_label.shape

((8000, 28, 28, 1), (2000, 28, 28, 1), (8000,))

## Shuffle and split the data into training and validation subsets

In [5]:
# Hold out 10% of the labelled data for validation.
# random_state fixes the shuffle so a Restart & Run All reproduces the same
# split; stratify keeps the class proportions identical in both subsets.
train_x, val_x, train_y, val_y = train_test_split(
    train_image,
    train_label,
    test_size=0.1,
    random_state=42,
    stratify=train_label,
)

### Normalising the images (dividing the pixel values by 255) to get better training results

In [6]:
# Divide every image set by the same constant so training, validation and
# test inputs share one scale.
# Run this cell only once per kernel session: it overwrites its inputs, so a
# second run would divide the already-scaled arrays again.
train_x, val_x, test_image = (
    images / 255.0 for images in (train_x, val_x, test_image)
)

Mapping the class labels (0, 2, 3, 6) to consecutive indices (0, 1, 2, 3), because the original label values are not contiguous.

In [7]:
# Original label value -> consecutive class index used by the network's
# 4-unit output layer.
mapping = {0: 0, 2: 1, 3: 2, 6: 3}

# Build NumPy arrays (not plain Python lists) so the targets have the same
# container type as the image arrays passed to model.fit.
# Run this cell only once per kernel session: re-running it would try to map
# labels that have already been converted.
train_y = np.array([mapping[label] for label in train_y])
val_y = np.array([mapping[label] for label in val_y])

## Building CNN model

In [8]:
# Start from an empty Sequential container; the layers are appended to it
# one by one in the next cell.
model = Sequential()

In [9]:
# Architecture: two convolution + pooling stages, then a dense classifier head.

# Stage 1: 64 filters of size 2x2 on the (28, 28, 1) input, followed by
# max pooling with the layer's default pool size.
model.add(Conv2D(64, (2, 2), strides=(1, 1), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(MaxPooling2D(padding="same"))

# Stage 2: 32 filters of size 2x2.
model.add(Conv2D(32, (2, 2), strides=(1, 1), padding="same", activation="relu"))
# NOTE(review): pool_size=1 takes the max over a single element, so this layer
# appears to leave the feature map unchanged -- confirm whether 2 was intended.
model.add(MaxPooling2D(1, padding="same"))

# Collapse the feature maps into one vector per sample for the dense layers.
model.add(Flatten())

model.add(Dense(256, activation="relu"))
model.add(Dense(128, activation="relu"))
# NOTE(review): `rate` is the fraction of units dropped, so 0.8 discards 80% of
# the activations during training -- confirm this was not meant as a
# keep-probability (i.e. rate=0.2).
model.add(Dropout(rate=0.8))

# Output layer: one unit per class. softmax (not sigmoid) is used because each
# image belongs to exactly one of the 4 classes and the model is compiled with
# sparse_categorical_crossentropy, which expects the outputs to form a
# probability distribution over the classes.
model.add(Dense(4, activation="softmax"))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [10]:
# The targets are integer class indices, hence the sparse variant of
# categorical cross-entropy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split only; the validation split is passed as
# validation_data for evaluation.
model.fit(
    x=train_x,
    y=train_y,
    validation_data=(val_x, val_y),
    epochs=10,
    batch_size=100,
)

Instructions for updating:
Use tf.cast instead.
Train on 7200 samples, validate on 800 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x20a4ed9e748>

In [11]:
# Per-class scores for every test image (one row per image).
pred = model.predict(test_image)

# Inverse of the label mapping used for training: network class index ->
# original label value, so the results use the same labels as train_label.
demapping = {0: 0, 1: 2, 2: 3, 3: 6}

# Take the highest-scoring class index of each row and translate it back.
pred_classes = [demapping[class_idx] for class_idx in pred.argmax(axis=1)]

df = pd.DataFrame({'predicted class': pred_classes})

In [12]:
# Write the predictions to a CSV file; the frame's index is written as a
# column labelled "Test_image_index".
df.to_csv(
    "./Result.csv",
    index_label=["Test_image_index"],
)