# Convolutional Neural Network

### Importing the libraries

In [1]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Show the installed TensorFlow version so the run environment is recorded with the notebook.
tf.__version__

'2.0.0'

## Importing Data

In [4]:
# X_train.isnull().any().describe()

In [5]:
# Load the labelled training data. The `label` column holds the class of each
# row and every column from index 1 onward is taken as a feature.
#
# `to_categorical` is imported from tf.keras so the notebook relies on a single
# Keras implementation (the model further down is built with tf.keras); the
# private `keras.utils.np_utils` module path is not a stable import location.
from tensorflow.keras.utils import to_categorical

dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, 1:].values
y_dataset = dataset['label']
# One-hot encode into 10 classes, matching the 10-unit softmax output layer
# and the categorical_crossentropy loss used when compiling the model.
y = to_categorical(y_dataset, num_classes=10)


## Part 1 - Data Preprocessing

## Impute Missing Data

In [6]:
from sklearn.impute import SimpleImputer

# Replace every NaN entry in the feature matrix with the constant 0.
# Fitting and transforming happen in a single call; `imputer` stays fitted
# for later reuse.
imputer = SimpleImputer(
    missing_values=np.nan,
    strategy='constant',
    fill_value=0,
)
X = imputer.fit_transform(X)

## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation; the fixed random_state
# keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

## Feature Scaling

In [8]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no validation information leaks in.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Reshape to a 4D Array (samples, height, width, channels)

In [9]:
# Turn each flat feature row into a 28x28 single-channel image; -1 lets NumPy
# infer the number of samples.
image_shape = (-1, 28, 28, 1)
X_train = X_train.reshape(image_shape)
X_test = X_test.reshape(image_shape)

In [10]:
# Sanity check: the training array should now be (samples, 28, 28, 1).
X_train.shape

(33600, 28, 28, 1)

### Preprocessing the Training set

In [11]:
# Augmentation for the training images: small random rotations, zooms, shears
# and shifts.
#
# Horizontal flips are deliberately not used: a mirrored digit is not a valid
# example of its class.
#
# No featurewise centering / std-normalisation is configured here. The pixel
# features are already standardised by StandardScaler, and the validation and
# test arrays are fed to the model directly without passing through this
# generator, so normalising inside the generator would make the training
# inputs differ in scale from the inputs the model is evaluated on.
# Because no featurewise statistics are required, the generator does not need
# to be fitted on X_train.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2)

In [12]:
# Display the configured augmentation generator.
train_datagen

<keras.preprocessing.image.ImageDataGenerator at 0x1b02e6e4348>

## Part 2 - Building the CNN

### Initialising the CNN

In [11]:
# Start from an empty sequential model; layers are appended one by one in the following cells.
cnn = tf.keras.models.Sequential()

### Step 1 - Convolution

In [12]:
# First convolution: 32 filters with a 3x3 kernel and ReLU activation,
# declared to accept 28x28 single-channel inputs.
first_conv = tf.keras.layers.Conv2D(
    filters=32,
    kernel_size=3,
    activation='relu',
    input_shape=[28, 28, 1],
)
cnn.add(first_conv)

### Step 2 - Pooling

In [13]:
# Max pooling over 2x2 windows with stride 2.
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

### Adding a second convolutional layer

In [14]:
# Second convolution + pooling stage with the same settings as the first
# (32 filters, 3x3 kernel, ReLU, then 2x2 max pooling with stride 2).
second_stage = [
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
]
for layer in second_stage:
    cnn.add(layer)

### Step 3 - Flattening

In [15]:
# Flatten the pooled feature maps into one vector per sample for the dense layers.
cnn.add(tf.keras.layers.Flatten())

### Step 4 - Full Connection

In [16]:
# Fully connected hidden layer with 56 units and ReLU activation.
cnn.add(tf.keras.layers.Dense(units=56, activation='relu'))

### Step 5 - Output Layer

In [17]:
# Output layer: 10 units with softmax, one probability per class of the one-hot encoded labels.
cnn.add(tf.keras.layers.Dense(units=10, activation='softmax'))

## Part 3 - Training the CNN

### Compiling the CNN

In [18]:
# Adam optimiser with categorical cross-entropy (labels are one-hot encoded);
# accuracy is tracked as the reported metric.
cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Inspect the first training label to confirm it is a one-hot vector.
print(y_train[0])

[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]


### Training the CNN on the Training set and evaluating it on the Test set

In [20]:
# Train on augmented batches drawn from the training split and validate on the
# hold-out split (fed directly, without augmentation) after every epoch.
batch_size = 32
# steps_per_epoch has to be a whole number of batches. Round up so that a
# final partial batch is still counted when the number of training samples is
# not an exact multiple of batch_size.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
cnn.fit(
    train_datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_test, y_test),
    steps_per_epoch=steps_per_epoch,
    epochs=15,
)

Train for 1050.0 steps, validate on 8400 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1b7b902f908>

In [27]:
# Softmax outputs of the trained model for the hold-out split (one row of class scores per image).
y_pred = cnn.predict(X_test)

## Part 4 - Predicting the Test Data





In [21]:
# Load the file to predict on; every column is used as a feature (no label column is split off).
test_dataset = pd.read_csv('test.csv')
X_test_data = test_dataset.values

In [22]:
# Fill missing values with the imputer that was fitted on the training
# features, so the test data goes through exactly the same preprocessing and
# nothing is re-fitted on test data. The imputer uses a constant fill of 0,
# so the filled values are the same as with a freshly fitted imputer.
X_test_data = imputer.transform(X_test_data)

In [23]:
# Apply the training-fitted scaler, then reshape the rows into 28x28
# single-channel images, mirroring the preprocessing of the training data.
X_test_data = sc.transform(X_test_data).reshape(-1, 28, 28, 1)

In [24]:
# Predict class scores for every test image and keep the index of the
# highest-scoring class as the predicted label.
class_probabilities = cnn.predict(X_test_data)
results = class_probabilities.argmax(axis=1)

In [25]:
# Quick look at the predicted class indices.
print(results)

[2 0 9 ... 3 9 2]


In [26]:
# Build the submission table: 1-based image ids next to the predicted labels.
# The id range is derived from the number of predictions rather than a fixed
# row count, so the cell keeps working if the test file has a different size.
final_df = pd.DataFrame({
    'ImageId': range(1, len(results) + 1),
    'Label': results,
})
final_df.to_csv('digit_recognizer_cnn.csv', index=False)
print(final_df)

       ImageId  Label
0            1      2
1            2      0
2            3      9
3            4      0
4            5      3
...        ...    ...
27995    27996      9
27996    27997      7
27997    27998      3
27998    27999      9
27999    28000      2

[28000 rows x 2 columns]
