In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input mount and print the full path of every
# file found, so the available datasets are visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import pandas as pd

class HyperParameters:
    """Plain container for the training settings used by the NumPy network.

    Attributes mirror the constructor arguments; ``mini_batch_size`` is stored
    as ``batch_size`` and ``no_l`` is the number of entries in ``layers``.
    """

    def __init__(self, learning_rate=0.01, epochs=10, mini_batch_size=None, beta=.9, layers=None, beta1=.9, beta2=.998, lambd=0):
        # Build the default architecture per instance so no list is shared
        # between objects through a mutable default argument.
        self.layers = [10, 20, 10] if layers is None else layers
        self.no_l = len(self.layers)

        # Optimisation schedule.
        self.learning_rate, self.epochs = learning_rate, epochs
        self.batch_size = mini_batch_size

        # Momentum-style coefficients and the L2 regularisation strength.
        self.beta, self.beta1, self.beta2 = beta, beta1, beta2
        self.lambd = lambd

class WeightAndBias:
    """Creates and updates the weights and biases of every layer.

    ``weights`` and ``biases`` are 1-indexed by layer: slot 0 holds an empty
    ``pd.DataFrame`` placeholder so that index ``l`` refers to layer ``l``.
    """

    def __init__(self, number_features, layers, initialisation_type="random"):
        # The requested initialisation type is only recorded; weights are
        # always drawn from a standard normal scaled by 0.01.
        self.initialisation_type = initialisation_type
        self.layers = [number_features] + layers

        # (fan_in, fan_out) for each consecutive pair of layer sizes.
        fan_pairs = list(zip(self.layers[:-1], self.layers[1:]))

        self.weights = [pd.DataFrame()]
        for fan_in, fan_out in fan_pairs:
            self.weights.append(np.random.randn(fan_out, fan_in) * 0.01)

        self.biases = [pd.DataFrame()]
        for _, fan_out in fan_pairs:
            self.biases.append(np.zeros([fan_out, 1]))

    def update_learning_parameters(self, no_l, hp_obj, dW, db, m_training):
        """Apply one gradient-descent step with L2 weight decay to layers 1..no_l.

        ``dW`` and ``db`` are indexed by layer like ``weights``/``biases``;
        ``m_training`` is the example count used to scale the decay term.
        """
        layer = 1
        while layer <= no_l:
            step = hp_obj.learning_rate
            self.biases[layer] = self.biases[layer] - step * db[layer]

            # Shrink the weights (L2 penalty) before subtracting the gradient.
            shrink = 1 - (hp_obj.lambd * hp_obj.learning_rate) / m_training
            self.weights[layer] = shrink * self.weights[layer] - step * dW[layer]
            layer += 1

class ActivationFunctions:
    """Activation functions, their derivatives and the loss for the NumPy network.

    The constructor takes the layer sizes and an optional list with one
    activation name per layer. It builds two 1-indexed lookup lists
    (``activation_functions`` and ``derivative_functions``) whose slot 0 is
    ``None`` so that index ``l`` refers to layer ``l``.

    Note: ``softmax_derivative`` and ``sigmoid_derivative`` take ``(y, a)`` and
    return the output-layer error ``a - y``, whereas ``tanh_derivative`` and
    ``relu_derivative`` take the pre-activation ``z``.
    """

    # Slope applied to negative inputs by `relu` (it is a leaky ReLU); the
    # derivative uses the same value so the gradient matches the forward pass.
    LEAKY_SLOPE = 0.0001

    def __init__(self, layers, activation_functions=None):
        if activation_functions is None:
            activation_functions = ['tanh'] * (len(layers) - 1) + ['softmax']

        # Resolve names with getattr instead of eval: same lookup, but a name
        # can never be executed as arbitrary code. Unknown names still raise
        # AttributeError.
        self.activation_functions = [None] + [
            getattr(ActivationFunctions, name) for name in activation_functions
        ]
        self.derivative_functions = [None] + [
            getattr(ActivationFunctions, f'{name}_derivative') for name in activation_functions
        ]

    @staticmethod
    def sigmoid(z):
        """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def relu(z):
        """Leaky ReLU: ``z`` for positive inputs, ``LEAKY_SLOPE * z`` otherwise."""
        return np.where(z > 0, z, ActivationFunctions.LEAKY_SLOPE * z)

    @staticmethod
    def tanh(z):
        """Hyperbolic tangent, applied element-wise.

        np.tanh is numerically stable for any magnitude, so no clipping of the
        input is needed.
        """
        return np.tanh(z)

    @staticmethod
    def softmax(z):
        """Softmax over axis 0 (one probability column per example).

        The per-column maximum is subtracted before exponentiating. This is
        mathematically a no-op but prevents overflow, and unlike clipping the
        logits it keeps the relative differences between large logits intact.
        """
        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    @staticmethod
    def softmax_derivative(y, a):
        """Output-layer error ``a - y`` for labels ``y`` and activations ``a``."""
        return a - y

    @staticmethod
    def sigmoid_derivative(y, a):
        """Output-layer error ``a - y`` for labels ``y`` and activations ``a``."""
        return a - y

    @staticmethod
    def tanh_derivative(z):
        """Derivative of tanh at ``z``: 1 - tanh(z)^2."""
        return (1 - np.tanh(z) ** 2)

    @staticmethod
    def relu_derivative(z):
        """Derivative of the leaky ReLU: 1 where ``z > 0``, ``LEAKY_SLOPE`` elsewhere."""
        return np.where(z > 0, 1.0, ActivationFunctions.LEAKY_SLOPE)

    @staticmethod
    def calculate_loss(a, y, m, hp, lp):
        """Return ``(cross_entropy, cross_entropy + L2 penalty)``.

        a:  predicted probabilities; y: one-hot labels of the same shape.
        m:  number of examples used to average the loss.
        hp: object exposing ``lambd`` and ``no_l``.
        lp: object exposing the 1-indexed ``weights`` list.
        """
        # Clip probabilities away from 0 so log() never yields -inf and
        # 0 * log(0) never turns the whole loss into NaN.
        eps = 1e-15
        cross_entropy = -1/m * np.sum(np.multiply(y, np.log(np.clip(a, eps, None))))
        l2_penalty = hp.lambd/(2 * m) * sum(np.sum(np.square(lp.weights[i]))
                                            for i in range(1, hp.no_l + 1))
        return (cross_entropy, cross_entropy + l2_penalty)
    
class NeuralNetwork:
    """Fully connected network trained with mini-batch gradient descent.

    Data is laid out column-wise: ``X_train`` has shape
    ``(n_features, n_examples)`` and ``y_train`` is sliced by column in the
    same way. Forward propagation fills ``self.Z``/``self.A`` and back
    propagation fills ``self.dZ``/``self.dW``/``self.db``, all indexed by
    layer with slot 0 reserved for the input.
    """

    def __init__(self, X_train, y_train, HyperParameters, activation_functions=None):
        """Store the data, build the activation table and initialise the parameters.

        ``HyperParameters`` is a hyper-parameter *instance*; when its
        ``batch_size`` is ``None`` it is set, on that object, to the number of
        training examples (full-batch training).
        """
        self.X_train = X_train
        self.y_train = y_train
        self.n, self.m = X_train.shape

        print(f"number of training examples: {self.m}\nnumber of features: {self.n}"
              f"\nshape of y_train {self.y_train.shape}")

        # hp --> hyperparameters
        self.hp = HyperParameters
        self.layers = self.hp.layers
        self.no_l = self.hp.no_l

        self.act_function_obj = ActivationFunctions(self.layers, activation_functions=activation_functions)

        # lp --> learning parameters -> weights and biases
        self.lp = WeightAndBias(self.n, self.layers)

        if self.hp.batch_size is None:
            self.hp.batch_size = self.m

    def forward_propagation(self, X_batch):
        """Compute pre-activations ``Z[l]`` and activations ``A[l]`` for every layer."""
        depth = self.no_l
        self.Z = [0] + [None] * depth
        self.A = [X_batch] + [None] * depth
        activations = self.act_function_obj.activation_functions

        for layer in range(1, depth + 1):
            pre_activation = np.dot(self.lp.weights[layer], self.A[layer - 1]) + self.lp.biases[layer]
            self.Z[layer] = pre_activation
            self.A[layer] = activations[layer](pre_activation)

    @staticmethod
    def _gradients_from_dz(dZ, A_prev, batch_size):
        """Average the weight and bias gradients of one layer over the batch."""
        dW = 1/batch_size * np.dot(dZ, A_prev.T)
        db = 1/batch_size * np.sum(dZ, axis=1, keepdims=True)
        return dW, db

    def back_propagation(self, y_batch):
        """Compute ``dZ``, ``dW`` and ``db`` for every layer from the last forward pass."""
        derivatives = self.act_function_obj.derivative_functions
        batch_size = y_batch.shape[1]
        top = self.no_l

        slots = top + 1
        self.dZ = [None] * slots
        self.dW = [None] * slots
        self.db = [None] * slots

        # Output layer: its derivative function receives (labels, activations).
        self.dZ[top] = derivatives[top](y_batch, self.A[top])
        self.dW[top], self.db[top] = self._gradients_from_dz(self.dZ[top], self.A[top - 1], batch_size)

        assert self.dZ[top].shape == self.Z[top].shape
        assert self.db[top].shape == self.lp.biases[top].shape
        assert self.dW[top].shape == self.lp.weights[top].shape

        # Hidden layers, from the one below the output down to layer 1.
        for layer in reversed(range(1, top)):
            upstream = np.dot(self.lp.weights[layer + 1].T, self.dZ[layer + 1])
            self.dZ[layer] = upstream * derivatives[layer](self.Z[layer])
            self.dW[layer], self.db[layer] = self._gradients_from_dz(self.dZ[layer], self.A[layer - 1], batch_size)

            assert self.dZ[layer].shape == self.Z[layer].shape
            assert self.dW[layer].shape == self.lp.weights[layer].shape
            assert self.db[layer].shape == self.lp.biases[layer].shape

    def train_nn(self, verbose=False, per_epoch_log=100):
        """Run ``hp.epochs`` passes of mini-batch gradient descent.

        With ``verbose`` set, the loss of the *last* mini-batch is printed
        every ``per_epoch_log`` epochs.
        """
        for epoch in range(self.hp.epochs):
            step = self.hp.batch_size
            for batch_s in range(0, self.m, step):
                # The final batch may be shorter than the configured size.
                batch_e = min(batch_s + step, self.m)
                columns = slice(batch_s, batch_e)

                X_batch = self.X_train[:, columns]
                y_batch = self.y_train[:, columns]
                m_batch_size = batch_e - batch_s

                self.forward_propagation(X_batch)
                self.back_propagation(y_batch)
                self.lp.update_learning_parameters(self.no_l, self.hp, self.dW, self.db, m_batch_size)

            if not verbose or epoch % per_epoch_log != 0:
                continue
            print(f"epochs {epoch} loss: ", ActivationFunctions.calculate_loss(self.A[self.no_l], y_batch, m_batch_size, self.hp,
                                                                              self.lp))

    def predict(self, X_test):
        """Return a 0/1 matrix (examples x classes) marking each row's maximum output."""
        self.forward_propagation(X_test)
        scores = self.A[self.no_l].T
        row_best = scores.max(axis=1)[:, None]
        return (scores == row_best).astype(int)

def one_hot_encoding_y(train_data, num_classes=10):
    """One-hot encode the ``label`` column of ``train_data``.

    train_data:  object exposing an integer ``label`` attribute/column.
    num_classes: width of the encoding; defaults to 10.

    Returns a float array of shape ``(n_labels, num_classes)`` with a single
    1 per row at the position of that row's label.
    """
    # Convert to a plain integer array so the fancy indexing below does not
    # depend on the container type of the label column.
    labels = np.asarray(train_data.label, dtype=int)
    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded

In [None]:
# Load the Kaggle digit-recognizer training CSV; later cells read its `label`
# column as the target and the remaining columns as pixel features.
train_data = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')

In [None]:
# Build the feature matrix (pixels scaled by 1/255) and the one-hot targets.
# While `m` is the full row count, the `.iloc[0:m]` / `[:m]` slices and the
# reshape keep everything; lowering `m` would train on a prefix of the data.
m = train_data.shape[0]
X = train_data.drop('label', axis=1).iloc[0:m].to_numpy() / 255
y  = one_hot_encoding_y(train_data)[:m]
y = np.reshape(y, (m, 10))

# One hidden layer of 256 units followed by a 10-way softmax output.
layers=[256, 10]
activation_functions = ['relu'] * (len(layers) - 1) + ['softmax']
hp = HyperParameters(layers=layers, learning_rate=.5, epochs=500, mini_batch_size=2048, lambd=.1)

# The network works on column-major data, hence the transposes.
nn = NeuralNetwork(X.T, y.T, hp, activation_functions=activation_functions)

# NOTE(review): training is commented out, so `nn` keeps its initial random
# weights when the evaluation cell below runs.
# nn.train_nn( verbose=True, per_epoch_log=10)

In [None]:
# NOTE(review): `prob_preds` repeats the thresholding done inside
# NeuralNetwork.predict and is not called in this cell.
prob_preds = lambda preds: (preds == preds.max(axis=1)[:,None]).astype(int)
# Predict on the training set and count matches between the arg-max of the
# one-hot targets and the arg-max of the predictions.
preds = nn.predict(X.T)
r = np.sum(np.argmax(y, axis=1) == np.argmax(preds, axis=1))
w = np.sum(np.argmax(y, axis=1) != np.argmax(preds, axis=1))
print(f"total number of examples: {m}\nnumber of right predictions: {r}\nnumber of wrong predictions: {w}\n"
          f"accuracy on train: {r/m * 100}%")

In [None]:
# Load the competition test CSV and scale its values by 1/255, the same
# scaling applied to the training features.
test_data = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
X_test = test_data.to_numpy() / 255

In [None]:
# preds = nn.predict(X_test.T)
# preds = np.argmax(preds, axis=1)

In [None]:
# sub_df = pd.DataFrame(preds, columns=['Label'])
# sub_df.index.name= 'ImageId'
# sub_df.index = sub_df.index + 1
# sub_df.reset_index().to_csv('mnsit_submission.csv',index=False)

### Using Tensorflow

In [None]:
import tensorflow as tf
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
import keras

In [None]:
def normalize_img(image, label):
    """Cast ``image`` to float32 and divide by 255; ``label`` is returned untouched."""
    scaled = tf.cast(image, tf.float32) / 255.
    return scaled, label

In [None]:
# Display the shape of the transposed feature matrix (the layout passed to NeuralNetwork).
X.T.shape

In [None]:
# Dense classifier on flat 784-pixel rows: six hidden Dense blocks (each
# followed by batch normalisation and an activation) and a 10-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape = [784]),
    keras.layers.Dense(784),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(576),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(320),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),

    keras.layers.Dense(256),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),

    # The two narrowest hidden layers carry an L2 penalty on their kernels.
    keras.layers.Dense(160, kernel_regularizer = keras.regularizers.l2(0.0025)),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(84, kernel_regularizer = keras.regularizers.l2(0.0025)),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.25),

    keras.layers.Dense(10, activation = keras.activations.softmax),
])

# Sparse categorical cross-entropy: targets are integer class ids, not one-hot.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss = tf.keras.losses.sparse_categorical_crossentropy,
            metrics = ['accuracy'])

# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'; monitoring 'val_acc' would leave both callbacks
# watching a key that never appears in the logs.
# NOTE(review): early stopping (patience 3) fires before the LR scheduler
# (patience 4) ever can - confirm that is intended.
cb1 = EarlyStopping(patience = 3, restore_best_weights=True, monitor = 'val_accuracy')
cb2 = ReduceLROnPlateau(patience = 4, min_lr=0.00001, factor = 0.4, monitor = 'val_accuracy')

model.summary()

In [None]:
# X_train = tf.constant(np.array(X), dtype = tf.float32)
# X_test = tf.constant(np.array(X_test), dtype = tf.float32)
# y_train = tf.constant(train_data.label)

# X_train.shape, X_test.shape, y_train.shape

In [None]:
# history = model.fit(X_train, y_train, validation_split=0.2, epochs = 50, batch_size = 256, callbacks = [cb1, cb2], verbose = 1)

In [None]:
# model.predict(X_train)

## Trying perf with Data Augmentation

In [None]:
# Rebind X to the raw (unscaled) pixel values and y to the integer label
# column for the augmentation experiment.
# NOTE(review): this overwrites the scaled X / one-hot y defined earlier, so
# re-running the earlier NumPy-network cells after this one sees different data.
X = train_data.drop('label', axis=1).to_numpy() #/ 255
y  = train_data.label

In [None]:
# X.iloc[0]

In [None]:
from scipy.ndimage.interpolation import shift


In [None]:
def get_image_shape(i_r):
    """Reshape one flat row of 784 pixels into a 28x28 image."""
    return np.reshape(i_r, (28, 28))


def create_data_aug_images(X, y):
    """Augment a digit set with the transpose of every image.

    X: array-like holding flat 784-pixel rows (anything reshapeable to
       ``(-1, 28, 28)``).
    y: array-like of labels, one per row of ``X``.

    Returns ``(X_aug, y_aug)``: the original images followed by their
    transposes with shape ``(2 * n, 28, 28)``, and the labels repeated twice
    in the same order.
    """
    # -1 lets NumPy infer the number of images instead of assuming 42000.
    images = np.reshape(X, (-1, 28, 28))
    # Swap the two pixel axes of every image in one vectorised call.
    transposed = np.transpose(images, (0, 2, 1))
    X_aug = np.concatenate((images, transposed), axis=0)

    labels = np.asarray(y)
    y_aug = np.concatenate((labels, labels))
    return X_aug, y_aug
    

In [None]:
# from matplotlib import pyplot as plt
# plt.imshow(X_aug, cmap=plt.get_cmap('gray'))
# plt.show()

### Using Numpy Code

In [None]:
# Rebuild 28x28 images from the flat pixel rows; -1 lets NumPy infer the
# number of images instead of hard-coding the size of this particular CSV.
X_reshaped = np.reshape(X, (-1, 28, 28))
# Transpose every image at once by swapping its two pixel axes.
X_transposed = np.transpose(X_reshaped, (0, 2, 1))
# Augmented set = original images followed by their transposes.
X_aug = np.concatenate((X_reshaped, X_transposed), axis=0)

X_aug.shape

In [None]:
def one_hot_encoding_y(a, num_classes=10):
    """One-hot encode a sequence of integer class labels.

    a:           array-like of integer labels (NumPy array, Series or list).
    num_classes: width of the encoding; defaults to 10.

    Returns a float array of shape ``(n_labels, num_classes)`` with a single
    1 per row at the position of that row's label.
    """
    # np.asarray makes plain Python lists work too (they have no `.size`).
    labels = np.asarray(a, dtype=int)
    b = np.zeros((labels.size, num_classes))
    b[np.arange(labels.size), labels] = 1
    return b

In [None]:
# Labels for the augmented set: the label column repeated twice, matching the
# order in which X_aug stacks the original images and then their transposes.
y_aug = np.concatenate((np.array(train_data.label), np.array(train_data.label)))
y_aug

In [None]:
# Flatten the augmented images back to one row per image and scale the pixel
# values by 1/255. The row count comes from the data instead of being
# hard-coded, so the cell keeps working if the input size changes.
m = X_aug.shape[0]
X_aug_train = X_aug.reshape(m, -1) / 255.0
y_train = one_hot_encoding_y(y_aug)
# Guard against a label/image count mismatch (the old reshape to (m, 10)
# performed this check implicitly).
assert y_train.shape == (m, 10)

In [None]:

# layers=[256, 10]
# activation_functions = ['relu'] * (len(layers) - 1) + ['softmax']
# hp = HyperParameters(layers=layers, learning_rate=.5, epochs=500, mini_batch_size=2048, lambd=.1)

# nn = NeuralNetwork(X_aug_train.T, y_train.T, hp, activation_functions=activation_functions)

In [None]:
# nn.train_nn( verbose=True, per_epoch_log=10)

### Trying out with keras NN

In [None]:
# Convert the augmented features and the test features to float32 tensors.
X_train = tf.constant(np.array(X_aug_train), dtype = tf.float32)
X_test = tf.constant(np.array(X_test), dtype = tf.float32)
# Integer class ids (not one-hot): the model below is compiled with the
# sparse categorical cross-entropy loss. This rebinds the one-hot `y_train`
# produced by the previous cell.
y_train = tf.constant(y_aug)

X_train.shape, X_test.shape, y_train.shape

In [None]:
# Dense classifier for the augmented data: six hidden Dense blocks (each
# followed by batch normalisation and an activation) and a 10-way softmax.
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape = [784]),
    keras.layers.Dense(784),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(576),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(320),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),

    keras.layers.Dense(256),
    keras.layers.BatchNormalization(),
    keras.layers.Activation('relu'),

    # The two narrowest hidden layers carry an L2 penalty on their kernels.
    keras.layers.Dense(160, kernel_regularizer = keras.regularizers.l2(0.0025)),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.4),

    keras.layers.Dense(84, kernel_regularizer = keras.regularizers.l2(0.0025)),
    keras.layers.BatchNormalization(),
    keras.layers.LeakyReLU(alpha = 0.25),

    keras.layers.Dense(10, activation = keras.activations.softmax),
])

# Sparse categorical cross-entropy: targets are integer class ids, not one-hot.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
            loss = tf.keras.losses.sparse_categorical_crossentropy,
            metrics = ['accuracy'])

# With metrics=['accuracy'] the validation metric is logged as 'val_accuracy';
# 'val_acc' never appears in the logs, so callbacks monitoring it do nothing.
# NOTE(review): early stopping (patience 3) fires before the LR scheduler
# (patience 4) ever can - confirm that is intended.
cb1 = EarlyStopping(patience = 3, restore_best_weights=True, monitor = 'val_accuracy')
cb2 = ReduceLROnPlateau(patience = 4, min_lr=0.00001, factor = 0.4, monitor = 'val_accuracy')

model.summary()

In [None]:
# history = model.fit(X_train, y_train, 
#                     validation_split=0.2, epochs = 50, batch_size = 128, 
#                     callbacks = [cb1, cb2], verbose = 1)

In [None]:
### Trying out with CNN using Keras

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from kaggle_datasets import KaggleDatasets
import pandas as pd
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
train = pd.read_csv('../input/digit-recognizer/train.csv')
test = pd.read_csv('../input/digit-recognizer/test.csv')

# Separate the pixel columns from the digit labels.
X_train = train.drop(['label'], axis = 1)
y_train = train['label']

X_test = test

# Convert the labels to a tensor (the train/validation split happens in a later cell).
y_train = tf.constant(y_train)

# Convert both pixel tables to tensors with the same float32 dtype so the
# training and test inputs are scaled and fed to the model consistently.
X_train = tf.constant(np.array(X_train), dtype = tf.float32)
X_test = tf.constant(np.array(X_test), dtype = tf.float32)

In [None]:
# Show the first 25 test digits on a 5x5 grid. A single index drives both the
# image lookup and the (1-based) subplot position.
plt.figure(figsize = (10, 10))
for idd in range(25):
    plt.subplot(5, 5, idd + 1)
    plt.imshow(tf.reshape(X_test[idd, :], [28, 28]), cmap = 'gray')

In [None]:
# Shift and scale pixel values with (x - 127.5) / 127.5, which maps 0 to -1
# and 255 to +1; the same transform is applied to train and test.
X_train_s = (X_train - 127.5) / 127.5
X_test_s = (X_test - 127.5) / 127.5

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the scaled training data for validation. A fixed
# random_state makes the split (and every metric computed on it) reproducible
# across kernel restarts.
X_train, X_valid, y_train, y_valid = train_test_split(X_train_s.numpy(), y_train.numpy(),
                                                      test_size = 0.2, random_state = 2)
X_train.shape, X_valid.shape

In [None]:
# Reshape the flat rows into 28x28 single-channel images for the Conv2D layers.
X_train = tf.reshape(X_train, shape = [-1, 28, 28, 1])
X_valid = tf.reshape(X_valid, shape = [-1, 28, 28, 1])
X_test = tf.reshape(X_test_s, shape = [-1, 28, 28, 1])

# One-hot encode the labels. num_classes is fixed so both splits always get
# 10 columns, even if the highest digit happens to be missing from one split.
y_train = keras.utils.to_categorical(y_train, num_classes = 10)
y_valid = keras.utils.to_categorical(y_valid, num_classes = 10)

In [None]:
# Create a model with keras sequential class.
# Five unpadded Conv2D blocks (conv -> LeakyReLU -> batch norm) shrink the
# 28x28 input to 10x10, followed by a small dense head and a 10-way softmax.
# LeakyReLU(alpha = 0) has a zero negative slope, i.e. it acts as a plain ReLU.
mod = keras.models.Sequential([

    keras.layers.InputLayer(input_shape = X_train.shape[1:]), 

    keras.layers.Conv2D(filters = 32, kernel_size = (3, 3)), 
    keras.layers.LeakyReLU(alpha = 0.25),     # (26, 26)
    keras.layers.BatchNormalization(),

    keras.layers.Conv2D(filters = 48, kernel_size = (3, 3)), 
    keras.layers.LeakyReLU(alpha = 0),        # (24, 24)
    keras.layers.BatchNormalization(),

    keras.layers.Conv2D(filters = 64, kernel_size = (5, 5)),
    keras.layers.LeakyReLU(alpha = 0.25),    
    keras.layers.BatchNormalization(),        # (20, 20)
    
    keras.layers.Conv2D(filters = 96, kernel_size = (5, 5)),
    keras.layers.LeakyReLU(alpha = 0),    
    keras.layers.BatchNormalization(),       # (16, 16)
    
    keras.layers.Conv2D(filters = 128, kernel_size = (7, 7)),
    keras.layers.LeakyReLU(alpha = 0.25),    # (10, 10)
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.5),
    
    keras.layers.Flatten(),                    # (12800) = 10 * 10 * 128
  
    keras.layers.Dense(112), 
    keras.layers.LeakyReLU(alpha = 0), 
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.5),
    
    keras.layers.Dense(64),
    keras.layers.LeakyReLU(alpha = 0.25),
    keras.layers.BatchNormalization(),
    
    keras.layers.Dense(32),
    keras.layers.LeakyReLU(alpha = 0),
    keras.layers.BatchNormalization(),

    # The logits are batch-normalised before the softmax.
    keras.layers.Dense(10),
    keras.layers.BatchNormalization(),
    keras.layers.Activation(keras.activations.softmax),

])

# Compile the model. Categorical cross-entropy expects one-hot targets.
# NOTE(review): an Adam learning rate of 0.1 is unusually large - confirm it
# is intended to be brought down by the ReduceLROnPlateau callback.
mod.compile(optimizer=keras.optimizers.Adam(learning_rate=0.1), 
            loss = keras.losses.categorical_crossentropy, 
            metrics = ['accuracy'])



# summarize the model
mod.summary()




# Plot the model
# NOTE(review): plot_model presumably needs pydot and graphviz installed - verify in this environment.
keras.utils.plot_model(mod, show_shapes=True)

In [None]:
# The CNN is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'. Monitoring 'val_acc' would point both callbacks at
# a key that never appears in the logs, disabling them.
cb1 = EarlyStopping(patience = 3, restore_best_weights=True, monitor = 'val_accuracy', verbose = 1)
cb2 = ReduceLROnPlateau(patience = 3, min_lr=0.00001, factor = 0.15, monitor = 'val_accuracy', verbose = 1)

In [None]:
# Augmenting generator for the training split: random rotations, shifts and zooms.
# NOTE(review): the shift ranges are integers, presumably interpreted as pixel
# counts rather than fractions - confirm against the ImageDataGenerator docs.
data_train = ImageDataGenerator(
    rotation_range = 10, 
    width_shift_range = 4, 
    height_shift_range = 4, 
    zoom_range = 0.15,
 )

# The validation generator applies no augmentation.
data_valid = ImageDataGenerator()

# Batch iterators over the in-memory arrays; both shuffle their data.
train_gen = data_train.flow(X_train, y_train, batch_size = 168, shuffle = True)
valid_gen = data_valid.flow(X_valid, y_valid, batch_size = 56, shuffle = True)

In [None]:
# history = mod.fit(train_gen, epochs = 275, validation_data = valid_gen, callbacks = [cb1, cb2])
# acc = mod.evaluate(X_valid, y_valid)

In [None]:
# pred = mod.predict(X_test)
# pred = np.argmax(pred, axis = 1)
# pred.shape

In [None]:
# sub = pd.read_csv('../input/digit-recognizer/sample_submission.csv')
# sub['Label'] = pred
# sub.to_csv('submission.csv', index = False)
# sub.head()

In [None]:
### Trying out other nets

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Import the necessary libs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.datasets import mnist
import tensorflow as tf

sns.set(style='white', context='notebook', palette='deep')

In [None]:
# Load the competition files.
train = pd.read_csv('../input/digit-recognizer/train.csv')
test = pd.read_csv('../input/digit-recognizer/test.csv')
sub = pd.read_csv('../input/digit-recognizer/sample_submission.csv')

# Printed right after the CSVs are read; the preprocessing below still follows.
print("Data are Ready!!")

# Competition labels and pixel columns.
Y_train = train["label"]
X_train = train.drop(labels = ["label"], axis = 1) 

# Load the Keras MNIST dataset and pool its train and test parts together.
# NOTE(review): all of these images are added to the training data below; if
# the competition test set overlaps MNIST this leaks test examples into
# training - confirm this is intended.
(x_train1, y_train1), (x_test1, y_test1) = mnist.load_data()

train1 = np.concatenate([x_train1, x_test1], axis=0)
y_train1 = np.concatenate([y_train1, y_test1], axis=0)

# Flatten the MNIST images to 784-value rows to match the CSV layout.
Y_train1 = y_train1
X_train1 = train1.reshape(-1, 28*28)

# Scale every pixel source by 1/255.
X_train = X_train / 255.0
test = test / 255.0

X_train1 = X_train1 / 255.0

# Stack the competition rows and the MNIST rows into one training pool.
X_train = np.concatenate((X_train.values, X_train1))
Y_train = np.concatenate((Y_train, Y_train1))

# Reshape to 28x28 single-channel images for the convolutional model.
X_train = X_train.reshape(-1,28,28,1)
test = test.values.reshape(-1,28,28,1)

# One-hot encode the labels over 10 classes.
Y_train = to_categorical(Y_train, num_classes = 10)

# Hold out 10% for validation with a fixed seed.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size = 0.1, random_state=2)



In [None]:
# Convolutional classifier declared as one layer list: two 5x5 conv blocks,
# two 3x3 conv blocks and one more 3x3 conv, each conv followed by batch
# normalisation, with max-pooling/dropout between stages and a dense head.
model = Sequential([
    # Stage 1: two 5x5 convolutions on the 28x28x1 input.
    Conv2D(filters = 64, kernel_size = (5,5), padding = 'Same', activation ='relu', input_shape = (28,28,1)),
    BatchNormalization(),
    Conv2D(filters = 64, kernel_size = (5,5), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.25),

    # Stage 2: two 3x3 convolutions.
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2), strides=(2,2)),
    Dropout(0.25),

    # Stage 3: a final 3x3 convolution without pooling.
    Conv2D(filters = 64, kernel_size = (3,3), padding = 'Same', activation ='relu'),
    BatchNormalization(),
    Dropout(0.25),

    # Classification head.
    Flatten(),
    Dense(256, activation = "relu"),
    BatchNormalization(),
    Dropout(0.25),

    Dense(10, activation = "softmax"),
])

In [None]:
# `learning_rate` replaces the deprecated `lr` alias. The `decay=0.0` argument
# is dropped: 0.0 is already the default and the argument is rejected by newer
# optimizer implementations.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)
model.compile(optimizer = optimizer , loss = "categorical_crossentropy", metrics=["accuracy"])

# metrics=["accuracy"] is logged as 'val_accuracy' on the validation data;
# monitoring 'val_acc' would mean the learning rate is never reduced.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)
epochs = 50
batch_size = 128

#Data Augmentation
datagen = ImageDataGenerator(
        featurewise_center=False, # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images horizontally
        vertical_flip=False)  # randomly flip images vertically

#datagen.fit(X_train)
train_gen = datagen.flow(X_train,Y_train, batch_size=batch_size)

# validation_data is a pair of in-memory arrays, so no validation_steps is
# passed: the whole validation set is evaluated after every epoch instead of
# only the first few batches.
history = model.fit(train_gen,
                    epochs = epochs,
                    validation_data = (X_val,Y_val),
                    verbose = 1,
                    steps_per_epoch = X_train.shape[0] // batch_size,
                    callbacks = [learning_rate_reduction])



In [None]:
# Make predictions about test sets
# Predict class probabilities for the test images.
results = model.predict(test)

# Convert each probability row to the index of its most likely digit.
results = np.argmax(results, axis = 1)

results = pd.Series(results, name="Label")

# ImageId runs from 1 to the number of predictions; deriving it from the data
# keeps the two columns aligned instead of relying on a hard-coded 28000 rows.
image_ids = pd.Series(range(1, len(results) + 1), name = "ImageId")
submission = pd.concat([image_ids, results], axis = 1)

submission.to_csv("submission.csv", index=False)