In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the training set and preview its first rows.
# Author's note: each image is 28 x 28 pixels, i.e. 784 pixel values per row.
train_csv = "../input/digit-recognizer/train.csv"
df = pd.read_csv(train_csv)
df.head()

In [None]:
# Load the competition test set and preview its first rows.
test_csv = "../input/digit-recognizer/test.csv"
test = pd.read_csv(test_csv)
test.head()

In [None]:
# Print (rows, columns) for the training frame, then for the test frame.
for frame in (df, test):
    print(frame.shape)

In [None]:
# Separate the training frame into the pixel matrix (X) and the digit labels (y).
# The pixel values are left unscaled here; they are divided by 255 later,
# just before the CNN section.
label_column = "label"
X = df.drop(columns=label_column).to_numpy()
y = df[label_column].to_numpy()


In [None]:
# Print the shapes of the feature matrix, the label vector and the test frame.
for data in (X, y, test):
    print(data.shape)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Bar chart of how many training rows carry each digit label.
plt.figure(figsize=(15, 10))
sns.set_style("darkgrid")
sns.countplot(data=df, x="label")

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 5% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42
)
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Sweep k = 1..39: fit a KNN classifier for each k and record the fraction of
# hold-out samples it misclassifies, so the k values can be compared afterwards.
error_rate = []
for k in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    mismatches = knn.predict(X_test) != y_test
    error_rate.append(np.mean(mismatches))

In [None]:
# Plot the hold-out error rate recorded for each k value.
plt.figure(figsize=(15, 10))
# The sweep starts at k=1, so the x positions are 1..len(error_rate); deriving
# them from the list keeps x and y the same length if the sweep range changes.
k_values = range(1, len(error_rate) + 1)
plt.plot(k_values, error_rate, color="blue", linestyle="--", marker="o",
         markerfacecolor="red", markersize=10)
plt.title("Error Rate vs K Value")
# xlabel must be *called*. Assigning to it (plt.xlabel = "...") replaces the
# pyplot function with a string: the axis stays unlabelled and every later
# plt.xlabel(...) call in the session fails.
plt.xlabel("K Value")
plt.ylabel("Error Rate")

In [None]:
# Author's note: the minimum hold-out error was observed at n_neighbors=1.
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
# Fit the chosen KNN model and predict the hold-out labels (fit() returns the estimator).
knn_predictions = knn.fit(X_train, y_train).predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix,accuracy_score
# Confusion matrix of true hold-out labels vs. KNN predictions.
knn_confusion = confusion_matrix(y_test, knn_predictions)
print(knn_confusion)

In [None]:
# Per-class precision / recall / F1 for the KNN predictions.
knn_report = classification_report(y_test, knn_predictions)
print(knn_report)

In [None]:
# Overall hold-out accuracy of the KNN predictions.
knn_accuracy = accuracy_score(y_test, knn_predictions)
print(knn_accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Train a random forest on the flat pixel features and predict the hold-out labels.
# random_state is fixed (same seed as the train/test split) so the forest, and
# therefore the accuracy comparison against KNN, is reproducible across runs.
# NOTE: the variable name `random` is kept for compatibility with the rest of
# the notebook, but it would shadow the stdlib `random` module if that were imported.
random = RandomForestClassifier(random_state=42)
random.fit(X_train, y_train)
random_predictions = random.predict(X_test)

In [None]:
# Report, confusion matrix and accuracy for the random forest, in that order.
# Author's note: random forest performs better than KNN.
for metric in (classification_report, confusion_matrix, accuracy_score):
    print(metric(y_test, random_predictions))

In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes baseline on the flat pixel features.
bayes = GaussianNB()
bayes_predictions = bayes.fit(X_train, y_train).predict(X_test)

# Report, confusion matrix and accuracy, in that order.
# Author's note: the predictions are not good.
for metric in (classification_report, confusion_matrix, accuracy_score):
    print(metric(y_test, bayes_predictions))

<font color="red">
Let's try Convolutional Neural Networks:
</font>

In [None]:
# Scale pixel intensities from 0..255 down to 0..1 for the CNN.
# Both arrays are rebuilt from their original sources instead of being updated
# in place (X = X/255, test = test.values/255), so the cell can be re-run:
# the in-place form divides X by 255 again on every run and raises
# AttributeError on `test.values` once `test` is already a NumPy array.
X = df.drop("label", axis=1).values / 255.0
test = pd.read_csv("../input/digit-recognizer/test.csv").values / 255.0

In [None]:
# Turn each flat pixel row into a (height=28, width=28, channels=1) image.
# One channel because the images are grayscale; -1 lets NumPy infer the image count.
image_shape = (-1, 28, 28, 1)
X = X.reshape(image_shape)
test = test.reshape(image_shape)

In [None]:
# One-hot encode the digit labels, e.g. 2 -> [0,0,1,0,0,0,0,0,0,0].
# Import through tensorflow.keras, like the model cell does; the private
# `keras.utils.np_utils` module path is not available in recent Keras releases.
from tensorflow.keras.utils import to_categorical

# Encode from the original label column rather than from `y` itself, so
# re-running this cell does not one-hot encode an already encoded array.
# num_classes is pinned to 10 to match the 10-unit softmax output layer.
y = to_categorical(df["label"].values, num_classes=10)

print(f"Label size {y.shape}")

In [None]:
# Redo the 5% hold-out split on the image-shaped, scaled X and one-hot y
# (same seed as before), then print the shape of each part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.05, random_state=42
)
for part in (X_train, X_test, y_train, y_test):
    print(part.shape)

In [None]:
# Preview the first four training images, each titled with its true digit.
# The trailing channel axis is dropped so imshow receives 2-D arrays.
X_visualization = X_train.reshape(X_train.shape[0], 28, 28)

fig, axis = plt.subplots(1, 4, figsize=(20, 10))
for ax, image, one_hot in zip(axis.flat, X_visualization, y_train):
    ax.imshow(image, cmap='binary')
    # argmax converts the one-hot label row back to its digit.
    ax.set(title=f"Real Number is {one_hot.argmax()}");

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten,Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,  ReduceLROnPlateau
# CNN for 28x28x1 digit images, built as one layer list:
#   two 64-filter convs -> pool -> batch norm
#   two 128-filter convs -> pool -> batch norm
#   one 256-filter conv -> pool
#   flatten -> batch norm -> 512-unit dense -> 10-way softmax
cnn = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    Conv2D(filters=128, kernel_size=(3, 3), activation="relu"),
    Conv2D(filters=128, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),

    Conv2D(filters=256, kernel_size=(3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    BatchNormalization(),
    Dense(512, activation="relu"),
    Dense(10, activation="softmax"),
])

# Categorical cross-entropy because the labels are one-hot encoded.
cnn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Data augmentation to reduce overfitting.
# Imported through tensorflow.keras for consistency with the model imports.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Only geometric augmentations are enabled. Centering, std-normalization,
# ZCA whitening and flips are left at their default (off), so no
# datagen.fit(...) call is needed: there are no dataset statistics to compute.
datagen = ImageDataGenerator(
    rotation_range=10,       # random rotation of up to 10 degrees
    zoom_range=0.1,          # random zoom of up to 10%
    width_shift_range=0.1,   # random horizontal shift (fraction of total width)
    height_shift_range=0.1,  # random vertical shift (fraction of total height)
)

train_generator = datagen.flow(X_train, y_train, batch_size=32)

# Validation batches come from a plain, non-augmenting generator so validation
# metrics are measured on the real hold-out images rather than on randomly
# distorted copies. shuffle=False keeps the evaluation order stable.
test_generator = ImageDataGenerator().flow(X_test, y_test, batch_size=32, shuffle=False)
                              

In [None]:
# Train for 10 epochs on the training generator, validating on the hold-out generator.
# Step counts use floor division by the generators' batch size (32), so a
# trailing partial batch is not counted in either step total.
BATCH_SIZE = 32
history = cnn.fit(
    train_generator,
    validation_data=test_generator,
    epochs=10,
    steps_per_epoch=len(X_train) // BATCH_SIZE,
    validation_steps=len(X_test) // BATCH_SIZE,
)

In [None]:
# Plot every per-epoch metric recorded during training on one line chart.
sns.set_style("darkgrid")
training_log = pd.DataFrame(cnn.history.history)
training_log.plot(figsize=(15, 10))

In [None]:
# Predict the hold-out images and show the first 16 with true vs. predicted digit.
y_pred = cnn.predict(X_test)
# Drop the channel axis so imshow receives 2-D arrays.
X_new = X_test.reshape(X_test.shape[0], 28, 28)

fig, axis = plt.subplots(4, 4, figsize=(12, 14))
for ax, image, truth, probs in zip(axis.flat, X_new, y_test, y_pred):
    ax.imshow(image, cmap='binary')
    # argmax turns both the one-hot label and the softmax output into a digit.
    ax.set(title=f"Real Number is {truth.argmax()}\nPredict Number is {probs.argmax()}");

In [None]:
# Run the CNN on the competition test images; the result is displayed as the cell output.
predictions = cnn.predict(x=test, verbose=2)
predictions

In [None]:
# Collapse each row of class scores to the index of its largest entry,
# i.e. the predicted digit instead of the one-hot style vector.
new_predictions = predictions.argmax(axis=1)
new_predictions

In [None]:
# Load the sample submission file to use as the template for the output.
sample_submission_csv = '../input/digit-recognizer/sample_submission.csv'
submission = pd.read_csv(sample_submission_csv)

In [None]:
# Display the sample submission frame as loaded.
submission

In [None]:
# Put the predicted digits into the Label column, save the file without the
# index column, and preview the first rows.
submission['Label'] = new_predictions
output_csv = "my_submission3.csv"
submission.to_csv(output_csv, index=False)
submission.head()