In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, fname) for fname in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# MNIST Handwritten Digit Classification. This is one of the more interesting machine learning projects.
# Deep learning and neural networks are used in many real-world applications, such as image recognition, automatic text generation, and driverless cars.
# The MNIST digit classification project is designed to train a model to recognize handwritten digits.
# In this project, the MNIST dataset is used to train a Convolutional Neural Network (CNN).

# importing the required libraries...

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the training split from the competition's input directory.
TRAIN_CSV = "/kaggle/input/hackathon2021/train.csv/train.csv"
train = pd.read_csv(TRAIN_CSV)

# Print the (rows, columns) shape, then preview the first rows as the cell output.
print(train.shape)
train.head()


In [None]:
# Load the test split from the competition's input directory.
TEST_CSV = "/kaggle/input/hackathon2021/test.csv/test.csv"
test = pd.read_csv(TEST_CSV)

# Print the (rows, columns) shape, then preview the first rows as the cell output.
print(test.shape)
test.head()

In [None]:
# Preparing the data:
# The images in the data set are 28 x 28 pixels.
# Every row of the training frame is one image stored flat, i.e. 785 numbers:
# the 'label' column holds the digit depicted in the image, and the remaining
# 784 values (between 0 and 255) are the pixels of the 28 x 28 image.

# Split the frame into pixel features (X) and digit labels (Y).
X = train.drop('label', axis=1)
Y = train['label']

# Summarise, per pixel column, whether any value is missing.
# Printed explicitly: a bare expression in the middle of a cell is evaluated
# and then discarded, so the check would otherwise never be shown.
print(X.isnull().any().describe())

# Shape of the flat feature frame before reshaping: (rows, columns).
print(X.shape)

# Convert the DataFrame to a NumPy array and reshape every flat row into a
# 28 x 28 image with one (grayscale) channel; -1 lets NumPy infer the image count.
X = X.values.reshape(-1, 28, 28, 1)

In [None]:
# Display the dimensions of the image array X as the cell output.
np.shape(X)


In [None]:
# Normalizing the data
#
# Training converges faster on inputs in [0, 1] than on raw [0, 255] pixel values.
# Each pixel is scaled by 0.99 / 255 and shifted by 0.01, which maps 0..255 onto
# [0.01, 1]. This avoids inputs that are exactly 0, which are capable of
# preventing weight updates.
# NOTE: X is rebound here, so running this cell a second time rescales the data again.
PIXEL_MAX = 255.0
X = np.asarray(X, dtype="float") / PIXEL_MAX * 0.99 + 0.01

# CNN: two convolution + max-pooling stages, followed by dropout and
# two fully connected layers (the last one emits 10 softmax scores).
layers = tf.keras.layers
cnn_layers = [
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(256, activation='relu'),
    layers.Dense(10, activation='softmax'),
]
model = tf.keras.models.Sequential(cnn_layers)

# Print the layer-by-layer summary of the network.
model.summary()

# Compile with Adam; the sparse categorical cross-entropy loss takes integer labels
# directly, and 'acc' reports accuracy during training.
compile_options = dict(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)
model.compile(**compile_options)

In [None]:
# Train on the full training set; the object returned by fit() is the cell output.
fit_options = {"epochs": 20, "batch_size": 70}
model.fit(X, Y, **fit_options)

In [None]:
# Give the test images exactly the same preprocessing as the training images,
# otherwise the network is scored on inputs scaled differently from what it learned:
#   1. reshape every flat row to 28 x 28 x 1 (-1 infers the number of images
#      rather than hard-coding it),
#   2. map pixel values 0..255 onto [0.01, 1] with the training formula
#      (divide by 255 -- not 225 -- then scale by 0.99 and shift by 0.01).
test = test.values.reshape(-1, 28, 28, 1)
test = np.array(test, dtype="float") / 255.0 * 0.99 + 0.01

In [None]:
# Predict a digit for every test image: run the network, then take the index
# of the largest output score in each row.
test_scores = model.predict(test)
test_y = test_scores.argmax(axis=1)

In [None]:
# Raw network outputs for the test images, and the highest-scoring class
# along the last axis of each prediction.
predictions = model.predict(test)
results = np.argmax(predictions, axis=-1)

In [None]:
# Build the submission table: a 1-based ImageId next to the predicted Label.
# The ids are derived from the number of predictions instead of a hard-coded
# 28000, so the table always has exactly one row per prediction.
result = pd.DataFrame({
    'ImageId': range(1, len(results) + 1),
    'Label': results,
})

# Write the submission without the DataFrame index column.
result.to_csv("output.csv", index=False)

In [None]:
# Spot-check a single prediction. Submission ImageIds are 1-based, so row 256 of
# the test array is ImageId 257; the id in the message is derived from the index
# so the text always matches the image that is actually shown.
sample_index = 256
print("test of output at id {} is {}".format(sample_index + 1, test_y[sample_index]))

# Drop the trailing channel axis: imshow documents (M, N) as the shape for
# scalar (grayscale) image data, and the test images are stored as (28, 28, 1).
plt.imshow(test[sample_index][:, :, 0])