# Digit Recognizer Project w/ MNIST Dataset

### Purpose: to begin to refine my data science skills and have concrete evidence of them.

### Project: Given a dataset of images with handwritten digits, make a TensorFlow model that is capable of recognizing these digits for what they are.

### Tools: Jupyter Notebook, Python 3.11, TensorFlow, Pandas

### Section 1: Preparing Data

In [2]:
#import the necessary libraries for the data preparation
import pandas as pd

In [3]:
#Read the csv data into the dataframe using pandas
myPxFrame = pd.read_csv('DigitRecognizerData/train.csv/train.csv')

In [18]:
#Display the dataframe
myPxFrame.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
#Let the feature dataframe be every column except for the labels
myFeatures=myPxFrame.loc[:,myPxFrame.columns != 'label']
#Normalize the color values (max 255)
myFeatures=myFeatures/255
myFeatures['pixel518'].head()

0        0.000000
1        0.000000
2        0.992188
3        0.957031
4        0.000000
           ...   
41995    0.000000
41996    0.988281
41997    0.000000
41998    0.984375
41999    0.011719
Name: pixel518, Length: 42000, dtype: float64

In [19]:
#We will use tensorflow later for the model and for creating a tensor object
import tensorflow as tf

In [20]:
myFeaturesTensor=tf.convert_to_tensor(myFeatures)
myFeaturesTensor.head()

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'head'

#### Let's turn our data processing into some reusable code - function time!

In [9]:
def fromCSVtoFeatureTensor(location):
    myPxFrame = pd.read_csv(location)
    myFeatures=myPxFrame.loc[:,myPxFrame.columns != 'label']
    myFeatures=myFeatures/256
    print(myPxFrame)
    myFeaturesTensor=tf.convert_to_tensor(myFeatures)
    return myFeaturesTensor

### Section 2: Creating the Model

In [10]:

from tensorflow.keras import layers

In [11]:
model = tf.keras.Sequential([
    layers.Dense(784, activation="relu"),
    layers.Dense(392,  activation="relu"),
    layers.Dropout(.1),
    layers.Dense(150,  activation="relu"),
    layers.Dropout(.1),
    layers.Dense(40,  activation="relu"),
    layers.Dense(10,  activation="softmax")
])
model.compile(optimizer="adam", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),metrics=["accuracy"])

### Section 3: Training the Model

In [12]:
model.fit(myFeaturesTensor,tf.convert_to_tensor(myPxFrame['label']),epochs=10)

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a9211a1bb0>

### Section 4: Testing the Model

In [13]:
testFeatures = fromCSVtoFeatureTensor('DigitRecognizerData/test.csv/test.csv')
myResults=model.predict(testFeatures)

       pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0           0       0       0       0       0       0       0       0       0   
1           0       0       0       0       0       0       0       0       0   
2           0       0       0       0       0       0       0       0       0   
3           0       0       0       0       0       0       0       0       0   
4           0       0       0       0       0       0       0       0       0   
...       ...     ...     ...     ...     ...     ...     ...     ...     ...   
27995       0       0       0       0       0       0       0       0       0   
27996       0       0       0       0       0       0       0       0       0   
27997       0       0       0       0       0       0       0       0       0   
27998       0       0       0       0       0       0       0       0       0   
27999       0       0       0       0       0       0       0       0       0   

       pixel9  ...  pixel77

In [14]:
answerArray=[]
for array in myResults:
    max=0.0;
    count=0;
    save=0;
    for element in array:
        if(element>max):
            max=element;
            save=count;
        count=count+1;
    answerArray.append(save);
answerArray[0:10]

[2, 0, 9, 0, 3, 7, 0, 3, 0, 3]

In [15]:
import csv

In [17]:
with open('DigitRecognizerData/submission.csv',"w") as myFile:
    csvwriter= csv.writer(myFile);
    header=["ImageId","Label"]
    csvwriter.writerow(header);
    count=0;
    for answer in answerArray:
        count+=1;
        total=[str(count),str(answer)]
        csvwriter.writerow(total)