In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
# Read both CSV files from the working directory in a single pass;
# `train` is bound to 'train.csv' and `test` to 'test.csv'.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [2]:
# Separate the target column from the pixel columns of the labelled frame.
y = train['label']
# Scale features by 1/255 (presumably 8-bit greyscale pixels — TODO confirm).
# Built as a new frame so re-running this cell never rescales twice.
X = train.drop('label', axis = 1) / 255

# Row offsets of the sequential split: rows [0, train_idx) are used for
# training, [train_idx, val_idx) for validation, and the remainder is a
# held-out labelled set.
train_idx, val_idx = 29400, 29400 + 6300

# Every feature row is reshaped to a 28x28 single-channel image tensor.
X_train, y_train =  X.iloc[:train_idx].values.reshape(-1, 28, 28, 1),           y.iloc[:train_idx].values
X_val, y_val =      X.iloc[train_idx:val_idx].values.reshape(-1, 28, 28, 1),    y.iloc[train_idx:val_idx].values
X_test, y_test =    X.iloc[val_idx:].values.reshape(-1, 28, 28, 1),             y.iloc[val_idx:].values

# The unlabelled frame must receive exactly the same 1/255 scaling as X:
# the models are fitted on scaled inputs, so feeding them raw values would
# make their predictions on `test_values` unreliable.
test_values = (test / 255).values.reshape(-1, 28, 28, 1)
print(X_train.shape)

(29400, 28, 28, 1)


In [3]:
import tensorflow as tf
import tensorflow.keras.layers as tfl
from sklearn.metrics import accuracy_score
import os
# Seed every RNG that influences training, not just NumPy's.
# `set_random_seed` seeds Python's `random`, NumPy and TensorFlow in one call,
# so weight initialisation, dropout masks and `fit` shuffling become
# repeatable across "Restart & Run All" (NumPy still receives seed 1).
tf.keras.utils.set_random_seed(1)

# List the devices TensorFlow can see (CPU / GPU).
print(tf.config.list_physical_devices())

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [4]:
# Baseline fully-connected classifier: flattened 28x28x1 image -> three ReLU
# hidden layers (128, 64, 32 units) -> 10-way softmax.
model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape=` to the first layer of a Sequential model is deprecated
    # in Keras 3 and emits a UserWarning on construction.
    tfl.Input(shape = (28, 28, 1)),
    tfl.Flatten(),
    tfl.Dense(128, activation = 'relu'),
    tfl.Dense(64, activation = 'relu'),
    tfl.Dense(32, activation = 'relu'),
    tfl.Dense(10, activation = 'softmax')
])

# Labels are integer class ids and the last layer already applies softmax,
# hence the sparse loss with from_logits=False.
model.compile(optimizer = 'adam',
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
              metrics = ['accuracy'])

model.fit(X_train, y_train, epochs = 15, verbose = 1, batch_size = 128)

# Score the fitted model on the validation slice: take the most probable
# class per row and compare against the true labels.
val_pred_probs = model.predict(X_val)
val_pred = tf.argmax(val_pred_probs, axis = 1).numpy()
val_accuracy = accuracy_score(y_val, val_pred)
print('Accuracy on the validation set:', round(val_accuracy, 4))

Epoch 1/15


  super().__init__(**kwargs)
2024-07-22 10:43:52.028218: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-07-22 10:43:52.028242: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-07-22 10:43:52.028249: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-07-22 10:43:52.028266: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-07-22 10:43:52.028279: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-07-22 10:43:52.412034: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled

[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.7284 - loss: 0.9345
Epoch 2/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9442 - loss: 0.1889
Epoch 3/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9620 - loss: 0.1269
Epoch 4/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9715 - loss: 0.0967
Epoch 5/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9778 - loss: 0.0773
Epoch 6/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9823 - loss: 0.0591
Epoch 7/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9855 - loss: 0.0479
Epoch 8/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.9895 - loss: 0.0350
Epoch 9/15
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━

In [5]:
# Class scores of the dense model for every row of the unlabelled set.
test_pred_probs = model.predict(test_values)
# Predicted class = position of the highest score in each row.
test_pred = np.argmax(test_pred_probs, axis = 1)
# One 'Label' column, indexed from 1 under the name 'ImageId'
# (presumably the submission layout — verify against the competition spec).
test_df = pd.DataFrame(
    {'Label': test_pred},
    index = (test.index + 1).rename('ImageId'),
)
#test_df.to_csv('simple_nn_v1.csv')

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340us/step


In [6]:
# Convolutional classifier: two (Conv -> BatchNorm -> Conv -> MaxPool -> Dropout)
# stages followed by a 512-unit dense layer and a 10-way softmax.
conv = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer. Passing
    # `input_shape=` to the first layer of a Sequential model is deprecated
    # in Keras 3 and emits a UserWarning on construction.
    tfl.Input(shape = X_train.shape[1:]),

    # Stage 1: 28 filters per convolution.
    tfl.Conv2D(filters = 28, kernel_size = (5, 5), padding = 'same', activation = 'relu'),
    tfl.BatchNormalization(),
    tfl.Conv2D(filters = 28, kernel_size = (5, 5), activation = 'relu'),
    tfl.MaxPool2D(pool_size = (2, 2)),
    tfl.Dropout(0.25),

    # Stage 2: 32 filters per convolution.
    tfl.Conv2D(filters = 32, kernel_size = (5, 5), padding = 'same', activation = 'relu'),
    tfl.BatchNormalization(),
    tfl.Conv2D(filters = 32, kernel_size = (5, 5), activation = 'relu'),
    tfl.MaxPool2D(pool_size = (2, 2)),
    tfl.Dropout(0.25),

    # Classification head.
    tfl.Flatten(),
    tfl.Dense(512, activation = 'relu'),
    tfl.Dense(10, activation = 'softmax'),
])

# Labels are integer class ids and the last layer already applies softmax,
# hence the sparse loss with from_logits=False.
conv.compile(optimizer = tf.keras.optimizers.SGD(learning_rate = 0.01),
             loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = False),
             metrics = ['accuracy'])

conv.fit(X_train, y_train, epochs = 20, verbose = 1, batch_size = 128)


# Score the fitted model on the validation slice: take the most probable
# class per row and compare against the true labels.
val_pred_probs = conv.predict(X_val)
val_pred = tf.argmax(val_pred_probs, axis = 1).numpy()
val_accuracy = accuracy_score(y_val, val_pred)
print('Accuracy on the validation set:', round(val_accuracy, 4))

Epoch 1/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6697 - loss: 1.0439
Epoch 2/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9457 - loss: 0.1792
Epoch 3/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9597 - loss: 0.1264
Epoch 4/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9680 - loss: 0.1041
Epoch 5/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9718 - loss: 0.0896
Epoch 6/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9731 - loss: 0.0800
Epoch 7/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.9767 - loss: 0.0746
Epoch 8/20
[1m230/230[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.9802 - loss: 0.0626
Epoch 9/20
[1m230/230[0m [32m

In [28]:
# Class scores of the convolutional model for every row of the unlabelled set.
test_pred_probs = conv.predict(test_values)
# Predicted class = position of the highest score in each row.
test_pred = test_pred_probs.argmax(axis = 1)
# Assemble a single-column 'Label' frame whose index starts at 1 and is
# named 'ImageId'.
test_df = (
    pd.Series(test_pred, index = test.index + 1, name = 'Label')
    .rename_axis('ImageId')
    .to_frame()
)
#test_df.to_csv('convolutional_nn_v3.csv')

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
