In [10]:
# Kenneth Dandrow
# 512 Neurons

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load MNIST data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Prepare the data
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255

# Split validation set
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

# Build the model (2 hidden layers)
model = keras.Sequential([
    layers.Dense(512, activation="relu"),
    layers.Dense(10, activation="softmax")
])

# Compile model
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Train model
history = model.fit(x_train, y_train,
                    epochs=5,
                    batch_size=128,
                    validation_data=(x_val, y_val))


Train on 50000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
# Kenneth Dandrow
# Evaluate the model / 512 Neurons

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc}")

Test accuracy: 0.9778000116348267


In [12]:
# Kenneth Dandrow
# 256 Neurons

model_256 = keras.Sequential([
    layers.Dense(256, activation="relu"),
    layers.Dense(10, activation="softmax")
])

model_256.compile(optimizer="rmsprop",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

history_256 = model_256.fit(x_train, y_train,
                            epochs=5,
                            batch_size=128,
                            validation_data=(x_val, y_val))

Train on 50000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
# Kenneth Dandrow
# 256 Neuron Test

test_loss_256, test_acc_256 = model_256.evaluate(x_test, y_test)
print(f"Test accuracy (256 neurons): {test_acc_256}")

Test accuracy (256 neurons): 0.9782999753952026


In [14]:
# Kenneth Dandrow
# 1024 Neurons

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load MNIST data
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Prepare the data
x_train = x_train.reshape(60000, 784).astype("float32") / 255
x_test = x_test.reshape(10000, 784).astype("float32") / 255

# Split validation set
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

model_1024 = keras.Sequential([
    layers.Dense(1024, activation="relu"),
    layers.Dense(10, activation="softmax")
])

model_1024.compile(optimizer="rmsprop",
                   loss="sparse_categorical_crossentropy",
                   metrics=["accuracy"])

history_1024 = model_1024.fit(x_train, y_train,
                              epochs=5,
                              batch_size=128,
                              validation_data=(x_val, y_val))

Train on 50000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
# Kenneth Dandrow
# 1024 Neuron Test

test_loss_1024, test_acc_1024 = model_1024.evaluate(x_test, y_test)
print(f"Test accuracy (1024 neurons): {test_acc_1024}")

Test accuracy (1024 neurons): 0.9811000227928162


In [1]:
# Kenneth Dandrow
# Accuracy Rate Summary

| Model (Hidden Neurons) | Training Accuracy | Validation Accuracy | Test Accuracy |
|------------------------|-------------------|---------------------|---------------|
| 512 (Default)          | 0.9881            | 0.9756              | 0.9778        |
| 256                    | 0.9837            | 0.9756              | 0.9783        |
| 1024                   | 0.9905            | 0.9800              | 0.9811        |

---

# Explanation of Results

When comparing the three experiments, all models performed well, achieving over 97% test accuracy. Here's what I observed:

- The **256-neuron model** performed slightly lower during training but nearly matched the validation and test accuracy of the 512-neuron model. This suggests it was slightly more efficient but had slightly less capacity to learn complex features.

- The **512-neuron model** offered strong overall performance and balanced training time with accuracy. It had the best performance per time invested and is a solid middle ground.

- The **1024-neuron model** achieved the highest accuracy across the board. However, it also had **the longest training time**, and the improvement in accuracy was small. This could mean diminishing returns on adding complexity — possibly even approaching overfitting if pushed further.

Overall, increasing the number of neurons improves accuracy up to a point, but too many neurons can make the model slower and harder to generalize.


SyntaxError: invalid syntax (<ipython-input-1-fcecf8251ce7>, line 4)