# Model Experiments

## Penguin classification

In [3]:
# load dataset
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

df = pd.read_csv('../data/penguins.csv')
df = df.dropna()

le_species = LabelEncoder()
df['species'] = le_species.fit_transform(df['species'])
le_island = LabelEncoder()
df['island'] = le_island.fit_transform(df['island'])
le_sex = LabelEncoder()
df['sex'] = le_sex.fit_transform(df['sex'])


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

X = df.drop('species', axis=1)
y = df['species']

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        30
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        24

    accuracy                           1.00        68
   macro avg       1.00      1.00      1.00        68
weighted avg       1.00      1.00      1.00        68

Confusion Matrix:
 [[30  0  0]
 [ 0 14  0]
 [ 0  0 24]]


## Image number classification

In [1]:
# load mnist dataset using keras
import keras
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
import numpy as np

# MNIST model meta parameters
num_classes = 10
input_shape = (28, 28, 1)
batch_size = 128
epochs = 15

# Load MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255

# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)

x_test = x_test.astype("float32") / 255
x_test = np.expand_dims(x_test, -1)
y_test = keras.utils.to_categorical(y_test, num_classes)


2025-07-20 15:07:50.555604: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-20 15:07:50.565053: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-07-20 15:07:50.583536: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753016870.628954   25579 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753016870.639294   25579 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1753016870.673482   25579 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

In [3]:
# Build the model

from keras.layers import Input

model_mnist = Sequential()

model_mnist.add(Input(shape=input_shape))
model_mnist.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model_mnist.add(MaxPooling2D(pool_size=(2, 2)))
model_mnist.add(Conv2D(64, (3, 3), activation='relu'))
model_mnist.add(MaxPooling2D(pool_size=(2, 2)))
model_mnist.add(Flatten())
model_mnist.add(Dense(128, activation='relu'))
model_mnist.add(Dense(num_classes, activation='softmax'))



In [5]:
# compile and train the model
model_mnist.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

model_mnist.fit(x_train, y_train,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(x_test, y_test))

Epoch 1/15


2025-07-20 15:10:50.995968: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 188160000 exceeds 10% of free system memory.


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 37ms/step - accuracy: 0.8535 - loss: 0.4999 - val_accuracy: 0.9794 - val_loss: 0.0647
Epoch 2/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9796 - loss: 0.0639 - val_accuracy: 0.9860 - val_loss: 0.0432
Epoch 3/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9867 - loss: 0.0434 - val_accuracy: 0.9872 - val_loss: 0.0377
Epoch 4/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9903 - loss: 0.0305 - val_accuracy: 0.9893 - val_loss: 0.0318
Epoch 5/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9920 - loss: 0.0258 - val_accuracy: 0.9870 - val_loss: 0.0365
Epoch 6/15
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 35ms/step - accuracy: 0.9936 - loss: 0.0202 - val_accuracy: 0.9901 - val_loss: 0.0300
Epoch 7/15
[1m469/469[0m 

<keras.src.callbacks.history.History at 0x734bb72a4670>

In [7]:
# evaluate the model
score = model_mnist.evaluate(x_test, y_test, verbose=0)

print(f"Final Model Performance:")
print(f"- Test Loss: {score[0]:.4f}")
print(f"- Test Accuracy: {score[1]:.2%}")
print(f"- Error Rate: {(1-score[1]):.2%}")

Final Model Performance:
- Test Loss: 0.0368
- Test Accuracy: 99.03%
- Error Rate: 0.97%


In [None]:
# save the model
model_mnist.save('../models/mnist_model.h5')