In [35]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd 
import numpy as np
import keras
from tensorflow.keras import layers, models, regularizers
import keras_tuner as kt
from tensorflow.keras.utils import to_categorical
from tensorflow import keras 
from tensorflow.keras import layers
import seaborn as sns 
import matplotlib.pyplot as plt 
from tensorflow.keras.models import load_model

In [36]:
df=pd.read_csv("train.csv") 
X=df.drop(columns=["label"],inplace=False).copy() 
y=np.array(df["label"].values.copy())
num_classes=np.unique(y).shape[0] 
y= tf.keras.utils.to_categorical(y, num_classes=num_classes)


In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) 

In [38]:
def normalize_img(x, y): 
    x=tf.reshape(x,(28,28,1))
    x=tf.cast(x,tf.float32)/255
    return x,y

def augment(x,y): 
    scale = tf.random.uniform([], 0.9, 1.1)
    x = tf.image.resize(x, [int(28 * scale), int(28 * scale)])
    x = tf.image.resize_with_crop_or_pad(x, 28, 28)
    return x,y

BATCH_SIZE = 64
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = (
    train_dataset.shuffle(buffer_size=1000)
    .map(normalize_img, num_parallel_calls=AUTOTUNE) 
    .map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(lambda x, y: (x, tf.cast(y, tf.float32)))
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE) 
)

test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = (
    test_dataset
    .map(normalize_img, num_parallel_calls=AUTOTUNE)
    .map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .map(lambda x, y: (x, tf.cast(y, tf.float32)))
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE) 
)



In [39]:
print(train_dataset.element_spec)

(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 10), dtype=tf.float32, name=None))


In [40]:
def buildModel(): 
    inputs = layers.Input(shape=(28,28,1))
    x=layers.Conv2D(32,(3,3))(inputs) 
    x=layers.BatchNormalization()(x) 
    x = keras.activations.relu(x)
    x = layers.MaxPooling2D()(x)
    x= layers.Conv2D(64, 5, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x = layers.Conv2D(128, 3)(x)
    x = layers.BatchNormalization()(x)
    x = keras.activations.relu(x)
    x= layers.Flatten()(x)
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(10 ,activation='softmax')(x)
    model=keras.Model(inputs=inputs,outputs=outputs) 
    return model

In [41]:
model=buildModel() 
model.compile( 
    loss=keras.losses.CategoricalCrossentropy, 
    optimizer="adam", 
    metrics=["accuracy"])
#history=model.fit(train_dataset,epochs=10,verbose=2,validation_data=test_dataset)

In [42]:
def plotTrainingModel(history): 
    
    TrainingAccuracy = history.history["accuracy"]
    ValidationAccuracy = history.history["val_accuracy"]
    numOfEpochs=len(TrainingAccuracy)
    epochVector=range(1,numOfEpochs+1)

    TraingLoss=history.history['loss'] 

    fig, axes = plt.subplots(1, 2, figsize=(12, 5)) 

    axes[0].set_title("ACCURACY")
    sns.scatterplot(ax=axes[0],x=epochVector,y=TrainingAccuracy,color="blue")
    sns.lineplot(ax=axes[0],x=epochVector,y=TrainingAccuracy,marker="o", label="TrainingAccuracy")
    sns.scatterplot(ax=axes[0],x=epochVector,y=ValidationAccuracy,color="red")
    sns.lineplot(ax=axes[0],x=epochVector,y=ValidationAccuracy,marker="o", label="ValidationAccuracy")

    axes[1].set_title("LOSS")
    sns.scatterplot(ax=axes[1],x=epochVector,y=TraingLoss,color="blue")
    sns.lineplot(ax=axes[1],x=epochVector,y=TraingLoss,marker="o", label="Loss")
    
    plt.tight_layout()
    plt.legend() 
    plt.show()

In [43]:
#plotTrainingModel(history)

In [44]:
def build_model(hp):
    inputs = layers.Input(shape=(28, 28, 1))

    x = layers.Conv2D(
        filters=hp.Int('filters_1', min_value=16, max_value=128, step=16),
        kernel_size=(3, 3),
        activation='relu'
    )(inputs)
    
    x = layers.BatchNormalization()(x)
    
    x = layers.MaxPooling2D()(x)

    x = layers.Dropout(rate=hp.Float('dropout_1', min_value=0, max_value=0.05, step=0.01))(x)
    x = layers.Conv2D(
        filters=hp.Int('filters_2', min_value=32, max_value=256, step=32),
        kernel_size=hp.Choice('kernel_size_2', values=[3, 5]),  
        activation='relu',
        padding='same'
    )(x)
    x = layers.BatchNormalization()(x)

    x = layers.Dropout(rate=hp.Float('dropout_2', min_value=0, max_value=0.05, step=0.01))(x)
    x = layers.Conv2D(
        filters=hp.Int('filters_3', min_value=64, max_value=512, step=64),
        kernel_size=3,
        activation='relu'
    )(x) 
    
    x = layers.BatchNormalization()(x)

  
    x = layers.Flatten()(x)

    
    x = layers.Dense(
        units=hp.Int('dense_units', min_value=32, max_value=256, step=32),
        activation='relu'
    )(x)

    outputs = layers.Dense(10, activation='softmax')(x)

    model = keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss=keras.losses.CategoricalCrossentropy,metrics=['accuracy'])

    return model


In [49]:
tuner = kt.RandomSearch(
    build_model,  
    objective='val_accuracy',  
    max_trials=8,  
    executions_per_trial=1, 
    directory='hyperparam_tuning',
    project_name='cnn_tuning'
)

In [50]:
callbacks_list = [
    keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=2
    )
]

In [51]:
tuner.search(train_dataset,
             epochs=15,
             validation_data=test_dataset, 
                callbacks=callbacks_list)

Trial 8 Complete [00h 10m 36s]
val_accuracy: 0.9891666769981384

Best val_accuracy So Far: 0.9897618889808655
Total elapsed time: 01h 23m 56s


In [55]:
best_model = tuner.get_best_models(num_models=1)[0]

best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
final_model = build_model(best_hps)

  saveable.load_own_variables(weights_store.get(inner_path))


## FINAL

In [56]:
final_earlystop_cb = keras.callbacks.EarlyStopping(
    monitor="accuracy",    
    patience=2,
    restore_best_weights=True
)

In [1]:
X_full = df.drop(columns=["label"]).values  
y_full = df["label"].values                

num_classes = len(np.unique(y_full))
y_full_oh = tf.keras.utils.to_categorical(y_full, num_classes=num_classes)
full_train_dataset = tf.data.Dataset.from_tensor_slices((X_full, y_full_oh))
full_train_dataset = (
    full_train_dataset
    .shuffle(buffer_size=10000)
    .map(normalize_img, num_parallel_calls=AUTOTUNE)
    .map(augment, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)
history = final_model.fit(
    full_train_dataset,
    epochs=20,
    verbose=2,
    callbacks=[final_earlystop_cb]
)
stopped_epoch = final_earlystop_cb.stopped_epoch
patience     = final_earlystop_cb.patience
best_epoch   = stopped_epoch - patience + 1  

print(f"Best epoch from final training: {best_epoch}")

final_model = build_model(best_hps)
history_final = final_model.fit(
    full_train_dataset,
    epochs=best_epoch,
    verbose=2
)

NameError: name 'df' is not defined

In [61]:
df_test = pd.read_csv("test.csv")
X_test = df_test.values  

def normalize_test(x):
    x = tf.reshape(x, (28, 28, 1))
    x = tf.cast(x, tf.float32) / 255.0
    return x

test_dataset = tf.data.Dataset.from_tensor_slices(X_test)
test_dataset = (
    test_dataset
    .map(normalize_test, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

predictions = final_model.predict(test_dataset)
predicted_classes = np.argmax(predictions, axis=1)

print("Sample Predictions:", predicted_classes[:10])

submission_df = pd.DataFrame({
    "ImageId": np.arange(1, len(predicted_classes) + 1),
    "Label": predicted_classes
})
submission_df.to_csv("submission.csv", index=False)

[1m438/438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step
Sample Predictions: [2 0 9 0 3 7 0 3 0 3]
Predictions saved to submission.csv!
