<a href="https://www.kaggle.com/code/najeebz/s4e3-tensorflow-cnn-hyperparameter-tuning-optuna?scriptVersionId=167212163" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# [Steel Plate Defect Prediction](https://www.kaggle.com/competitions/playground-series-s4e3)
### Playground Series - Season 4, Episode 3

​
_______________________________________________________________________ 
# Author Details:
- Name: Najeeb Haider Zaidi
- Email: zaidi.nh@gmail.com
- Profiles: [Github](https://github.com/snajeebz)  [LinkedIn](https://www.linkedin.com/in/najeebz) [Kaggle](https://www.kaggle.com/najeebz)
- Prepared for the submission to the competition.
________________________________________________________________________
# Attributions:

​
1. [Walter Reade, Ashley Chow. (2024). Steel Plate Defect Prediction. Kaggle.](https://kaggle.com/competitions/playground-series-s4e3)

________________________________________________________________________
This notebook is prepared for submission to the competition, so it covers the whole process from loading the data through to creating the submission CSV file in the required format.
__________________________________________________________________________
# Code Execution and Versioning Repository: 
- [Execute the notebook in Kaggle](https://www.kaggle.com/code/najeebz/steel-plate-defect-tensorflow-neural-net-optuna)
- [Github Repository](https://github.com/snajeebz/playground)
•
____________________________________________________________________
# Citation:
​
Najeeb Zaidi. (2024). Steel Plate Defect Prediction. Competition Submission. Kaggle. https://www.kaggle.com/code/najeebz/steel-plate-defect-tensorflow-neural-net-optuna

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from warnings import filterwarnings;
filterwarnings('ignore')
import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Reading the Dataset

In [None]:
# Both competition files live in the same read-only input folder.
DATA_DIR = '/kaggle/input/playground-series-s4e3'
df = pd.read_csv(DATA_DIR + '/train.csv')
test = pd.read_csv(DATA_DIR + '/test.csv')

# Initial EDA

In [None]:
df.head(10)

In [None]:
df.isnull().sum()


In [None]:
df.columns

In [None]:
test.columns

In [None]:
df.nunique()

In [None]:
train_df=df.copy()

In [None]:
# Row-wise total of the seven defect indicator columns.
# skipna=False keeps the NaN-propagating behaviour of a chained `+`.
defect_flags = ['Pastry', 'Z_Scratch', 'K_Scatch', 'Stains', 'Dirtiness', 'Bumps', 'Other_Faults']
train_df['summary'] = train_df[defect_flags].sum(axis=1, skipna=False)

In [None]:
train_df['summary'].value_counts()

There are 21 rows in which more than one defect column is true (i.e. two defects),
and 818 rows in which none of the defect columns is true (i.e. no labelled defect).

In [None]:
train_df[['Pastry', 'Z_Scratch', 'K_Scatch', 'Stains',
       'Dirtiness', 'Bumps', 'Other_Faults']].value_counts()

In [None]:
# Columns treated as categorical in the count plots: the seven defect flags,
# both steel-type indicators, the global index and the per-row defect count.
# The second steel-type entry used to repeat 'TypeOfSteel_A300', which plotted
# that column twice and left 'TypeOfSteel_A400' among the numeric columns.
cat_col_names = ['Pastry', 'Z_Scratch', 'K_Scatch', 'Stains',
                 'Dirtiness', 'Bumps', 'Other_Faults',
                 'TypeOfSteel_A300', 'TypeOfSteel_A400',
                 'Outside_Global_Index', 'summary']
cat_cols = train_df[cat_col_names]
# Every remaining column except the row identifier is plotted as a numeric feature.
num_cols = train_df.drop(columns=['id'] + cat_col_names)

# Visualizations

# Categorical Columns Count Plots

In [None]:
# One count plot per categorical column (iterating a DataFrame yields its column names).
# Everything is read from `train_df`: `df` has no 'summary' column, so passing
# `df` as the data frame only worked because x was supplied as a separate Series.
for col in cat_cols:
    fig, ax = plt.subplots(figsize=(15, 7))
    sns.countplot(data=train_df, x=col, ax=ax)
    ax.set(title=col + ' Value Distribution')
    plt.show()

# Numerical Columns Histograms

In [None]:
# One histogram (with KDE overlay) per numeric column, annotated with mean and median.
# `sns.distplot` is deprecated; `histplot(kde=True, stat='density')` is the
# replacement that keeps the density-scaled histogram plus KDE curve.
for col in num_cols:
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.histplot(data=train_df, x=col, kde=True, stat='density', ax=ax)
    ax.set(title=col + ' Histogram')
    ax.axvline(train_df[col].mean(), color='r', label='Mean')
    ax.axvline(train_df[col].median(), color='y', linestyle='--', label='Median')
    ax.legend()
    plt.show()

# Correlation Plot to assess the correlation between the features

In [None]:
# Pairwise correlation between all columns of the training frame,
# drawn as an annotated heatmap on a large canvas.
corr = train_df.corr()
fig, ax = plt.subplots(figsize=(30, 30))
s = sns.heatmap(corr, annot=True, cmap='crest', ax=ax)

# Preparing Testing and Training Datasets.

## Removing the 21 rows with multiple defects


In [None]:
# Keep only rows whose defect count is at most one.
at_most_one_defect = train_df['summary'].le(1)
train_df = train_df.loc[at_most_one_defect]

In [None]:
def scale(X):
    """Standardise ``X`` with a ``StandardScaler`` fitted on ``X`` itself.

    A new scaler is created and fitted on every call, so the scaling
    statistics always come from the data passed in.

    Returns the transformed data produced by the scaler.
    """
    from sklearn.preprocessing import StandardScaler
    return StandardScaler().fit_transform(X)

In [None]:
# Separate the defect indicator columns (targets) from the remaining columns (inputs).
target_names = ['Pastry', 'Z_Scratch', 'K_Scatch', 'Stains',
                'Dirtiness', 'Bumps', 'Other_Faults']
y = train_df[target_names]
# 'summary' is derived from the targets, so it is dropped from the inputs too.
X = train_df.drop(columns=target_names + ['summary'])

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 'id' is a row identifier, not a measurement, so it is removed from the inputs.
# errors='ignore' keeps this cell re-runnable once the column is already gone.
X = X.drop(columns=['id'], errors='ignore')

# Split first, then fit the scaler on the training portion only, so neither the
# validation rows nor the competition test rows influence the scaling statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=42)
feature_scaler = StandardScaler().fit(X_train)
X_train = feature_scaler.transform(X_train)
X_test = feature_scaler.transform(X_test)

# Reuse the same fitted scaler for the competition test set, restricted to the
# training feature columns in the same order. A separately fitted scaler would
# put train and test features on different scales.
test_df = feature_scaler.transform(test[X.columns])

In [None]:
def class_dict_balance(a):
    """Build a Keras-style ``class_weight`` mapping from indicator target columns.

    Keras ``Model.fit(class_weight=...)`` takes a dictionary that maps integer
    class indices to a single float weight. A mapping of column name to an
    array of per-value weights does not have that shape, so the weights are
    computed per column position instead.

    Parameters
    ----------
    a : pandas.DataFrame
        One indicator column per class; the column position is used as the
        class index.

    Returns
    -------
    dict
        ``{column_position: weight}`` with ``int`` keys and ``float`` values,
        using the "balanced" heuristic
        ``total_positives / (n_classes * positives_in_column)``.
        A column without any positive entry gets the neutral weight ``1.0``.
    """
    positives = a.sum(axis=0)
    n_classes = len(a.columns)
    total_positives = float(positives.sum())

    class_dict = {}
    for index in range(n_classes):
        count = float(positives.iloc[index])
        # Guard against division by zero for a class that never occurs.
        class_dict[index] = total_positives / (n_classes * count) if count > 0 else 1.0
    return class_dict
class_dict=dict(class_dict_balance(y))
print('Final Weight Dict: ',class_dict)

# Training the Model

In [None]:
def evaluate(y_test,ypred):
    """Print accuracy and macro recall, then draw the confusion matrix.

    Parameters
    ----------
    y_test : array-like
        True labels, either 1-D class labels or a 2-D array with one column
        per class.
    ypred : array-like
        Predicted labels in either of the same layouts.
    """
    from sklearn.metrics import accuracy_score, confusion_matrix, recall_score

    def _as_labels(values):
        # Collapse a one-column-per-class matrix to the index of its largest
        # entry; confusion_matrix needs 1-D class labels.
        arr = np.asarray(values)
        if arr.ndim == 2 and arr.shape[1] > 1:
            return arr.argmax(axis=1)
        return arr.ravel()

    true_labels = _as_labels(y_test)
    # Use the `ypred` argument: the body previously read an undefined `y_pred`.
    pred_labels = _as_labels(ypred)

    print("Accuracy: ", accuracy_score(true_labels, pred_labels))
    # Recall is the share of actual positives that were predicted as positive,
    # e.g. 67 correctly found out of 100 actual positives gives 0.67.
    print("Recall Score: ", recall_score(true_labels, pred_labels, average='macro'))
    cm = confusion_matrix(true_labels, pred_labels)
    plt.figure(figsize=(10, 10))
    sns.heatmap(cm, annot=True, cmap='Reds')

In [None]:
def plot_history(history):
    """Plot the per-epoch training curves stored in a fit history.

    Parameters
    ----------
    history : object
        Must expose ``history`` (per-epoch metric values, convertible to a
        DataFrame) and ``epoch`` (the epoch numbers), as read below.

    One figure is drawn per metric that was recorded: MAE, categorical
    cross-entropy, accuracy and loss. A metric that is missing is skipped
    instead of raising ``KeyError``, and the validation curve is only drawn
    when its ``val_`` column exists.
    """
    hist1 = pd.DataFrame(history.history)
    hist1['epoch'] = history.epoch
    print(hist1.columns)

    def _plot_metric(metric, ylabel, train_label, val_label):
        # Draw one figure: training curve plus validation curve when recorded.
        if metric not in hist1.columns:
            return
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.plot(hist1['epoch'], hist1[metric], label=train_label)
        val_metric = 'val_' + metric
        if val_metric in hist1.columns:
            plt.plot(hist1['epoch'], hist1[val_metric], label=val_label)
        plt.legend()

    _plot_metric('mae', 'Mean Absolute Error', 'Train Error', 'Val_Error')
    # NOTE(review): Keras may record this metric under a snake_case name;
    # confirm against the printed columns. Both spellings are tried.
    for cce_name in ('CategoricalCrossentropy', 'categorical_crossentropy'):
        _plot_metric(cce_name, 'Categorical Crossentropy', 'Train CCE', 'Val_CCE')
    _plot_metric('accuracy', 'Accuracy', 'Accuracy', 'Val Accuracy')
    _plot_metric('loss', 'Loss', 'Loss', 'Val Loss')

# Parameters Tuning by Optuna

    
The Optuna study used here is similar to the one in [this Titanic notebook](https://www.kaggle.com/code/najeebz/titanic-deep-learning-tensorflow-optuna-optimized), with a few changes relevant to this dataset.

In [None]:
# Optuna example that optimizes multi-layer perceptrons using Tensorflow (Eager Execution).

import optuna
from packaging import version

import tensorflow as tf

BATCHSIZE = 32  # NOTE(review): not currently passed to model.fit
CLASSES = 7     # number of units in the output layer
EPOCHS = 10     # epochs trained per Optuna trial
# The output layer uses softmax (EndActivation), so the model emits
# probabilities rather than logits; the loss must use from_logits=False.
LOSS=tf.keras.losses.CategoricalCrossentropy(from_logits=False)
EndActivation= 'softmax'


def create_model(trial):
    """Assemble the dense network described by ``trial``.

    The trial supplies the number of hidden layers (``n_layers``), the L2
    regularisation factor (``weight_decay``) and the width of each hidden
    layer (``n_units_l<i>``). The input width comes from the module-level
    ``X`` and the output width from ``CLASSES``.

    Returns the uncompiled ``tf.keras.Sequential`` model.
    """
    depth = trial.suggest_int("n_layers", 1, 3)
    l2_strength = trial.suggest_float("weight_decay", 1e-9, 1e-5, log=True)

    # Input normalisation, then the hidden stack, then the output layer.
    stack = [tf.keras.layers.BatchNormalization(input_shape=[len(X.keys())])]
    for layer_index in range(depth):
        width = trial.suggest_int(f"n_units_l{layer_index}", 4, 1024, log=True)
        hidden = tf.keras.layers.Dense(
            width,
            activation="relu",
            kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
        )
        stack.append(hidden)

    output = tf.keras.layers.Dense(
        CLASSES,
        activation=EndActivation,
        kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
    )
    stack.append(output)
    return tf.keras.Sequential(stack)


def create_optimizer(trial):
    """Pick an optimizer and its hyperparameters for ``trial``.

    The optimizer name is suggested from ``optimizer_options`` (currently only
    'Lion'). Each known optimizer gets a log-uniform learning-rate range;
    RMSprop additionally gets ``weight_decay`` and ``momentum``, SGD gets
    ``momentum``.

    Returns the instantiated optimizer from ``tf.optimizers``.
    """
    # Only 'Lion' is searched. The range table below also covers RMSprop, Adam,
    # SGD, Nadam, Ftrl, Adamax, AdamW, Adagrad, Adafactor and Adadelta, so any
    # of them can be enabled by adding its name here.
    optimizer_options = ['Lion']

    # (low, high) bounds of the learning-rate search for each optimizer.
    learning_rate_ranges = {
        "RMSprop": (1e-7, 1e-4),
        "Adam": (1e-7, 1e-4),
        "SGD": (1e-5, 1e-1),
        "Nadam": (1e-7, 1e-4),
        "Lion": (1e-7, 1e-4),
        "Ftrl": (1e-7, 1e-4),
        "Adamax": (1e-7, 1e-4),
        "AdamW": (1e-9, 1e-5),
        "Adagrad": (1e-7, 1e-4),
        "Adafactor": (1e-7, 1e-4),
        "Adadelta": (1e-7, 1e-4),
    }

    optimizer_selected = trial.suggest_categorical("optimizer", optimizer_options)

    kwargs = {}
    bounds = learning_rate_ranges.get(optimizer_selected)
    if bounds is not None:
        low, high = bounds
        kwargs["learning_rate"] = trial.suggest_float(
            optimizer_selected + "_learning_rate", low, high, log=True
        )

    # Optimizer-specific extras, suggested after the learning rate.
    if optimizer_selected == "RMSprop":
        kwargs["weight_decay"] = trial.suggest_float("RMSprop_weight_decay", 0.85, 0.99)
        kwargs["momentum"] = trial.suggest_float("RMSprop_momentum", 1e-3, 1e-1, log=True)
    elif optimizer_selected == "SGD":
        kwargs["momentum"] = trial.suggest_float("SGD_momentum", 1e-5, 1e-1, log=True)

    optimizer_class = getattr(tf.optimizers, optimizer_selected)
    return optimizer_class(**kwargs)


def objective(trial):
    """Optuna objective: train one candidate model and return its validation accuracy.

    Builds the model and optimizer suggested by ``trial``, fits for ``EPOCHS``
    epochs on ``X_train``/``y_train`` with ``class_dict`` as class weights, and
    evaluates on ``X_test``/``y_test``.

    Returns the second entry of ``model.evaluate`` (the compiled 'accuracy'
    metric; the first entry is the loss).
    """
    # Every trial builds a fresh model. Clearing Keras' global state first
    # stops memory from accumulating over the many trials of the study.
    tf.keras.backend.clear_session()

    # Build model and optimizer.
    model = create_model(trial)
    optimizer = create_optimizer(trial)
    model.compile(
        loss=LOSS,
        optimizer=optimizer,
        metrics=["accuracy"],
    )

    model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        shuffle=True,
        epochs=EPOCHS,
        class_weight=class_dict,
        verbose=False,
    )

    # Evaluate the model accuracy on the validation set.
    score = model.evaluate(X_test, y_test, verbose=0)
    return score[1]

if __name__ == "__main__":
    # Run the hyperparameter search; higher objective values are better.
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=1000)

    finished = len(study.trials)
    print("Number of finished trials: ", finished)

    # Report the best trial and the settings that produced it.
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)
    print('EPOCHS: ',EPOCHS)
    print("  Params: ")
    for name, chosen in trial.params.items():
        print(f"    {name}: {chosen}")

In [None]:
def get_optimizer(params):
    """Recreate the optimizer described by a trial's parameter dictionary.

    ``params`` must contain ``"optimizer"`` (the optimizer name) and
    ``"<name>_learning_rate"``. RMSprop also reads ``"<name>_weight_decay"``
    and ``"<name>_momentum"``; SGD also reads ``"<name>_momentum"``.

    Raises ``ValueError`` for an optimizer name that is not supported.
    """
    optimizer_name = params["optimizer"]
    learning_rate = params[optimizer_name + '_learning_rate']

    # Optimizers that are configured with a learning rate only.
    learning_rate_only = {
        "Adam", "Nadam", "Lion", "Ftrl", "Adamax",
        "AdamW", "Adagrad", "Adafactor", "Adadelta",
    }

    extra = {}
    if optimizer_name == "RMSprop":
        extra["weight_decay"] = params[optimizer_name + '_weight_decay']
        extra["momentum"] = params[optimizer_name + '_momentum']
    elif optimizer_name == "SGD":
        extra["momentum"] = params[optimizer_name + '_momentum']
    elif optimizer_name not in learning_rate_only:
        raise ValueError(f"Unsupported optimizer: {optimizer_name}")

    # Resolve the class lazily so only the selected optimizer has to exist.
    optimizer_class = getattr(tf.keras.optimizers, optimizer_name)
    return optimizer_class(learning_rate=learning_rate, **extra)

# Creating the Model

In [None]:
# Rebuild the architecture of the best trial and compile it with that trial's
# optimizer settings. MAE, accuracy and the loss itself are tracked as metrics.
best_trial = study.best_trial
best_params = best_trial.params
mod = create_model(best_trial)
mod.compile(
    optimizer=get_optimizer(best_params),
    loss=LOSS,
    metrics=['mae', 'accuracy', LOSS],
)
mod.summary()

In [None]:
print(class_dict)

# Training the Model

In [None]:
# The final fit gets ten times the per-trial epoch budget. Early stopping
# watches validation accuracy and restores the best weights seen.
epoch = EPOCHS * 10
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.0001,
    patience=20,
    start_from_epoch=epoch / 10,
    restore_best_weights=True,
    mode='auto',
)
fit_options = dict(
    epochs=epoch,
    verbose=1,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    class_weight=class_dict,
)
history = mod.fit(X_train, y_train, **fit_options)
print('Results: ', mod.evaluate(X_test, y_test))

# Evaluating the Training Process

In [None]:
plot_history(history)

# Predicting the Results

In [None]:
# Keep the predicted probabilities at full precision. Rounding to one decimal
# collapses them to a handful of levels and creates ties, which can only hurt
# a ranking-based metric such as ROC AUC.
# NOTE(review): assumes the competition is scored on predicted probabilities — confirm.
result = mod.predict(test_df)

In [None]:
result

# Making the Submission File

In [None]:
submission = pd.DataFrame( result, columns=y.keys())

In [None]:
submission.insert(0, 'id', test['id']) 

In [None]:
submission

In [None]:
submission.to_csv('submission.csv', index=False)

In [None]:
submission.head(30)