In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import keras_tuner as kt
from tensorflow.keras.optimizers import Adam

### Importing the data, preprocessing it, and splitting into train and test

In [53]:
# Load the dataset: every column except 'Class' is used as a feature and
# 'Class' is the label column.
bean_data = pd.read_csv('Dry_Bean_Dataset.csv')

X = bean_data.drop(columns=['Class'])

# Encode the categorical labels as integers; label_encoder.classes_ keeps the
# original names so reports can map the integers back.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(bean_data['Class'])

# Split BEFORE fitting the scaler so the held-out rows never influence the
# scaling statistics. The split depends only on the row count, `y` and
# `random_state`, so it selects the same rows as splitting a scaled frame would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit min/max on the training rows only, then apply that one transform to
# every row. Test-row values may therefore fall slightly outside [-1, 1].
min_max = MinMaxScaler(feature_range=(-1, 1))
min_max.fit(X_train_raw)
X_scaled = pd.DataFrame(min_max.transform(X), index=X.index)

# Select the partitions by row label so each keeps its original row index.
X_train = X_scaled.loc[X_train_raw.index]
X_test = X_scaled.loc[X_test_raw.index]

In [42]:
# Share of each encoded class among the training labels.
train_label_frame = pd.DataFrame(y_train)
train_label_frame.value_counts(normalize=True)

0
3    0.260562
6    0.193699
5    0.148880
4    0.141624
2    0.119765
0    0.097079
1    0.038391
Name: proportion, dtype: float64

In [43]:
# Share of each encoded class among the test labels.
test_label_frame = pd.DataFrame(y_test)
test_label_frame.value_counts(normalize=True)

0
3    0.260375
6    0.193537
5    0.149100
4    0.141755
2    0.119721
0    0.097319
1    0.038193
Name: proportion, dtype: float64

### Forming the model and making predictions

In [None]:
# Baseline classifier: two ReLU hidden layers with dropout, softmax output.
# The input width and the number of output units are read from the data
# instead of being hard-coded, so the cell keeps working if the feature set
# or the set of classes changes.
n_features = X_train.shape[1]
n_classes = len(np.unique(y))

model = Sequential([
    Dense(64, activation='relu', input_shape=(n_features,)),  # first hidden layer; declares the input width
    Dropout(0.5),
    Dense(32, activation='relu'),  # second hidden layer
    Dropout(0.5),
    Dense(n_classes, activation='softmax')  # one output unit per class
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # labels are integer-encoded, not one-hot
              metrics=['accuracy'])

# Train the model, reserving 20% of the training data for validation
history = model.fit(X_train, y_train,
                    validation_split=0.2,
                    epochs=50,
                    batch_size=32,
                    verbose=1)

# Evaluate on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

In [19]:
# Define the model-building function
def build_model(hp):
    """Build and compile one candidate classifier for a tuner trial.

    The network has two ReLU hidden layers, each followed by dropout, and a
    softmax output layer. Layer widths, dropout rates and the Adam learning
    rate are drawn from ``hp``.

    Args:
        hp: Hyperparameter container supplied by the tuner; queried through
            ``hp.Int``, ``hp.Float`` and ``hp.Choice``.

    Returns:
        The compiled ``Sequential`` model.

    Note:
        Reads the notebook-level ``X_train`` (for the input width) and ``y``
        (for the number of classes), so both must be defined before calling.
    """
    # Derive the sizes from the data rather than hard-coding them.
    n_features = X_train.shape[1]
    n_classes = len(np.unique(y))

    model = Sequential()
    # First hidden layer; also declares the input width
    model.add(Dense(
        units=hp.Int('units_layer1', min_value=32, max_value=256, step=32),
        activation='relu',
        input_shape=(n_features,)
    ))
    model.add(Dropout(hp.Float('dropout_layer1', min_value=0.2, max_value=0.5, step=0.1)))

    # Second hidden layer
    model.add(Dense(
        units=hp.Int('units_layer2', min_value=32, max_value=128, step=32),
        activation='relu'
    ))
    model.add(Dropout(hp.Float('dropout_layer2', min_value=0.2, max_value=0.5, step=0.1)))

    # Output layer: one unit per class
    model.add(Dense(n_classes, activation='softmax'))

    # Compile the model; the loss expects integer-encoded labels
    model.compile(
        optimizer=Adam(learning_rate=hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model

# Define the tuner: random search over the space declared in build_model,
# ranking trials by validation accuracy.
# NOTE: trial results are stored under my_dir/tune_tabular_model. If that
# directory already exists the tuner reloads it instead of searching again;
# delete it (or pass overwrite=True) to force a fresh search.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,  # Number of models to try
    seed=42,  # fixed so the sampled hyperparameter combinations are repeatable
    directory='my_dir',
    project_name='tune_tabular_model'
)

# Run the tuner
tuner.search(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hps.values}")

# Build a fresh model from the best hyperparameters and train it from scratch
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(X_train, y_train, validation_split=0.2, epochs=50, batch_size=32)


Reloading Tuner from my_dir\tune_tabular_model\tuner0.json
Best Hyperparameters: {'units_layer1': 192, 'dropout_layer1': 0.30000000000000004, 'units_layer2': 64, 'dropout_layer2': 0.30000000000000004, 'learning_rate': 0.001}
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Class scores for every test row, one column per class
predictions = best_model.predict(X_test)
# Predicted label = index of the highest score in each row
predicted_classes = predictions.argmax(axis=1)



### Evaluation Metrics

In [27]:
# TODO: move these imports into the imports cell at the top of the notebook
# Libraries for metrics and evaluation
import numpy as np
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    log_loss, roc_auc_score, roc_curve, precision_recall_curve,
    cohen_kappa_score
)
from sklearn.preprocessing import label_binarize

# Libraries for visualization
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# TensorFlow/Keras for top-k accuracy (if needed)
from tensorflow.keras.metrics import top_k_categorical_accuracy

In [26]:
# Score the tuned model on the held-out test split (loss first, then accuracy)
test_metrics = best_model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Test Accuracy: 92.07%


In [None]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1, labelled with the original class names
class_names = label_encoder.classes_
report_text = classification_report(y_test, predicted_classes, target_names=class_names)
print(report_text)

              precision    recall  f1-score   support

    BARBUNYA       0.94      0.89      0.92       265
      BOMBAY       1.00      1.00      1.00       104
        CALI       0.91      0.95      0.93       326
    DERMASON       0.94      0.88      0.91       709
       HOROZ       0.97      0.95      0.96       386
       SEKER       0.96      0.95      0.95       406
        SIRA       0.83      0.91      0.87       527

    accuracy                           0.92      2723
   macro avg       0.93      0.93      0.93      2723
weighted avg       0.92      0.92      0.92      2723

