In [4]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [11]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout , Flatten
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint , EarlyStopping 
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import StandardScaler


In [5]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
covertype = fetch_ucirepo(id=31) 
  
# data (as pandas dataframes) 
X = covertype.data.features 
y = covertype.data.targets 
  
# metadata 
print(covertype.metadata) 
  
# variable information 
print(covertype.variables) 


{'uci_id': 31, 'name': 'Covertype', 'repository_url': 'https://archive.ics.uci.edu/dataset/31/covertype', 'data_url': 'https://archive.ics.uci.edu/static/public/31/data.csv', 'abstract': 'Classification of pixels into 7 forest cover types based on attributes such as elevation, aspect, slope, hillshade, soil-type, and more.', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 581012, 'num_features': 54, 'feature_types': ['Categorical', 'Integer'], 'demographics': [], 'target_col': ['Cover_Type'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1998, 'last_updated': 'Sat Mar 16 2024', 'dataset_doi': '10.24432/C50K5N', 'creators': ['Jock Blackard'], 'intro_paper': None, 'additional_info': {'summary': 'Predicting forest cover type from cartographic variables only (no remotely sensed data).  The actual forest cover type for a given observation (30 x 30 meter cell) was determined from

In [8]:
print(X.head())
print(y.head())


   Elevation  Aspect  Slope  Horizontal_Distance_To_Hydrology  \
0       2596      51      3                               258   
1       2590      56      2                               212   
2       2804     139      9                               268   
3       2785     155     18                               242   
4       2595      45      2                               153   

   Vertical_Distance_To_Hydrology  Horizontal_Distance_To_Roadways  \
0                               0                              510   
1                              -6                              390   
2                              65                             3180   
3                             118                             3090   
4                              -1                              391   

   Hillshade_9am  Hillshade_Noon  Hillshade_3pm  \
0            221             232            148   
1            220             235            151   
2            234             238   

In [9]:
print(X.shape, y.shape)


(581012, 54) (581012, 1)


In [32]:
X_train_df, X_temp_df, y_train_df, y_temp_df = train_test_split(X, y, test_size=0.3, random_state=42)
X_val_df, X_test_df, y_val_df, y_test_df = train_test_split(X_temp_df, y_temp_df, test_size=0.5, random_state=42)

In [33]:
numeric_features = X_train_df.columns[:10]
numeric_features

Index(['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points'],
      dtype='object')

In [None]:
scaler = StandardScaler()

X_train_scaled = X_train_df.copy()
X_val_scaled = X_val_df.copy()
X_test_scaled = X_test_df.copy()

X_train_scaled[numeric_features] = scaler.fit_transform(X_train_df[numeric_features])
X_val_scaled[numeric_features] = scaler.transform(X_val_df[numeric_features])
X_test_scaled[numeric_features] = scaler.transform(X_test_df[numeric_features])

X_train = X_train_scaled.values
X_val = X_val_scaled.values
X_test = X_test_scaled.values

y_train = tf.keras.utils.to_categorical(y_train_df.values.flatten() - 1, num_classes=7)
y_val = tf.keras.utils.to_categorical(y_val_df.values.flatten() - 1, num_classes=7)
y_test = tf.keras.utils.to_categorical(y_test_df.values.flatten() - 1, num_classes=7)

In [36]:
# 54 input features
# 7 output classes
model = Sequential()
model.add(Flatten(input_shape=(54,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(7, activation='softmax'))
model.summary()

In [37]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
                metrics=['accuracy','auc'])

In [38]:
history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=20)
                    

Epoch 1/20
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 3ms/step - accuracy: 0.7336 - auc: 0.9625 - loss: 0.6327 - val_accuracy: 0.7866 - val_auc: 0.9767 - val_loss: 0.4936
Epoch 2/20
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 0.7946 - auc: 0.9781 - loss: 0.4785 - val_accuracy: 0.8024 - val_auc: 0.9801 - val_loss: 0.4531
Epoch 3/20
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 0.8110 - auc: 0.9812 - loss: 0.4422 - val_accuracy: 0.8220 - val_auc: 0.9826 - val_loss: 0.4240
Epoch 4/20
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 0.8227 - auc: 0.9830 - loss: 0.4187 - val_accuracy: 0.8203 - val_auc: 0.9829 - val_loss: 0.4178
Epoch 5/20
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 3ms/step - accuracy: 0.8300 - auc: 0.9843 - loss: 0.4017 - val_accuracy: 0.8309 - val_auc: 0.9842 - val_loss: 0.4024
Epoch

In [39]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.8-py3-none-any.whl.metadata (5.6 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.8-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.4/129.4 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.8 kt-legacy-1.0.5


In [58]:
def build_model(hp):
    reg_method = hp.Choice('regularization_method', values=['dropout', 'l2'])
    
    model = Sequential()
    
    if reg_method == 'l2':
        l2_reg = hp.Choice('l2_value', values=[0.001, 0.01, 0.1])
        
        model.add(
            Dense(units=hp.Int('units_1', min_value=32, max_value=256, step=32), 
                  activation='relu', 
                  input_shape=(54,),
                  kernel_regularizer=l2(l2_reg))
        )
        model.add(
            Dense(units=hp.Int('units_2', min_value=32, max_value=256, step=32), 
                  activation='relu',
                  kernel_regularizer=l2(l2_reg))
        )
    else:  # dropout
        dropout_rate = hp.Choice('dropout_rate', values=[0.2, 0.3, 0.5])
        
        model.add(
            Dense(units=hp.Int('units_1', min_value=32, max_value=256, step=32), 
                  activation='relu', 
                  input_shape=(54,))
        )
        model.add(Dropout(dropout_rate))
        
        model.add(
            Dense(units=hp.Int('units_2', min_value=32, max_value=256, step=32), 
                  activation='relu')
        )
        model.add(Dropout(dropout_rate))

    model.add(Dense(7, activation='softmax'))  # 7 classes for covertype

    model.compile(optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[0.001,0.01])),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [59]:
from kerastuner.tuners import RandomSearch

tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='covertype_tuning',
    project_name='covertype_hyperparameter_search'
)

Reloading Tuner from covertype_tuning/covertype_hyperparameter_search/tuner0.json


In [60]:
tuner.search(X_train, y_train, epochs=5, validation_data=(X_val, y_val) , batch_size=32)


In [61]:
tuner.results_summary()

Results summary
Results in covertype_tuning/covertype_hyperparameter_search
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 4 summary
Hyperparameters:
regularization_method: dropout
dropout_rate: 0.3
units_1: 256
units_2: 192
learning_rate: 0.001
l2_value: 0.1
Score: 0.8632733821868896

Trial 3 summary
Hyperparameters:
regularization_method: dropout
dropout_rate: 0.3
units_1: 64
units_2: 64
learning_rate: 0.001
l2_value: 0.001
Score: 0.798019528388977

Trial 2 summary
Hyperparameters:
regularization_method: dropout
dropout_rate: 0.3
units_1: 128
units_2: 96
learning_rate: 0.01
l2_value: 0.1
Score: 0.7767119407653809

Trial 1 summary
Hyperparameters:
regularization_method: l2
dropout_rate: 0.3
units_1: 32
units_2: 128
learning_rate: 0.001
l2_value: 0.01
Score: 0.7577680349349976

Trial 0 summary
Hyperparameters:
regularization_method: l2
dropout_rate: 0.5
units_1: 128
units_2: 192
learning_rate: 0.01
l2_value: 0.001
Score: 0.7544978857040405


In [62]:
best_model = tuner.get_best_models(num_models=1)[0]
best_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [63]:
best_parameters = tuner.get_best_hyperparameters(num_trials=1)[0]
best_parameters.values

{'regularization_method': 'dropout',
 'dropout_rate': 0.3,
 'units_1': 256,
 'units_2': 192,
 'learning_rate': 0.001,
 'l2_value': 0.1}

In [47]:
model_2 = Sequential([
        Dense(units=best_parameters.get('units_1'), activation='relu', input_shape=(54,)),
        Dropout(best_parameters.get('dropout_rate')),
        Dense(units=best_parameters.get('units_2'), activation='relu'),
        Dropout(best_parameters.get('dropout_rate')),
        Dense(7, activation='softmax')
    ])
model_2.compile(optimizer=Adam(learning_rate=best_parameters.get('learning_rate')), loss='categorical_crossentropy', metrics=['accuracy','auc'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True)
history_2 = model_2.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val), callbacks=[early_stopping, model_checkpoint])

Epoch 1/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - accuracy: 0.7360 - auc: 0.9633 - loss: 0.6221 - val_accuracy: 0.8108 - val_auc: 0.9814 - val_loss: 0.4433
Epoch 2/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.7968 - auc: 0.9782 - loss: 0.4736 - val_accuracy: 0.8352 - val_auc: 0.9852 - val_loss: 0.3895
Epoch 3/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8131 - auc: 0.9813 - loss: 0.4377 - val_accuracy: 0.8488 - val_auc: 0.9876 - val_loss: 0.3608
Epoch 4/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8245 - auc: 0.9830 - loss: 0.4159 - val_accuracy: 0.8562 - val_auc: 0.9887 - val_loss: 0.3442
Epoch 5/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8316 - auc: 0.9842 - loss: 0.4003 - val_accuracy: 0.8657 - val_auc: 0.9899 - val_loss: 0.3273
Epoch

Epoch 1/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - accuracy: 0.7360 - auc: 0.9633 - loss: 0.6221 - val_accuracy: 0.8108 - val_auc: 0.9814 - val_loss: 0.4433
Epoch 2/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 3ms/step - accuracy: 0.7968 - auc: 0.9782 - loss: 0.4736 - val_accuracy: 0.8352 - val_auc: 0.9852 - val_loss: 0.3895
Epoch 3/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8131 - auc: 0.9813 - loss: 0.4377 - val_accuracy: 0.8488 - val_auc: 0.9876 - val_loss: 0.3608
Epoch 4/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8245 - auc: 0.9830 - loss: 0.4159 - val_accuracy: 0.8562 - val_auc: 0.9887 - val_loss: 0.3442
Epoch 5/50
[1m12710/12710[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3ms/step - accuracy: 0.8316 - auc: 0.9842 - loss: 0.4003 - val_accuracy: 0.8657 - val_auc: 0.9899 - val_loss: 0.3273
Epoch

<keras.src.callbacks.history.History at 0x799ea97505c0>

In [52]:
test_loss, test_accuracy, test_auc = model_2.evaluate(X_test, y_test)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Test AUC: {test_auc}")

[1m2724/2724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.8982 - auc: 0.9940 - loss: 0.2576
Test Loss: 0.2550227642059326
Test Accuracy: 0.8997154235839844
Test AUC: 0.9941146373748779
