In [40]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

import tensorflow_datasets as tfds
import pandas as pd
from kerastuner import RandomSearch
from sklearn.datasets import fetch_covtype

In [41]:
data = fetch_covtype(as_frame=True)

In [42]:
X = data.data
y = data.target

In [43]:
X.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_30,Soil_Type_31,Soil_Type_32,Soil_Type_33,Soil_Type_34,Soil_Type_35,Soil_Type_36,Soil_Type_37,Soil_Type_38,Soil_Type_39
0,2596.0,51.0,3.0,258.0,0.0,510.0,221.0,232.0,148.0,6279.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2590.0,56.0,2.0,212.0,-6.0,390.0,220.0,235.0,151.0,6225.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2804.0,139.0,9.0,268.0,65.0,3180.0,234.0,238.0,135.0,6121.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2785.0,155.0,18.0,242.0,118.0,3090.0,238.0,238.0,122.0,6211.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2595.0,45.0,2.0,153.0,-1.0,391.0,220.0,234.0,150.0,6172.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
X.shape

(581012, 54)

In [45]:
X.info()

<class 'pandas.DataFrame'>
RangeIndex: 581012 entries, 0 to 581011
Data columns (total 54 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   Elevation                           581012 non-null  float64
 1   Aspect                              581012 non-null  float64
 2   Slope                               581012 non-null  float64
 3   Horizontal_Distance_To_Hydrology    581012 non-null  float64
 4   Vertical_Distance_To_Hydrology      581012 non-null  float64
 5   Horizontal_Distance_To_Roadways     581012 non-null  float64
 6   Hillshade_9am                       581012 non-null  float64
 7   Hillshade_Noon                      581012 non-null  float64
 8   Hillshade_3pm                       581012 non-null  float64
 9   Horizontal_Distance_To_Fire_Points  581012 non-null  float64
 10  Wilderness_Area_0                   581012 non-null  float64
 11  Wilderness_Area_1                   5

In [46]:
from sklearn.model_selection import train_test_split
X_train,X_rem,y_train,y_rem=train_test_split(X,y,test_size=0.3,random_state=42)

In [47]:
X_test,X_val,y_test,y_val=train_test_split(X_rem,y_rem,test_size=0.5,random_state=42)

In [48]:
# the data is already one hot encoded
pass

In [49]:
numerical_features = [
    "Elevation",
    "Aspect",
    "Slope",
    "Horizontal_Distance_To_Hydrology",
    "Vertical_Distance_To_Hydrology",
    "Horizontal_Distance_To_Roadways",
    "Hillshade_9am",
    "Hillshade_Noon",
    "Hillshade_3pm",
    "Horizontal_Distance_To_Fire_Points"
]

X_num = X_train[numerical_features]


In [50]:
# scale
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()

X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features]  = scaler.transform(X_test[numerical_features])
X_val[numerical_features] = scaler.transform(X_val[numerical_features] )




In [51]:
X_train.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_30,Soil_Type_31,Soil_Type_32,Soil_Type_33,Soil_Type_34,Soil_Type_35,Soil_Type_36,Soil_Type_37,Soil_Type_38,Soil_Type_39
110220,-1.213285,1.52197,-1.215756,-0.985653,-0.709902,-0.962484,-0.229567,0.541278,0.614032,-0.199579,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
363425,0.666947,-0.684312,-0.413945,-0.444373,0.182714,-0.831053,0.817019,-0.166989,-0.667745,-0.668463,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111410,0.178158,1.039625,-0.28031,1.447756,-0.229263,2.016207,-0.864994,0.996592,1.346476,-0.496589,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
552313,-0.289224,0.512618,1.590582,-0.952706,-0.194931,0.389659,-0.93975,1.502497,1.241841,-0.500358,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
107881,0.745438,-1.068401,-1.082121,-0.364357,-0.349423,0.473647,0.293726,0.136554,-0.013777,1.807877,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
model = Sequential([
    Dense(10, activation="relu", input_shape=(X_train.shape[1],)),
    Dense(10, activation="relu"),
    Dense(7, activation="softmax"),
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [53]:
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [54]:
X_train.head()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,...,Soil_Type_30,Soil_Type_31,Soil_Type_32,Soil_Type_33,Soil_Type_34,Soil_Type_35,Soil_Type_36,Soil_Type_37,Soil_Type_38,Soil_Type_39
110220,-1.213285,1.52197,-1.215756,-0.985653,-0.709902,-0.962484,-0.229567,0.541278,0.614032,-0.199579,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
363425,0.666947,-0.684312,-0.413945,-0.444373,0.182714,-0.831053,0.817019,-0.166989,-0.667745,-0.668463,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111410,0.178158,1.039625,-0.28031,1.447756,-0.229263,2.016207,-0.864994,0.996592,1.346476,-0.496589,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
552313,-0.289224,0.512618,1.590582,-0.952706,-0.194931,0.389659,-0.93975,1.502497,1.241841,-0.500358,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
107881,0.745438,-1.068401,-1.082121,-0.364357,-0.349423,0.473647,0.293726,0.136554,-0.013777,1.807877,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
y.head()

0    5
1    5
2    2
3    2
4    5
Name: Cover_Type, dtype: int32

In [56]:
from tensorflow.keras.utils import to_categorical

y_train_oh = to_categorical(y_train - 1)
y_val_oh   = to_categorical(y_val - 1)
y_test_oh  = to_categorical(y_test - 1)

# model.fit(
#     X_train, y_train_oh,
#     validation_data=(X_val, y_val_oh),
#     epochs=20,
#     batch_size=512
# )

### dropout model

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers


In [58]:

def build_model(hp, input_dim):
    model = Sequential()

    # First hidden layer
    units_1 = hp.Choice('units_1', [64, 128, 256])
    dropout_1 = hp.Choice('dropout_1', [0.2, 0.3, 0.5])
    l2_1 = hp.Choice('l2_1', [0.001, 0.01])
    model.add(Dense(units_1, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_1),
                    input_shape=(input_dim,)))
    model.add(Dropout(dropout_1))

    units_2 = hp.Choice('units_2', [64, 128, 256])
    dropout_2 = hp.Choice('dropout_2', [0.2, 0.3, 0.5])
    l2_2 = hp.Choice('l2_2', [0.001, 0.01])
    model.add(Dense(units_2, activation='relu',
                    kernel_regularizer=regularizers.l2(l2_2)))
    model.add(Dropout(dropout_2))

    # out
    model.add(Dense(7, activation='softmax'))

    lr = hp.Choice('learning_rate', [0.01, 0.001])
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss='categorical_crossentropy',
        metrics=['accuracy', 'AUC']
    )

    return model


In [59]:
input_dim = X_train.shape[1]  # number of features

turner = RandomSearch(
    lambda hp: build_model(hp, input_dim=input_dim),
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='my_dir',
    project_name='helloworld'
)

Reloading Tuner from my_dir/helloworld/tuner0.json


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

checkpoint = ModelCheckpoint(
    filepath='best_model.h5',
    monitor='val_auc',
    verbose=1,
    save_best_only=True,
    mode='max'
)

earlystop = EarlyStopping(
    monitor='val_auc',
    patience=5,
    restore_best_weights=True,
    mode='max',
    verbose=1
)


In [None]:
turner.search(
    X_train.values, y_train_oh,
    validation_data=(X_val.values, y_val_oh),
    epochs=20,
    callbacks=[checkpoint, earlystop],
)

In [62]:
from tensorflow.keras.models import load_model

# load the best saved model
best_model = load_model('best_model.h5')

test_accuracy, test_auc = best_model.evaluate(X_test, y_test_oh)

# results
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test AUC: {test_auc:.4f}")



[1m2724/2724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.7646 - loss: 0.5543
Test Accuracy: 0.5543
Test AUC: 0.7646
