In [1]:
!pip install keras-tuner



In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import pandas as pd 

data_df = pd.read_csv("cleaned_mode.csv")
data_df.head()

Unnamed: 0,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,target
0,city_103,0.92,Male,1,none,Graduate,STEM,>=10,50-99,Pvt Ltd,1,36,1.0
1,Other,0.776,Male,0,none,Graduate,STEM,>=10,50-99,Pvt Ltd,>4,47,0.0
2,city_21,0.624,Male,0,full-time,Graduate,STEM,5-9,50-99,Pvt Ltd,never,83,0.0
3,Other,0.789,Male,0,none,Graduate,Business Degree,0-2,50-99,Pvt Ltd,never,52,1.0
4,Other,0.767,Male,1,none,Masters,STEM,>=10,50-99,Funded Startup,4,8,0.0


In [3]:
# Convert categorical data to numeric with `pd.get_dummies`
data_df = pd.get_dummies(data_df)
data_df

Unnamed: 0,city_development_index,relevent_experience,training_hours,target,city_Other,city_city_100,city_city_102,city_city_103,city_city_104,city_city_11,...,company_type_NGO,company_type_Other,company_type_Public Sector,company_type_Pvt Ltd,last_new_job_1,last_new_job_2,last_new_job_3,last_new_job_4,last_new_job_>4,last_new_job_never
0,0.920,1,36,1.0,0,0,0,1,0,0,...,0,0,0,1,1,0,0,0,0,0
1,0.776,0,47,0.0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
2,0.624,0,83,0.0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
3,0.789,0,52,1.0,1,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
4,0.767,1,8,0.0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19153,0.878,0,42,1.0,1,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
19154,0.920,1,52,1.0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,1,0,0
19155,0.920,1,44,0.0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,1,0,0
19156,0.802,1,97,0.0,1,0,0,0,0,0,...,0,0,0,1,0,1,0,0,0,0


In [4]:
# Split our preprocessed data into our features and target arrays
X = data_df.drop(['target'], axis=1)
y = data_df['target']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
# Obtrain training shape
X_train_scaled.shape

(14368, 59)

In [7]:
# Obtain training features
input_features = X_train_scaled.shape[1]

In [8]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model_op1 = tf.keras.models.Sequential()

    # Allow keras-tuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow keras-tuner to decide number of neurons in first layer
    nn_model_op1.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value= 90,
        step=5), activation=activation, input_dim=input_features))

    # Allow keras-tuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        nn_model_op1.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=30,
            step=5),
            activation=activation))
    
    nn_model_op1.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model_op1.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model_op1

In [9]:
# Import the keras-tuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

In [10]:
# Run the keras-tuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 60 Complete [00h 00m 10s]
val_accuracy: 0.7839248180389404

Best val_accuracy So Far: 0.7889353036880493
Total elapsed time: 00h 04m 21s
INFO:tensorflow:Oracle triggered exit


In [11]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters()[0]
best_hyper.values

{'activation': 'sigmoid',
 'first_units': 71,
 'num_layers': 3,
 'units_0': 21,
 'units_1': 26,
 'units_2': 1,
 'units_3': 21,
 'units_4': 1,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0049'}

In [12]:
# Evaluate best model against full test data
best_model = tuner.get_best_models()[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

150/150 - 0s - loss: 0.4599 - accuracy: 0.7889 - 200ms/epoch - 1ms/step
Loss: 0.45989447832107544, Accuracy: 0.7889353036880493
