In [1]:
# import dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint

In [2]:
# read data
df = pd.read_csv("ML_data_unscaled_dummys_encoded.csv")
df = df.drop('Entry_ID',axis=1)
df.head()

Unnamed: 0,Age,Hours_per_day,BPM,Anxiety,Depression,Insomnia,OCD,While_working_No,While_working_Yes,Instrumentalist_No,...,Rock_Rarely,Rock_Sometimes,Rock_Very frequently,Video game music_Never,Video game music_Rarely,Video game music_Sometimes,Video game music_Very frequently,Music_effects_Improve,Music_effects_No effect,Music_effects_Worsen
0,18.0,4.0,132.0,7.0,7.0,10.0,2.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,61.0,2.5,84.0,9.0,7.0,3.0,3.0,0.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,18.0,4.0,107.0,7.0,2.0,5.0,9.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3,18.0,5.0,86.0,8.0,8.0,7.0,7.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
4,18.0,3.0,66.0,4.0,8.0,6.0,0.0,0.0,1.0,1.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 511 entries, 0 to 510
Data columns (total 94 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Age                               511 non-null    float64
 1   Hours_per_day                     511 non-null    float64
 2   BPM                               511 non-null    float64
 3   Anxiety                           511 non-null    float64
 4   Depression                        511 non-null    float64
 5   Insomnia                          511 non-null    float64
 6   OCD                               511 non-null    float64
 7   While_working_No                  511 non-null    float64
 8   While_working_Yes                 511 non-null    float64
 9   Instrumentalist_No                511 non-null    float64
 10  Instrumentalist_Yes               511 non-null    float64
 11  Composer_No                       511 non-null    float64
 12  Composer

## Depression model

In [4]:
# split data into features and target
y = df.Depression.values
X = df.drop('Depression', axis=1).values

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [13]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    # activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=32, max_value=512, step=32), activation=hp.Choice('activation',['relu','tanh','sigmoid'])))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=32, max_value=512, step=32),
            activation=hp.Choice('activation',['relu','tanh','sigmoid'])))
    
    nn_model.add(tf.keras.layers.Dense(units=1))

    # Compile the model
    nn_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [19]:
# Import the kerastuner library
import kerastuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=500,
    hyperband_iterations=2,
    directory='dep_hyp',
    overwrite=True,
    project_name='dep_hyp')

In [15]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,validation_data=(X_test_scaled,y_test))

Trial 1422 Complete [00h 00m 06s]
val_accuracy: 0.0625

Best val_accuracy So Far: 0.140625
Total elapsed time: 01h 04m 59s

Search: Running Trial #1423

Value             |Best Value So Far |Hyperparameter
448               |128               |first_units
tanh              |tanh              |activation
5                 |1                 |num_layers
416               |64                |units_0
288               |64                |units_1
416               |352               |units_2
288               |224               |units_3
512               |480               |units_4
512               |32                |units_5
167               |56                |tuner/epochs
56                |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
1                 |0                 |tuner/round
1407              |None              |tuner/trial_id

Epoch 57/167
Epoch 58/167
Epoch 59/167
Epoch 60/167
Epoch 61/167
Epoch 62/167
Epoch 63/167
Epoch 64/167
E

KeyboardInterrupt: 

In [16]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'first_units': 128,
 'activation': 'tanh',
 'num_layers': 1,
 'units_0': 64,
 'units_1': 64,
 'units_2': 352,
 'units_3': 224,
 'units_4': 480,
 'units_5': 32,
 'tuner/epochs': 56,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [17]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

4/4 - 0s - loss: 4.7623 - accuracy: 0.1406 - 134ms/epoch - 33ms/step
Loss: 4.762311935424805, Accuracy: 0.140625


Loss: 4.762311935424805, Accuracy: 0.140625

In [18]:
best_model.save("models/trained_dep.h5")