In [17]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import keras_tuner as kt
from sklearn.model_selection import train_test_split
from tensorflow import keras

In [18]:
def read_csv_to_df(path):
    """Load the CSV found at ``path`` into a pandas DataFrame.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    frame = pd.read_csv(filepath_or_buffer=path)
    return frame

def _spectrograms_from_df(df):
    """Convert a long-format spectrogram table into a list of 3-D power arrays.

    Rows are grouped by ``spectrogram_id`` and, inside each spectrogram, by
    ``frequency``. The ``power`` values of each frequency group (in their
    original row order) become one row of that spectrogram's array.

    Args:
        df: DataFrame with ``spectrogram_id``, ``frequency`` and ``power`` columns.

    Returns:
        A list holding one array per spectrogram, each shaped
        ``(n_frequency_groups, n_values_per_group, 1)``.

    Raises:
        ValueError: from ``np.stack`` when the frequency groups of a
            spectrogram do not all have the same number of power values.
    """
    spectrograms = []
    for _, spec_rows in df.groupby('spectrogram_id'):
        power_rows = [
            freq_rows['power'].to_numpy()
            for _, freq_rows in spec_rows.groupby("frequency")
        ]
        # Append a trailing axis of size 1 so every spectrogram is a
        # single-channel 3-D array.
        spectrograms.append(np.stack(power_rows)[..., np.newaxis])
    return spectrograms


# Frequency band encoded in the data file names (lower, upper).
f_range = (10,950)

h_data_path = f"s_data/healthy_f_{f_range[0]}_{f_range[1]}.csv"
p_data_path = f"s_data/pathology_f_{f_range[0]}_{f_range[1]}.csv"

h_df = read_csv_to_df(h_data_path)
p_df = read_csv_to_df(p_data_path)

# One array per spectrogram; both classes go through the same conversion.
h_data = _spectrograms_from_df(h_df)
p_data = _spectrograms_from_df(p_df)

In [19]:
# Healthy spectrograms form class 1 and pathology spectrograms form class 0.
# np.stack adds a leading sample axis to each list of equally shaped arrays.
X_class_1 = np.stack(h_data)
X_class_0 = np.stack(p_data)

# Report the per-class shapes as a sanity check.
print("Shape of X_class_1:", X_class_1.shape)
print("Shape of X_class_0:", X_class_0.shape)

# Feature array: all class-0 samples first, followed by all class-1 samples.
X = np.vstack([X_class_0, X_class_1])
print("Shape of X:", X.shape) 

# Labels are laid out in the same order as the rows of X.
y_class_0 = np.zeros(len(X_class_0))
y_class_1 = np.ones(len(X_class_1))
y = np.hstack([y_class_0, y_class_1])
print(y.shape)

Shape of X_class_1: (51, 257, 33, 1)
Shape of X_class_0: (38, 257, 33, 1)
Shape of X: (89, 257, 33, 1)
(89,)


In [20]:
# Hold out half of the samples for testing; the fixed seed makes the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    shuffle=True,
    random_state=42,
)
# Store the labels as int32 class ids.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)


In [34]:
def model_builder(hp):
    """Build and compile the CNN classifier for one hyperparameter trial.

    Architecture: Conv2D(32, 3x3, relu) -> Flatten -> Dense(units, relu)
    -> Dense(2, softmax).

    Args:
        hp: KerasTuner hyperparameter container. Two hyperparameters are
            registered on it: ``units`` (32 to 512 in steps of 32) and
            ``learning_rate`` (one of 1e-2, 1e-3, 1e-4).

    Returns:
        A ``keras.Sequential`` model compiled with Adam, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential()

    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(keras.layers.Flatten())

    # Width of the hidden Dense layer is tuned between 32 and 512.
    hp_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(keras.layers.Dense(units=hp_units, activation='relu'))
    # The output layer applies softmax, so the model emits class probabilities.
    model.add(keras.layers.Dense(2, activation="softmax"))

    # Learning rate for Adam is tuned over 0.01, 0.001 and 0.0001.
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        # from_logits=False because the model output is already softmax
        # probabilities; from_logits=True would make the loss treat those
        # probabilities as raw logits.
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'],
    )

    return model

# Hyperband search over `model_builder`, ranked by validation accuracy.
# NOTE(review): an existing my_dir/intro_to_kt project is reloaded rather than
# re-searched; pass overwrite=True (or delete the directory) after changing
# model_builder so trials from an older model definition are not reused.
tuner = kt.Hyperband(
    hypermodel=model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='my_dir',
    project_name='intro_to_kt',
)

# End a trial once validation loss has not improved for 5 consecutive epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
    callbacks=[stop_early],
)

# Keep only the top-ranked hyperparameter set.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is {best_hps.get('units')} and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")

Reloading Tuner from my_dir/intro_to_kt/tuner0.json

The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is 448 and the optimal learning rate for the optimizer
is 0.001.



In [35]:
# Train a model with the best hyperparameters to see how validation accuracy
# evolves over 50 epochs.
model = tuner.hypermodel.build(best_hps)
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_split=0.2,
)

# The best epoch is the 1-based position of the first maximum validation accuracy.
val_acc_per_epoch = history.history['val_accuracy']
best_index, _ = max(enumerate(val_acc_per_epoch), key=lambda pair: pair[1])
best_epoch = best_index + 1
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50


ValueError: Input 0 of layer "conv2d_1" is incompatible with the layer: expected min_ndim=4, found ndim=2. Full shape received: (None, 8481)

In [29]:
# Fresh model with the tuned hyperparameters.
hypermodel = tuner.hypermodel.build(best_hps)

# Train it for exactly the number of epochs that gave the best validation accuracy.
hypermodel.fit(
    X_train,
    y_train,
    epochs=best_epoch,
    validation_split=0.2,
)

Epoch 1/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - accuracy: 0.3327 - loss: 1.6290 - val_accuracy: 0.7778 - val_loss: 3.9027
Epoch 2/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.8336 - loss: 1.6044 - val_accuracy: 0.7778 - val_loss: 5.6694
Epoch 3/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.8926 - loss: 0.1927 - val_accuracy: 0.7778 - val_loss: 10.3857
Epoch 4/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.8821 - loss: 0.2697 - val_accuracy: 0.7778 - val_loss: 12.3713
Epoch 5/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.9116 - loss: 0.2207 - val_accuracy: 0.6667 - val_loss: 13.0964
Epoch 6/6
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.9116 - loss: 0.1532 - val_accuracy: 0.6667 - val_loss: 12.8791


<keras.src.callbacks.history.History at 0x760e2eb1ef50>

In [31]:
# Score the retrained model on the held-out test split.
eval_result = hypermodel.evaluate(x=X_test, y=y_test)
print("[test loss, test accuracy]:", eval_result)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4970 - loss: 26.0044 
[test loss, test accuracy]: [23.605451583862305, 0.5111111402511597]
