## Importing necessary libraries

In [1]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error 
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import warnings 
import tensorflow as tf
import h5py
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.metrics import accuracy_score


warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)


np.random.seed(1)
tf.random.set_seed(1)

## Loading the dataset

In [2]:
df = pd.read_csv('asthma_scaled.csv',header = 0)



df

Unnamed: 0,Sampler State,Indoor total spores,Outdoor total spores,Smoking at home,Pre-existing Respiratory Severity,Post-Hurricane Respiratory Symptoms,Post-Hurricane Respiratory Severity,Blower Door Test at 50 Pa,Nose Conjestion,Runny Nose,Sore Throat,Body Aches,Shortness of Breath,Coughing,Wheezing,Fatigue,Other
0,Louisiana,0.857531,0.600928,0,0.935478,0,0.000000,0.302308,0,0,0,0,0,1,1,1,1
1,Louisiana,0.736371,0.523923,1,0.000000,1,0.000000,0.167651,0,1,1,1,1,1,1,1,1
2,Louisiana,0.482403,0.307157,1,0.000000,1,0.000000,0.233777,1,1,1,1,1,1,1,1,0
3,Louisiana,1.000000,0.536898,1,0.000000,0,0.732677,0.491702,1,1,1,1,0,0,0,1,1
4,Louisiana,0.000000,0.806030,1,0.000000,1,0.000000,0.455453,1,1,1,1,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,Florida,0.402976,0.591573,0,0.000000,1,0.000000,0.072765,1,1,1,1,1,1,1,1,0
58,Florida,0.589576,0.626633,1,0.587859,0,0.000000,0.058077,0,0,0,0,0,0,0,0,1
59,Florida,0.482403,0.400078,1,0.000000,1,0.000000,0.078863,1,1,1,1,1,1,1,1,0
60,Florida,0.697962,0.884263,0,0.000000,1,0.000000,0.051627,1,1,1,1,1,1,1,1,0


## Selecting important features for the analysis based on correlation matrix and forward feature selection

In [3]:
selected_features =     [
#    'Outdoor total spores', 
    'Indoor total spores',
#     'Blower Door Test @50 pascals (CFM)', 
#     'Pre-severity', 
#    'Post-Hurricane Respiratory Severity', 
    'Nose Conjestion', 
#     'Runny Nose',
    'Sore Throat', 
#     'Body Aches', 
#     'Shortness of Breath', 
    'Coughing',
#     'Wheezing', 
   'Fatigue', 
#    'Other'
    ]

## Spliting the train and test dataset

In [4]:
from sklearn.model_selection import train_test_split


X = np.float32(df[selected_features].values)
Y= df['Post-Hurricane Respiratory Symptoms']

# Split data into training and testing sets
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

## Classification TSNE  model developement

In [5]:
import numpy as np
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

# Combine training and test data for t-SNE
scaler = StandardScaler()
X_scaled = scaler.fit_transform(np.vstack((Xtrain, Xtest)))

# Apply t-SNE transformation to the combined data
tsne = TSNE(n_components=2, random_state=1)
X_tsne = tsne.fit_transform(X_scaled)

# Split the t-SNE transformed data back into training and test sets
Xtrain_tsne = X_tsne[:len(Xtrain)]
Xtest_tsne = X_tsne[len(Xtrain):]

# Define the input layer (based on the transformed 2D data)
inputs = tf.keras.Input(shape=(Xtrain_tsne.shape[1],))

# Define the hidden layers
hidden = tf.keras.layers.Dense(16, activation='relu', kernel_initializer=tf.keras.initializers.he_normal(seed=1),
                               bias_initializer='zeros', kernel_regularizer=tf.keras.regularizers.L1(0.001))(inputs)
hidden = tf.keras.layers.Dense(32, activation='relu', kernel_initializer=tf.keras.initializers.he_normal(seed=1),
                               bias_initializer='zeros', kernel_regularizer=tf.keras.regularizers.L1(0.001))(hidden)

# Define the output layer for binary classification
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)


# Create the model
model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
model.summary()

# Custom early stopping callback
class CustomEarlyStopping(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs['val_loss'] < 0.6506 and logs['loss'] < 0.6506:
            self.model.stop_training = True

# Instantiate callbacks
custom_early_stopping = CustomEarlyStopping()
model_checkpoint = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)

# Exponential decay learning rate schedule
lr = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.0001,
    decay_steps=150,
    decay_rate=0.92,
    staircase=True
)

# Compile the model for binary classification
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    Xtrain_tsne,
    Ytrain,
    batch_size=8,
    epochs=2000,
    validation_data=(Xtest_tsne, Ytest),
    callbacks=[custom_early_stopping, model_checkpoint]
)


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 2)]               0         
                                                                 
 dense (Dense)               (None, 16)                48        
                                                                 
 dense_1 (Dense)             (None, 32)                544       
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 625
Trainable params: 625
Non-trainable params: 0
_________________________________________________________________
Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15

Epoch 111/2000
Epoch 112/2000
Epoch 113/2000
Epoch 114/2000
Epoch 115/2000
Epoch 116/2000
Epoch 117/2000
Epoch 118/2000
Epoch 119/2000
Epoch 120/2000
Epoch 121/2000
Epoch 122/2000
Epoch 123/2000
Epoch 124/2000
Epoch 125/2000
Epoch 126/2000
Epoch 127/2000
Epoch 128/2000
Epoch 129/2000
Epoch 130/2000
Epoch 131/2000
Epoch 132/2000
Epoch 133/2000
Epoch 134/2000
Epoch 135/2000
Epoch 136/2000
Epoch 137/2000
Epoch 138/2000
Epoch 139/2000
Epoch 140/2000
Epoch 141/2000
Epoch 142/2000
Epoch 143/2000
Epoch 144/2000
Epoch 145/2000
Epoch 146/2000
Epoch 147/2000
Epoch 148/2000
Epoch 149/2000
Epoch 150/2000
Epoch 151/2000
Epoch 152/2000
Epoch 153/2000
Epoch 154/2000
Epoch 155/2000
Epoch 156/2000
Epoch 157/2000
Epoch 158/2000
Epoch 159/2000
Epoch 160/2000
Epoch 161/2000
Epoch 162/2000
Epoch 163/2000
Epoch 164/2000
Epoch 165/2000
Epoch 166/2000
Epoch 167/2000


Epoch 168/2000
Epoch 169/2000
Epoch 170/2000
Epoch 171/2000
Epoch 172/2000
Epoch 173/2000
Epoch 174/2000
Epoch 175/2000
Epoch 176/2000
Epoch 177/2000
Epoch 178/2000
Epoch 179/2000
Epoch 180/2000
Epoch 181/2000
Epoch 182/2000
Epoch 183/2000
Epoch 184/2000
Epoch 185/2000
Epoch 186/2000
Epoch 187/2000
Epoch 188/2000
Epoch 189/2000
Epoch 190/2000
Epoch 191/2000
Epoch 192/2000
Epoch 193/2000
Epoch 194/2000
Epoch 195/2000
Epoch 196/2000
Epoch 197/2000
Epoch 198/2000
Epoch 199/2000
Epoch 200/2000
Epoch 201/2000
Epoch 202/2000
Epoch 203/2000
Epoch 204/2000
Epoch 205/2000
Epoch 206/2000
Epoch 207/2000
Epoch 208/2000
Epoch 209/2000
Epoch 210/2000
Epoch 211/2000
Epoch 212/2000
Epoch 213/2000
Epoch 214/2000
Epoch 215/2000
Epoch 216/2000
Epoch 217/2000
Epoch 218/2000
Epoch 219/2000
Epoch 220/2000
Epoch 221/2000
Epoch 222/2000
Epoch 223/2000
Epoch 224/2000
Epoch 225/2000


Epoch 226/2000
Epoch 227/2000
Epoch 228/2000
Epoch 229/2000
Epoch 230/2000
Epoch 231/2000
Epoch 232/2000
Epoch 233/2000
Epoch 234/2000
Epoch 235/2000
Epoch 236/2000
Epoch 237/2000
Epoch 238/2000
Epoch 239/2000
Epoch 240/2000
Epoch 241/2000
Epoch 242/2000
Epoch 243/2000
Epoch 244/2000
Epoch 245/2000
Epoch 246/2000
Epoch 247/2000
Epoch 248/2000
Epoch 249/2000
Epoch 250/2000
Epoch 251/2000
Epoch 252/2000
Epoch 253/2000
Epoch 254/2000
Epoch 255/2000
Epoch 256/2000
Epoch 257/2000
Epoch 258/2000
Epoch 259/2000
Epoch 260/2000
Epoch 261/2000
Epoch 262/2000
Epoch 263/2000
Epoch 264/2000
Epoch 265/2000
Epoch 266/2000
Epoch 267/2000
Epoch 268/2000
Epoch 269/2000
Epoch 270/2000
Epoch 271/2000
Epoch 272/2000
Epoch 273/2000
Epoch 274/2000
Epoch 275/2000
Epoch 276/2000
Epoch 277/2000
Epoch 278/2000
Epoch 279/2000
Epoch 280/2000
Epoch 281/2000
Epoch 282/2000
Epoch 283/2000


Epoch 284/2000
Epoch 285/2000
Epoch 286/2000
Epoch 287/2000
Epoch 288/2000
Epoch 289/2000
Epoch 290/2000
Epoch 291/2000
Epoch 292/2000
Epoch 293/2000
Epoch 294/2000
Epoch 295/2000
Epoch 296/2000
Epoch 297/2000
Epoch 298/2000
Epoch 299/2000
Epoch 300/2000
Epoch 301/2000
Epoch 302/2000
Epoch 303/2000
Epoch 304/2000
Epoch 305/2000
Epoch 306/2000
Epoch 307/2000
Epoch 308/2000
Epoch 309/2000
Epoch 310/2000
Epoch 311/2000
Epoch 312/2000
Epoch 313/2000
Epoch 314/2000
Epoch 315/2000
Epoch 316/2000
Epoch 317/2000
Epoch 318/2000


## Model evaluation for the train and test

In [6]:
from sklearn.metrics import accuracy_score
import numpy as np

# Predict and evaluate with t-SNE transformed features
y_pred_train = model.predict(Xtrain_tsne)
y_pred_test = model.predict(Xtest_tsne)

# Convert predicted probabilities to binary labels
y_pred_train_binary = np.round(y_pred_train).astype(int)
y_pred_test_binary = np.round(y_pred_test).astype(int)

# Calculate accuracy using the converted binary labels
train_accuracy = accuracy_score(Ytrain, y_pred_train_binary)
test_accuracy = accuracy_score(Ytest, y_pred_test_binary)

print(f"Training Accuracy: {train_accuracy:.3f}")
print(f"Test Accuracy: {test_accuracy:.3f}")


Training Accuracy: 0.816
Test Accuracy: 0.923


## Saving the model

In [10]:
# Specify the path and filename for the model
model_path = 'asthma_TSNE.h5'

# Use the .save() method to save your model
model.save(model_path)

print(f"Model saved to {model_path}")


Model saved to asthma_TSNE.h5


## Calculating confusion matrix and specificity and sendsitivity

In [7]:
from sklearn.metrics import confusion_matrix

# Calculate confusion matrix for training set
cm_train = confusion_matrix(Ytrain, y_pred_train_binary)

# Calculate confusion matrix for test set
cm_test = confusion_matrix(Ytest, y_pred_test_binary)

# Calculate specificity and sensitivity for training set
tn_train, fp_train, fn_train, tp_train = cm_train.ravel()
specificity_train = tn_train / (tn_train + fp_train)
sensitivity_train = tp_train / (tp_train + fn_train)

# Calculate specificity and sensitivity for test set
tn_test, fp_test, fn_test, tp_test = cm_test.ravel()
specificity_test = tn_test / (tn_test + fp_test)
sensitivity_test = tp_test / (tp_test + fn_test)

print("Confusion Matrix - Training Set:")
print(cm_train)
print("\nSpecificity - Training Set:", specificity_train)
print("Sensitivity - Training Set:", sensitivity_train)

print("\nConfusion Matrix - Test Set:")
print(cm_test)
print("\nSpecificity - Test Set:", specificity_test)
print("Sensitivity - Test Set:", sensitivity_test)


Confusion Matrix - Training Set:
[[21  4]
 [ 5 19]]

Specificity - Training Set: 0.84
Sensitivity - Training Set: 0.7916666666666666

Confusion Matrix - Test Set:
[[6 1]
 [0 6]]

Specificity - Test Set: 0.8571428571428571
Sensitivity - Test Set: 1.0


## Calculating F1 score and F2 score

In [8]:
from sklearn.metrics import f1_score

# Calculate F1 score for both sets
f1_train = f1_score(Ytrain, y_pred_train_binary)
f1_test = f1_score(Ytest, y_pred_test_binary)

print("\nF1 Score - Training Set:", f1_train)
print("F1 Score - Test Set:", f1_test)

# Calculate F2 score for both sets
beta = 2  # Weighting factor for recall
f2_train = (1 + beta**2) * (specificity_train * sensitivity_train) / ((beta**2 * specificity_train) + sensitivity_train)
f2_test = (1 + beta**2) * (specificity_test * sensitivity_test) / ((beta**2 * specificity_test) + sensitivity_test)

print("\nF2 Score - Training Set:", f2_train)
print("F2 Score - Test Set:", f2_test)



F1 Score - Training Set: 0.8085106382978724
F1 Score - Test Set: 0.923076923076923

F2 Score - Training Set: 0.8008831794460056
F2 Score - Test Set: 0.9677419354838709


## Calculating AUC and False alarm rate

In [9]:
from sklearn.metrics import roc_auc_score, confusion_matrix
import numpy as np

# Predict probabilities on t-SNE transformed data
y_pred_train_probabilities = model.predict(Xtrain_tsne)
y_pred_test_probabilities = model.predict(Xtest_tsne)

# Calculate AUC for both sets
auc_train = roc_auc_score(Ytrain, y_pred_train_probabilities)
auc_test = roc_auc_score(Ytest, y_pred_test_probabilities)

print("\nAUC - Training Set:", auc_train)
print("AUC - Test Set:", auc_test)

# Calculate confusion matrices to get True Negatives (TN) and False Positives (FP)
tn_train, fp_train, fn_train, tp_train = confusion_matrix(Ytrain, np.round(y_pred_train_probabilities)).ravel()
false_alarm_rate_train = fp_train / (fp_train + tn_train)

tn_test, fp_test, fn_test, tp_test = confusion_matrix(Ytest, np.round(y_pred_test_probabilities)).ravel()
false_alarm_rate_test = fp_test / (fp_test + tn_test)

print("\nFalse Alarm Rate - Training Set:", false_alarm_rate_train)
print("False Alarm Rate - Test Set:", false_alarm_rate_test)



AUC - Training Set: 0.87
AUC - Test Set: 0.9285714285714286

False Alarm Rate - Training Set: 0.16
False Alarm Rate - Test Set: 0.14285714285714285
