To - Do:
- Categorize the Data and rerun the FNN model. (Can use same code just organize data) - Sana
- Check the predictions, threshold (0.1,0.2,0.3,0.4) - Sihaam
- Play around with the layers - Sihaam
- Look into pretrained/prebuilt models/architextures. (Tabnet, or tree based models. Random Forest, etc) - Sihaam 
- Categotize the data with a random forest. - Sana

Imports

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report






load and process data

In [None]:
df = pd.read_csv("data.csv")
# Encode labels: H = 0 (Healthy), P = 1 (Patient)
df['label'] = df['class'].map({'H': 0, 'P': 1})
df = df.dropna()

# Classification Split
M_endings = ('1', '14', '18', '23') 
G_endings = ('2', '3', '4', '5','21', '24')
C_endings = ('6','7', '8', '9', '10', '11', '12', '13', '15', '16', '17', '19', '22', '25') 

memorization_features = [col for col in df.columns if col.endswith(M_endings)]
graphic_features = [col for col in df.columns if col.endswith(G_endings)]
copy_features = [col for col in df.columns if col.endswith(C_endings)]

memorization_df = df[memorization_features]
graphic_df = df[graphic_features]
copy_df = df[copy_features]

# (combine the dataframes), setting up features
X = pd.concat([memorization_df, graphic_df, copy_df], axis=1)
y = df['label'].values

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train/test
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)




Logistic Regression

In [52]:

# Train logistic regression
model = LogisticRegression(max_iter=5000)  
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f"\nLogistic Regression Accuracy: {acc:.4f}")




Logistic Regression Accuracy: 0.6857


FNN Model

In [53]:
# Build FNN
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.4),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    Dropout(0.2),

    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping
early_stop = EarlyStopping(patience=10, restore_best_weights=True)

# Train
model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=150,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)

# Predict
y_pred = (model.predict(X_test) > 0.5).astype("int32")
acc = accuracy_score(y_test, y_pred)
print(f"\nFinal Model Accuracy: {acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 60ms/step - accuracy: 0.5176 - loss: 0.8439 - val_accuracy: 0.6786 - val_loss: 0.4329
Epoch 2/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.8602 - loss: 0.4068 - val_accuracy: 0.8571 - val_loss: 0.3398
Epoch 3/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.8772 - loss: 0.3522 - val_accuracy: 0.8929 - val_loss: 0.3042
Epoch 4/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.9128 - loss: 0.2222 - val_accuracy: 0.8929 - val_loss: 0.2712
Epoch 5/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9470 - loss: 0.1639 - val_accuracy: 0.8929 - val_loss: 0.2543
Epoch 6/150
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9781 - loss: 0.1117 - val_accuracy: 0.9286 - val_loss: 0.2440
Epoch 7/150
[1m7/7[0m [32m━━━━━━━━━━━

Cross Validation

In [54]:
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = []

for train_idx, test_idx in kfold.split(X_scaled, y):
    X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    model = Sequential([
        Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, validation_split=0.2, epochs=100, batch_size=16, verbose=0)
    
    y_pred = (model.predict(X_test) > 0.5).astype("int32")
    scores.append(accuracy_score(y_test, y_pred))

print("Cross-validated accuracy: %.4f ± %.4f" % (np.mean(scores), np.std(scores)))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
Cross-validated accuracy: 0.8277 ± 0.0507


Random Forest

In [55]:
model = RandomForestClassifier(n_estimators=100, random_state=42) #model will use 100 decision trees
model.fit(X_train, y_train)

y_pred = model.predict(X_test)


In [56]:
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8823529411764706
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88        17
           1       0.88      0.88      0.88        17

    accuracy                           0.88        34
   macro avg       0.88      0.88      0.88        34
weighted avg       0.88      0.88      0.88        34

