In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
# --- 1. Define Constants and Load Data Splits ---
PROCESSED_DATA_DIR = '../data/processed/'
train_df = pd.read_csv(os.path.join(PROCESSED_DATA_DIR, 'train.csv'))
val_df = pd.read_csv(os.path.join(PROCESSED_DATA_DIR, 'val.csv'))

In [3]:
# --- 2. Create a Function to Load and Flatten Images ---
def load_data_and_flatten(dataframe):
    """
    Loads .npy image files listed in a dataframe and flattens them into 1D vectors.
    """
    # Get the project root directory to build absolute paths
    project_root = os.path.abspath(os.path.join(PROCESSED_DATA_DIR, '..', '..'))
    
    images = []
    labels = []
    
    for idx, row in tqdm(dataframe.iterrows(), total=len(dataframe), desc="Loading data"):
        # Construct absolute path to the image
        img_path = os.path.join(project_root, row['filepath'])
        
        # Load the processed .npy image array
        img_array = np.load(img_path)
        
        # Flatten the 2D image (128x128) into a 1D vector (16384)
        images.append(img_array.flatten())
        labels.append(row['label'])
        
    return np.array(images), np.array(labels)

In [4]:
# --- 3. Prepare Training and Validation Sets ---
print("Preparing training data...")
X_train, y_train = load_data_and_flatten(train_df)

print("\nPreparing validation data...")
X_val, y_val = load_data_and_flatten(val_df)

print(f"\nTraining data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")

Preparing training data...


Loading data: 100%|██████████| 1750/1750 [00:00<00:00, 4330.09it/s]



Preparing validation data...


Loading data: 100%|██████████| 375/375 [00:00<00:00, 4261.31it/s]


Training data shape: (1750, 16384)
Validation data shape: (375, 16384)





In [5]:
# --- 4. Train the Logistic Regression Model ---
print("\nTraining Logistic Regression model...")

# Initialize the model
# max_iter is increased to ensure the model converges with this many features
model = LogisticRegression(max_iter=1000, random_state=42)

# Train the model
model.fit(X_train, y_train)

print("Model training complete.")


Training Logistic Regression model...
Model training complete.


In [6]:
# --- 5. Evaluate the Model ---
print("\nEvaluating model on the validation set...")

# Make predictions on the validation data
y_pred = model.predict(X_val)

# Calculate and print accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f"\nValidation Accuracy: {accuracy:.4f}")

# Print a detailed classification report
print("\nClassification Report:")
print(classification_report(y_val, y_pred, target_names=['Normal (0)', 'Hemorrhage (1)']))


Evaluating model on the validation set...

Validation Accuracy: 0.9387

Classification Report:
                precision    recall  f1-score   support

    Normal (0)       0.96      0.97      0.96       328
Hemorrhage (1)       0.76      0.74      0.75        47

      accuracy                           0.94       375
     macro avg       0.86      0.86      0.86       375
  weighted avg       0.94      0.94      0.94       375



In [7]:
# --- 6. Build the MLP Model ---
print("\nBuilding the MLP model...")

# Get the number of features from the training data shape
input_features = X_train.shape[1]

model_mlp = Sequential([
    # Input layer - specify the input shape
    Dense(128, activation='relu', input_shape=(input_features,)),
    
    # A dropout layer to prevent overfitting
    Dropout(0.3),
    
    # Hidden layer
    Dense(64, activation='relu'),

    # Output layer for binary classification
    Dense(1, activation='sigmoid') 
])

# Compile the model
model_mlp.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Print the model summary
model_mlp.summary()


Building the MLP model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# --- 7. Train the MLP Model ---
print("\nTraining the MLP model...")

history = model_mlp.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,  # You can experiment with more epochs
    batch_size=32,
    verbose=1
)

print("\nMLP model training complete.")


Training the MLP model...
Epoch 1/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8099 - loss: 0.6414 - val_accuracy: 0.8747 - val_loss: 0.3113
Epoch 2/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8493 - loss: 0.3783 - val_accuracy: 0.8747 - val_loss: 0.3089
Epoch 3/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8767 - loss: 0.3209 - val_accuracy: 0.8880 - val_loss: 0.2502
Epoch 4/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8819 - loss: 0.2758 - val_accuracy: 0.8907 - val_loss: 0.2294
Epoch 5/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8973 - loss: 0.2343 - val_accuracy: 0.9013 - val_loss: 0.2185
Epoch 6/10
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9275 - loss: 0.1747 - val_accuracy: 0.9173 - val_loss: 0.2097
Epoch 7/10
[

In [9]:
# --- 8. Evaluate the MLP Model ---
print("\nEvaluating MLP model on the validation set...")

# Make predictions (the output will be probabilities)
y_pred_proba = model_mlp.predict(X_val)

# Convert probabilities to binary class labels (0 or 1)
y_pred_mlp = (y_pred_proba > 0.5).astype(int)

# Calculate and print accuracy
accuracy_mlp = accuracy_score(y_val, y_pred_mlp)
print(f"\nValidation Accuracy (MLP): {accuracy_mlp:.4f}")

# Print a detailed classification report
print("\nClassification Report (MLP):")
print(classification_report(y_val, y_pred_mlp, target_names=['Normal (0)', 'Hemorrhage (1)']))


Evaluating MLP model on the validation set...
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 

Validation Accuracy (MLP): 0.9200

Classification Report (MLP):
                precision    recall  f1-score   support

    Normal (0)       0.93      0.98      0.96       328
Hemorrhage (1)       0.77      0.51      0.62        47

      accuracy                           0.92       375
     macro avg       0.85      0.74      0.79       375
  weighted avg       0.91      0.92      0.91       375

