# In [None]:
# HABNet ML Pipeline (Regenerated: Full Version)

import os
import shutil
import numpy as np
import pandas as pd
from PIL import Image
import cv2
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed, LSTM, Masking, Input
from tensorflow.keras.utils import to_categorical

# Constants
# Number of daily time steps per sample; day images are read as '01.png' .. '10.png'.
N_DAYS = 10
# Number of image modalities per day; each is read from a sub-folder named '1' .. '7'.
N_MODALITIES = 7
# Size every image is resized to; also used to build the network's input shape.
IMG_SIZE = (100, 100)
DATASET_DIR = 'data_set'  # Must have folders '0/' and '1/' with subfolders as samples

# -----------------------------
# Load Data
# -----------------------------
def _load_sample_sequence(sample_path):
    """Read one sample folder into a fixed-shape float32 array, or return None.

    The result always has shape ``(N_DAYS, height, width, N_MODALITIES)`` with
    ``(height, width) = IMG_SIZE``. Images that are missing or unreadable stay
    as all-zero planes, so every readable image keeps its own day/modality
    index. ``None`` is returned when not a single image could be read.
    """
    height, width = IMG_SIZE
    sequence = np.zeros((N_DAYS, height, width, N_MODALITIES), dtype='float32')
    found_any = False

    for day in range(1, N_DAYS + 1):
        for mod in range(1, N_MODALITIES + 1):
            img_path = os.path.join(sample_path, f'{mod}', f'{day:02}.png')
            # cv2.imread signals failure by returning None instead of raising.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            # cv2.resize expects dsize as (width, height); the returned array
            # is (height, width), matching the slot it is written into.
            img = cv2.resize(img, (width, height))
            sequence[day - 1, :, :, mod - 1] = img.astype('float32') / 255.0
            found_any = True

    return sequence if found_any else None


def load_real_data(base_dir):
    """Load all labelled image sequences found under ``base_dir``.

    Expected layout: ``base_dir/<label>/<sample>/<modality>/<day>.png`` where
    ``label`` is ``'0'`` or ``'1'``, ``modality`` runs from 1 to N_MODALITIES
    and ``day`` is zero-padded from 01 to N_DAYS.

    Every returned sample has the same shape, so the result is always a
    regular array: missing or unreadable images are zero-filled rather than
    dropped (dropping them would shorten the day/modality axes and shift the
    remaining images to the wrong index). Samples with no readable image at
    all, and entries that are not directories, are skipped.

    Args:
        base_dir: Root directory containing the ``0`` and ``1`` label folders.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 array of shape
        ``(n_samples, N_DAYS, height, width, N_MODALITIES)`` with pixel values
        scaled to [0, 1]; ``y`` holds the integer label of each sample. Both
        are empty 1-D arrays when nothing could be loaded.
    """
    X, y = [], []

    for label_dir in ['0', '1']:
        label_path = os.path.join(base_dir, label_dir)
        if not os.path.isdir(label_path):
            continue

        # Sorted so that the sample order, and therefore any seeded split
        # derived from it, does not depend on filesystem listing order.
        for sample_folder in sorted(os.listdir(label_path)):
            sample_path = os.path.join(label_path, sample_folder)
            if not os.path.isdir(sample_path):
                continue  # stray files are not samples

            sequence = _load_sample_sequence(sample_path)
            if sequence is not None:
                X.append(sequence)
                y.append(int(label_dir))

    return np.array(X), np.array(y)

# Load the whole dataset once; the classical and deep models below both reuse X and y.
X, y = load_real_data(DATASET_DIR)
print("Loaded:", X.shape, y.shape)
# Fail fast with an actionable message: an empty X would otherwise only
# surface later as an opaque reshape error.
if len(X) == 0:
    raise ValueError(
        f"No samples could be loaded from {DATASET_DIR!r}; expected "
        "sub-folders '0/' and '1/' containing one folder per sample."
    )

# -----------------------------
# Classical Models
# -----------------------------
# The classical estimators take 2-D input, so each sample is flattened into
# a single feature vector before the 80/20 split.
X_flat = X.reshape(len(X), -1)
X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y, random_state=42, test_size=0.2
)


def _fit_and_report(heading, estimator):
    """Print ``heading``, fit on the training split and print the test report."""
    print(heading)
    estimator.fit(X_train, y_train)
    print(classification_report(y_test, estimator.predict(X_test)))
    return estimator


logreg = _fit_and_report("\nLogistic Regression", LogisticRegression(max_iter=200))
ridge = _fit_and_report("\nRidge Classifier", RidgeClassifier())
xgb = _fit_and_report(
    "\nXGBoost",
    XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
)

# -----------------------------
# Deep Learning Model
# -----------------------------
# Same split parameters as the classical models, applied to the un-flattened data.
X_train_dl, X_test_dl, y_train_dl, y_test_dl = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Built layer by layer: TimeDistributed applies the wrapped layer to every
# time step, then the LSTM consumes the resulting sequence of feature vectors.
model = Sequential()
model.add(Input(shape=(N_DAYS, *IMG_SIZE, N_MODALITIES)))
model.add(TimeDistributed(Conv2D(filters=16, kernel_size=(3, 3), activation='relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Conv2D(filters=32, kernel_size=(3, 3), activation='relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Flatten()))
# NOTE(review): this masks on the flattened CNN features, not on raw pixels;
# confirm that is the intended way to skip empty time steps.
model.add(Masking(mask_value=0.0))
model.add(LSTM(units=64))
model.add(Dropout(rate=0.5))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(
    X_train_dl,
    y_train_dl,
    batch_size=4,
    epochs=10,
    validation_split=0.1,
)

# Evaluation
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 class predictions.
probabilities = model.predict(X_test_dl)
preds = (probabilities > 0.5).astype(int)
print("\nDeep Learning Model Report:")
print(classification_report(y_test_dl, preds))
