In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, Dataset
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.nn.utils import spectral_norm
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import random

def set_seeds(seed=42):
    torch.manual_seed(seed)
    torch.mps.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

set_seeds()

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: mps


### Setup Dataset Class

In [3]:
class SensorDataset(Dataset):
    def __init__(self, X, y, session_ids, window_size=3):
        self.X = []
        self.y = []
        self.window_size = window_size

        # Group data by session
        session_groups = {}
        for idx, session_id in enumerate(session_ids):
            if session_id not in session_groups:
                session_groups[session_id] = []
            session_groups[session_id].append((X[idx], y[idx]))

        # Create sequences within each session
        for session_id, session_data in session_groups.items():
            # Create sliding windows of walking steps
            for i in range(len(session_data) - window_size + 1):
                # Option 1: Concatenate the walking steps into a single sequence
                # This creates a 2D tensor with shape [window_size * time_steps, num_sensors]
                steps = [data[0] for data in session_data[i:i+window_size]]
                window_X = np.vstack(steps)  # Stack vertically to create one long sequence
                
                # Use the label from the last step in the window
                window_y = session_data[i+window_size-1][1]
                
                self.X.append(window_X)
                self.y.append(window_y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

### Load Data

In [4]:
X_original = np.load('sensor_data.npy')  # Shape: (num_steps, time_steps, num_sensors)
metadata = pd.read_csv('combined_metadata.csv')
y = metadata['has_ms'].values

### Split Data

In [5]:
# Split by session for sequence integrity while stratifying
sessions = metadata['session_id'].values
unique_sessions = np.unique(sessions)

# Create a mapping of session_id to MS status
session_to_ms_status = {}
for session_id in unique_sessions:
    # Get all rows for this session
    session_mask = metadata['session_id'] == session_id
    # If any row has MS, the whole session is labeled as MS
    has_ms = any(metadata.loc[session_mask, 'has_ms'] == 1)
    session_to_ms_status[session_id] = 1 if has_ms else 0

# Create lists of session IDs by MS status
ms_sessions = [s for s, status in session_to_ms_status.items() if status == 1]
non_ms_sessions = [s for s, status in session_to_ms_status.items() if status == 0]

# Perform stratified split on MS and non-MS sessions separately
train_ms, temp_ms = train_test_split(ms_sessions, test_size=0.3, random_state=42, shuffle=True)
train_non_ms, temp_non_ms = train_test_split(non_ms_sessions, test_size=0.3, random_state=42, shuffle=True)

# Further split temp sets into validation and test
val_ms, test_ms = train_test_split(temp_ms, test_size=0.5, random_state=42, shuffle=True)
val_non_ms, test_non_ms = train_test_split(temp_non_ms, test_size=0.5, random_state=42, shuffle=True)

# Combine MS and non-MS sessions for each split
train_sessions = train_ms + train_non_ms
val_sessions = val_ms + val_non_ms
test_sessions = test_ms + test_non_ms

train_indices = metadata['session_id'].isin(train_sessions)
val_indices = metadata['session_id'].isin(val_sessions)
test_indices = metadata['session_id'].isin(test_sessions)

In [6]:
# Downsample X to have 10 timesteps instead of 100
X = X_original.copy()#reshape(X_original.shape[0], 25, -1, X_original.shape[2]).mean(axis=2)

X_train, X_val, X_test = X[train_indices], X[val_indices], X[test_indices]
y_train, y_val, y_test = y[train_indices], y[val_indices], y[test_indices]

# Normalize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X.shape[2])).reshape(X_train.shape)
X_val = scaler.transform(X_val.reshape(-1, X.shape[2])).reshape(X_val.shape)
X_test = scaler.transform(X_test.reshape(-1, X.shape[2])).reshape(X_test.shape)

# Session IDs for reference
train_sessions_ids = sessions[train_indices]
val_sessions_ids = sessions[val_indices]
test_sessions_ids = sessions[test_indices]

window_size = 4

# Create datasets
train_dataset = SensorDataset(X_train, y_train, train_sessions_ids, window_size=window_size)
val_dataset = SensorDataset(X_val, y_val, val_sessions_ids, window_size=window_size)
test_dataset = SensorDataset(X_test, y_test, test_sessions_ids, window_size=window_size)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

In [149]:
class EMGCNN(nn.Module):
    def __init__(self, output_dim=2):
        super(EMGCNN, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels=7, out_channels=32, kernel_size=5, dilation=2, padding=4)
        self.bn1 = nn.BatchNorm1d(32)
        
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, dilation=2, padding=4)
        self.bn2 = nn.BatchNorm1d(64)
        
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(128)

        self.gap = nn.AdaptiveAvgPool1d(1)  # Global Average Pooling
        self.dropout = nn.Dropout(0.4)

        self.fc1 = nn.Linear(128, 64)
        self.bn_fc = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, output_dim)

        self.lstm = nn.LSTM(128, 64, batch_first=True, num_layers=2, bidirectional=True, dropout=0.3)
        self.layer_norm = nn.LayerNorm(64 * 2)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))

        x = x.permute(0, 2, 1)
        x, _ = self.lstm(x)
        x = self.layer_norm(x)
        x = x[:, -1, :]

        x = self.dropout(F.relu(self.bn_fc(self.fc1(x))))
        return self.fc2(x)

In [150]:
class SimpleMLP(nn.Module):
    def __init__(self, input_size, hidden_size=64, output_size=2, dropout_rate=0.5):
        super(SimpleMLP, self).__init__()
        
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(hidden_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            
            nn.Linear(hidden_size, output_size)
        )
    
    def forward(self, x):
        # Flatten if not already flat
        if x.dim() > 2:
            x = x.view(x.size(0), -1)
        return self.model(x)

In [164]:
class SimpleCNN(nn.Module):
    def __init__(self, num_sensors=7, num_timesteps=10, window_size=4, output_size=2):
        super(SimpleCNN, self).__init__()
        
        # Calculate input size after reshape: [batch, sensors, timesteps * window_size]
        self.num_sensors = num_sensors
        self.timesteps_per_window = num_timesteps * window_size
        
        # Simple 1D convolution layers
        self.conv1 = nn.Conv1d(num_sensors, 16, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool1 = nn.MaxPool1d(2)
        
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(32)
        self.pool2 = nn.MaxPool1d(2)
        
        # Calculate size after convolutions and pooling
        self.flat_size = 32 * (self.timesteps_per_window // 4)
        
        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_size, 64)
        self.bn_fc = nn.BatchNorm1d(64)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(64, output_size)
    
    def forward(self, x):
        # Reshape input: [batch, 1, sensors*timesteps*window] -> [batch, sensors, timesteps*window]
        x = x.view(-1, self.num_sensors, self.timesteps_per_window)
        
        # Apply convolutions
        x = self.pool1(F.relu(self.bn1(self.conv1(x))))
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))
        
        # Flatten
        x = x.view(-1, self.flat_size)
        
        # Fully connected layers
        x = self.dropout(F.relu(self.bn_fc(self.fc1(x))))
        x = self.fc2(x)
        
        return x

class LSTMModel(nn.Module):
    def __init__(self, input_size=7, hidden_size=128, num_classes=2, num_layers=3, dropout=0.3, bidirectional=False):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(
            input_size, 
            hidden_size, 
            num_layers=2, 
            batch_first=True, 
            bidirectional=bidirectional
        )

        self.bn1 = nn.BatchNorm1d(hidden_size * 2)

        self.lstm2 = nn.LSTM(
            hidden_size * 2, 
            hidden_size, 
            num_layers=2, 
            batch_first=True, 
            bidirectional=bidirectional
        )

        fc_input_size = hidden_size * 2 if bidirectional else hidden_size
        self.fc1 = nn.Linear(fc_input_size, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, num_classes)
        
    def forward(self, x):
        lstm1_out, _ = self.lstm1(x)
        _, seq_len, _ = lstm1_out.size()
        lstm1_out_reshaped = lstm1_out[:, -1, :].contiguous()
        lstm1_out_bn = self.bn1(lstm1_out_reshaped)
        lstm1_out_expanded = lstm1_out_bn.unsqueeze(1).expand(-1, seq_len, -1)
        
        # Second LSTM layer
        lstm2_out, _ = self.lstm2(lstm1_out_expanded)
        x = lstm2_out[:, -1, :]
        
        x = self.fc1(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn3(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        
        return x

In [7]:
def train(model, train_loader, criterion, optimizer, epochs=20, scheduler=None):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            output = model(X_batch)
            loss = criterion(output, y_batch)
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch + 1}, Loss: {avg_loss:.4f}')

        if scheduler is not None:
            scheduler.step(avg_loss) 

In [8]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        
    def forward(self, inputs, targets):
        CE_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-CE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * CE_loss
        
        if self.reduction == 'mean':
            return torch.mean(F_loss)
        else:
            return F_loss

In [167]:
ms_weight = len(y_train) / (2 * np.sum(y_train == 1))
ms_weight = ms_weight * 0.9

weights = torch.tensor([1.0, ms_weight], dtype=torch.float32).to(device)

# Setup for your data dimensions
num_sensors = 7
timesteps_per_step = 10
window_size = 3

# Calculate total input size
input_size = num_sensors * timesteps_per_step * window_size

# Create the MLP with the correct input size
model = LSTMModel(bidirectional=True).to(device)
criterion = FocalLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# Train and evaluate
train(model, train_loader, criterion, optimizer, epochs=50, scheduler=scheduler)

Epoch 1, Loss: 0.1756
Epoch 2, Loss: 0.1611
Epoch 3, Loss: 0.1587
Epoch 4, Loss: 0.1582
Epoch 5, Loss: 0.1576
Epoch 6, Loss: 0.1578
Epoch 7, Loss: 0.1577
Epoch 8, Loss: 0.1577
Epoch 9, Loss: 0.1572
Epoch 10, Loss: 0.1567
Epoch 11, Loss: 0.1566
Epoch 12, Loss: 0.1566
Epoch 13, Loss: 0.1571
Epoch 14, Loss: 0.1587
Epoch 15, Loss: 0.1593
Epoch 16, Loss: 0.1586
Epoch 17, Loss: 0.1592
Epoch 18, Loss: 0.1591
Epoch 19, Loss: 0.1584
Epoch 20, Loss: 0.1572
Epoch 21, Loss: 0.1577
Epoch 22, Loss: 0.1574
Epoch 23, Loss: 0.1566
Epoch 24, Loss: 0.1564
Epoch 25, Loss: 0.1556
Epoch 26, Loss: 0.1553
Epoch 27, Loss: 0.1560
Epoch 28, Loss: 0.1557
Epoch 29, Loss: 0.1549
Epoch 30, Loss: 0.1548
Epoch 31, Loss: 0.1544
Epoch 32, Loss: 0.1549
Epoch 33, Loss: 0.1536
Epoch 34, Loss: 0.1536
Epoch 35, Loss: 0.1537
Epoch 36, Loss: 0.1528
Epoch 37, Loss: 0.1522
Epoch 38, Loss: 0.1532
Epoch 39, Loss: 0.1525
Epoch 40, Loss: 0.1523
Epoch 41, Loss: 0.1533
Epoch 42, Loss: 0.1548
Epoch 43, Loss: 0.1524
Epoch 44, Loss: 0.15

In [8]:
def evaluate(model, val_loader):
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            output = model(X_batch)
            probs = torch.softmax(output, dim=1)[:, 1]
            predicted = (probs > 0.5).int()
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(y_batch.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)

    print(f'Accuracy: {acc:.2f}')
    print(f'Precision: {prec:.2f}')
    print(f'Recall: {rec:.2f}')
    print(f'F1 Score: {f1:.2f}')

In [169]:
evaluate(model, val_loader)

Accuracy: 0.63
Precision: 1.00
Recall: 0.05
F1 Score: 0.09


### Traditional

In [None]:
import numpy as np
from scipy import stats
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import xgboost as xgb
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

def extract_features(dataset):
    """Extract statistical features from time series data."""
    features = []
    labels = []
    
    for i in range(len(dataset)):
        X, y = dataset[i]
        X = X.numpy()  # Convert from tensor to numpy
        
        # For each sequence, compute statistical features
        sequence_features = []
        
        # Statistical features for each sensor
        for j in range(X.shape[1]):  # For each sensor
            sensor_data = X[:, j]
            
            # Basic statistics
            sequence_features.extend([
                np.mean(sensor_data),        # Mean
                np.std(sensor_data),         # Standard deviation
                np.min(sensor_data),         # Minimum
                np.max(sensor_data),         # Maximum
                np.median(sensor_data),      # Median
                stats.skew(sensor_data),     # Skewness
                stats.kurtosis(sensor_data), # Kurtosis
                np.percentile(sensor_data, 25),  # 25th percentile
                np.percentile(sensor_data, 75),  # 75th percentile
                np.ptp(sensor_data),         # Range (peak-to-peak)
            ])
        
        # Add features for trends/dynamics
        for j in range(X.shape[1]):  # For each sensor
            sensor_data = X[:, j]
            if len(sensor_data) > 5:  # Need sufficient data points
                # Linear trend
                from scipy import signal
                detrended = signal.detrend(sensor_data)
                trend = sensor_data - detrended
                sequence_features.append(np.mean(trend))
                
                # First and second derivatives
                first_diff = np.diff(sensor_data)
                sequence_features.extend([
                    np.mean(np.abs(first_diff)),   # Mean absolute change
                    np.std(first_diff),            # Variability of change
                ])
                
                if len(first_diff) > 1:
                    second_diff = np.diff(first_diff)
                    sequence_features.append(np.mean(np.abs(second_diff)))  # Acceleration
        
        features.append(sequence_features)
        labels.append(y.item())  # Convert from tensor to scalar
    
    return np.array(features), np.array(labels)

# Extract features
X_train_features, y_train = extract_features(train_dataset)
X_val_features, y_val = extract_features(val_dataset)
X_test_features, y_test = extract_features(test_dataset)

print(f"Extracted {X_train_features.shape[1]} features per sequence")

Extracted 98 features per sequence


In [None]:
# Train and evaluate different models
models = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced'),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42),
    "XGBoost": xgb.XGBClassifier(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=5,
            subsample=0.8,
            colsample_bytree=0.8,
            objective='binary:logistic',
            random_state=42
        )
}

results = {}

for name, model in models.items():
    print(f"\nTraining {name}...")
    
    # Special handling for XGBoost to use early stopping
    model.fit(X_train_features, y_train)
    
    # Predict on validation set
    y_val_pred = model.predict(X_val_features)
    
    # Calculate metrics
    accuracy = accuracy_score(y_val, y_val_pred)
    precision = precision_score(y_val, y_val_pred)
    recall = recall_score(y_val, y_val_pred)
    f1 = f1_score(y_val, y_val_pred)
    
    results[name] = {
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1
    }
    
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print("\nClassification Report:")
    print(classification_report(y_val, y_val_pred))
            

In [17]:
X_combined = np.vstack((X_train_features, X_val_features))
y_combined = np.concatenate((y_train, y_val))

# Best parameters: {'learning_rate': 0.2, 'max_depth': 3, 'min_samples_split': 2, 'n_estimators': 100, 'subsample': 1.0}
optimized_gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.2,
    max_depth=3,
    min_samples_split=2,
    random_state=42
)

optimized_gb.fit(X_combined, y_combined)
y_test_pred = optimized_gb.predict(X_test_features)

# Calculate metrics
accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))

Accuracy: 0.8959
Precision: 0.8573
Recall: 0.8256
F1 Score: 0.8412

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.93      0.92      1509
           1       0.86      0.83      0.84       757

    accuracy                           0.90      2266
   macro avg       0.89      0.88      0.88      2266
weighted avg       0.90      0.90      0.90      2266



In [None]:
from sklearn.model_selection import StratifiedKFold, cross_validate

# Combine training and validation data
X_combined = np.vstack((X_train_features, X_val_features))
y_combined = np.concatenate((y_train, y_val))

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

cv_results = cross_validate(
    optimized_gb, 
    X_combined, 
    y_combined, 
    cv=cv,
    scoring=['accuracy', 'precision', 'recall', 'f1']
)

# Print results
print("Cross-Validation Results (5 folds):")
print(f"Accuracy: {cv_results['test_accuracy'].mean():.4f} ± {cv_results['test_accuracy'].std():.4f}")
print(f"Precision: {cv_results['test_precision'].mean():.4f} ± {cv_results['test_precision'].std():.4f}")
print(f"Recall: {cv_results['test_recall'].mean():.4f} ± {cv_results['test_recall'].std():.4f}")
print(f"F1 Score: {cv_results['test_f1'].mean():.4f} ± {cv_results['test_f1'].std():.4f}")

Cross-Validation Results (5 folds):
Accuracy: 0.9980 ± 0.0007
Precision: 0.9974 ± 0.0023
Recall: 0.9971 ± 0.0023
F1 Score: 0.9972 ± 0.0009


#### GB Regular CV:
Accuracy: 0.8883 ± 0.0505
Precision: 0.8854 ± 0.0345
Recall: 0.7929 ± 0.1382
F1 Score: 0.8308 ± 0.0894

In [230]:
import pickle

# Save the model to a file
with open('optimized_gb_model.pkl', 'wb') as file:
    pickle.dump(optimized_gb, file)

print("Model saved to 'optimized_gb_model.pkl'")

Model saved to 'optimized_gb_model.pkl'


In [None]:
import numpy as np
from sklearn.ensemble import BaggingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Combine training and validation sets
X_combined = np.vstack((X_train_features, X_val_features))
y_combined = np.concatenate((y_train, y_val))

# Define the base estimator with your tuned parameters
base_gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.2,
    max_depth=3,
    min_samples_split=2,
    random_state=42
)

# Create an ensemble of multiple instances of your GradientBoostingClassifier
ensemble_gb = BaggingClassifier(
    estimator=base_gb,
    n_estimators=5,          # number of copies
    max_samples=0.8,         # each model trains on 80% of the combined data (bootstrapped)
    bootstrap=True,
    random_state=42
)

# Train the ensemble on the combined training set
ensemble_gb.fit(X_combined, y_combined)

# Evaluate on the test set
y_test_pred = ensemble_gb.predict_proba(X_test_features)[:, 1]
y_test_pred = (y_test_pred >= 0.5).astype(int)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Ensemble Accuracy: {accuracy:.4f}")
print(f"Ensemble Precision: {precision:.4f}")
print(f"Ensemble Recall: {recall:.4f}")
print(f"Ensemble F1 Score: {f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))

Ensemble Accuracy: 0.9095
Ensemble Precision: 0.8710
Ensemble Recall: 0.8560
Ensemble F1 Score: 0.8634

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.94      0.93      1509
           1       0.87      0.86      0.86       757

    accuracy                           0.91      2266
   macro avg       0.90      0.90      0.90      2266
weighted avg       0.91      0.91      0.91      2266



In [None]:
y_test_pred = ensemble_gb.predict_proba(X_test_features)[:, 1]
y_test_pred = (y_test_pred >= 0.5).astype(int)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Ensemble Accuracy: {accuracy:.4f}")
print(f"Ensemble Precision: {precision:.4f}")
print(f"Ensemble Recall: {recall:.4f}")
print(f"Ensemble F1 Score: {f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred))

Ensemble Accuracy: 0.9210
Ensemble Precision: 0.8595
Ensemble Recall: 0.9128
Ensemble F1 Score: 0.8853

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.93      0.94      1509
           1       0.86      0.91      0.89       757

    accuracy                           0.92      2266
   macro avg       0.91      0.92      0.91      2266
weighted avg       0.92      0.92      0.92      2266

