In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, confusion_matrix
from imblearn.metrics import geometric_mean_score
from sklearn.model_selection import train_test_split
from metric_learn import MLKR
from scipy.optimize import differential_evolution

def construct_data_blocks(train_df):
    target_column = train_df.columns[0]
    class_counts = train_df[target_column].value_counts()
    Smin = train_df[train_df[target_column] == class_counts.idxmin()]
    Smaj = train_df[train_df[target_column] == class_counts.idxmax()]
    Nmin, Nmaj = len(Smin), len(Smaj)
    ir = round(Nmaj / Nmin)
    Smaj = Smaj.sample(frac=1, random_state=42).reset_index(drop=True)
    chunks = [[] for _ in range(ir)]
    for i, row in Smaj.iterrows():
        chunks[i % ir].append(row)
    data_blocks = [pd.concat([pd.DataFrame(chunk, columns=train_df.columns), Smin], ignore_index=True) for chunk in chunks]
    return data_blocks, target_column

def train_mlkr(X_train, y_train):
    mlkr = MLKR()
    mlkr.fit(X_train, y_train)
    return mlkr

def train_random_forest(X_train, y_train):
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train, y_train)
    return rf

def train_xgboost(X_train, y_train):
    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
    xgb.fit(X_train, y_train)
    return xgb

def train_svm(X_train, y_train):
    svm = SVC(probability=True, random_state=42)
    svm.fit(X_train, y_train)
    return svm

def train_knn(X_train, y_train):
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train, y_train)
    return knn

def train_logistic_regression(X_train, y_train):
    lr = LogisticRegression(max_iter=1000, random_state=42)
    lr.fit(X_train, y_train)
    return lr

def train_gradient_boosting(X_train, y_train):
    gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
    gb.fit(X_train, y_train)
    return gb

def train_mlp(X_train, y_train):
    class MLP(nn.Module):
        def __init__(self, input_size):
            super(MLP, self).__init__()
            self.fc1 = nn.Linear(input_size, 16)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(16, 1)
            self.sigmoid = nn.Sigmoid()
        def forward(self, x):
            x = self.relu(self.fc1(x))
            x = self.sigmoid(self.fc2(x))
            return x

    input_size = X_train.shape[1]
    model = MLP(input_size)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    dataset = TensorDataset(X_train_tensor, y_train_tensor)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    for epoch in range(50):
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

    return model

def evaluate(y_true, y_pred, y_proba):
    return {
        "Precision": precision_score(y_true, y_pred),
        "Recall": recall_score(y_true, y_pred),
        "F1 Score": f1_score(y_true, y_pred),
        "G-Mean": geometric_mean_score(y_true, y_pred),
        "ROC AUC": roc_auc_score(y_true, y_proba),
        "Accuracy": accuracy_score(y_true, y_pred),
        "Confusion Matrix": confusion_matrix(y_true, y_pred)
    }

df = pd.read_csv("Train.csv")
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
data_blocks, target_column = construct_data_blocks(train_df)

rf_models, mlp_models, xgb_models, svm_models, knn_models, lr_models, gb_models = [], [], [], [], [], [], []
X_test = test_df.drop(columns=[target_column]).values
y_test = test_df[target_column].values

for block in data_blocks:
    X, y = block.drop(columns=[target_column]).values, block[target_column].values
    mlkr = train_mlkr(X, y)
    X_train_transformed = mlkr.transform(X)

    rf_models.append(train_random_forest(X_train_transformed, y))
    xgb_models.append(train_xgboost(X_train_transformed, y))
    svm_models.append(train_svm(X_train_transformed, y))
    mlp_models.append(train_mlp(X_train_transformed, y))
    knn_models.append(train_knn(X_train_transformed, y))
    lr_models.append(train_logistic_regression(X_train_transformed, y))
    gb_models.append(train_gradient_boosting(X_train_transformed, y))

y_proba_rf = np.mean([rf.predict_proba(X_test)[:, 1] for rf in rf_models], axis=0)
y_proba_xgb = np.mean([xgb.predict_proba(X_test)[:, 1] for xgb in xgb_models], axis=0)
y_proba_svm = np.mean([svm.predict_proba(X_test)[:, 1] for svm in svm_models], axis=0)
y_proba_knn = np.mean([knn.predict_proba(X_test)[:, 1] for knn in knn_models], axis=0)
y_proba_lr = np.mean([lr.predict_proba(X_test)[:, 1] for lr in lr_models], axis=0)
y_proba_gb = np.mean([gb.predict_proba(X_test)[:, 1] for gb in gb_models], axis=0)

def optimize_weights(weights):
    weights = np.clip(weights, 0, 1)
    weights /= np.sum(weights)
    model_probs = np.array([y_proba_rf, y_proba_xgb, y_proba_mlp, y_proba_svm, y_proba_knn, y_proba_lr, y_proba_gb])
    y_proba = np.dot(weights, model_probs)
    y_pred = np.round(y_proba)
    return -f1_score(y_test, y_pred)

bounds = [(0, 1)] * 7  # Seven models
result = differential_evolution(optimize_weights, bounds, strategy='best1bin', maxiter=1000)
optimal_weights = result.x / np.sum(result.x)  # Normalize

y_proba = np.dot(optimal_weights, np.array([y_proba_rf, y_proba_xgb, y_proba_mlp, y_proba_svm, y_proba_knn, y_proba_lr, y_proba_gb]))
y_pred = np.round(y_proba)

metrics = evaluate(y_test, y_pred, y_proba)
print("Optimal Weights:", optimal_weights)
print(metrics)

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encode

Optimal Weights: [0.00466651 0.33135146 0.40227728 0.00604525 0.10091342 0.11190012
 0.04284596]
{'Precision': 0.34328358208955223, 'Recall': 0.696969696969697, 'F1 Score': 0.46, 'G-Mean': np.float64(0.8173029975005109), 'ROC AUC': np.float64(0.9430314487025262), 'Accuracy': 0.9505041246562786, 'Confusion Matrix': array([[1014,   44],
       [  10,   23]])}
