In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [14]:
# Read the modelling dataset from its CSV file into a DataFrame.
csv_path = '4.3-RDKit3D_scaled_aug_data.csv'
df = pd.read_csv(csv_path)

In [15]:
# Separate the label column ('values') from the remaining feature columns.
target_column = 'values'
y = df[target_column]
X = df.drop(columns=[target_column])

In [16]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# **ensemble**

In [None]:
import joblib
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Unpickle the three previously saved base estimators.
# NOTE(review): .pkl files are unpickled on load — only use files from a trusted source.
rf_model = joblib.load("3-3DRdkit_RF_model.pkl")
xgb_model = joblib.load("3-3DRdkit_xgb_model.pkl")
svm_model = joblib.load("3-3DRdkit_svm_model.pkl")

# Soft-voting ensemble over the loaded estimators, keyed by short name.
# NOTE(review): soft voting needs class probabilities from every member —
# confirm the saved SVM exposes predict_proba.
named_estimators = [("rf", rf_model), ("xgb", xgb_model), ("svm", svm_model)]
ensemble_model = VotingClassifier(estimators=named_estimators, voting="soft")

# Fit on the training split.
# NOTE(review): fit() retrains the member estimators on X_train, so the fitted
# state stored in the pickles is presumably not what gets evaluated — confirm
# this is intended.
ensemble_model.fit(X_train, y_train)

# Hard class predictions for the held-out split.
y_pred_ensemble = ensemble_model.predict(X_test)

# Label-based metrics computed from the hard predictions.
accuracy = accuracy_score(y_test, y_pred_ensemble)
precision = precision_score(y_test, y_pred_ensemble)
recall = recall_score(y_test, y_pred_ensemble)
f1 = f1_score(y_test, y_pred_ensemble)

# ROC AUC is scored on the second probability column rather than on hard labels.
ensemble_proba = ensemble_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, ensemble_proba)

# Report every metric to four decimal places.
print(f"Ensemble Model Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")


# **stacking ensemble**

In [None]:
import joblib
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Unpickle the three previously saved base estimators.
# NOTE(review): .pkl files are unpickled on load — only use files from a trusted source.
rf_model = joblib.load("3-3DRdkit_RF_model.pkl")
xgb_model = joblib.load("3-3DRdkit_xgb_model.pkl")
svm_model = joblib.load("3-3DRdkit_svm_model.pkl")

# Base learners for the stack, keyed by short name.
base_models = [("rf", rf_model), ("xgb", xgb_model), ("svm", svm_model)]

# Meta-model: logistic regression with default settings.
meta_model = LogisticRegression()

# Stack the base learners under the meta-model.
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True,  # meta-model also receives the original features
    cv=5,              # 5-fold cross-validation inside the stack
    n_jobs=-1,
)

# Fit on the training split, then predict hard labels for the held-out split.
stacked_model.fit(X_train, y_train)
y_pred_stacked = stacked_model.predict(X_test)

# Label-based metrics computed from the hard predictions.
accuracy = accuracy_score(y_test, y_pred_stacked)
precision = precision_score(y_test, y_pred_stacked)
recall = recall_score(y_test, y_pred_stacked)
f1 = f1_score(y_test, y_pred_stacked)

# ROC AUC is scored on the second probability column rather than on hard labels.
stacked_proba = stacked_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, stacked_proba)

# Report every metric to four decimal places.
print(f"Stacking Ensemble Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")


In [None]:
import joblib
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Unpickle the three previously saved base estimators.
# NOTE(review): .pkl files are unpickled on load — only use files from a trusted source.
rf_model = joblib.load("3-3DRdkit_RF_model.pkl")
xgb_model = joblib.load("3-3DRdkit_xgb_model.pkl")
svm_model = joblib.load("3-3DRdkit_svm_model.pkl")

# Base learners for the stack, keyed by short name.
base_models = [("rf", rf_model), ("xgb", xgb_model), ("svm", svm_model)]

# Meta-model: a 100-tree random forest with a fixed seed
# (RandomForestClassifier comes from the notebook's first import cell).
meta_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Stack the base learners under the meta-model.
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True,  # meta-model also receives the original features
    cv=5,              # 5-fold cross-validation inside the stack
    n_jobs=-1,
)

# Fit on the training split, then predict hard labels for the held-out split.
stacked_model.fit(X_train, y_train)
y_pred_stacked = stacked_model.predict(X_test)

# Label-based metrics computed from the hard predictions.
accuracy = accuracy_score(y_test, y_pred_stacked)
precision = precision_score(y_test, y_pred_stacked)
recall = recall_score(y_test, y_pred_stacked)
f1 = f1_score(y_test, y_pred_stacked)

# ROC AUC is scored on the second probability column rather than on hard labels.
stacked_proba = stacked_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, stacked_proba)

# Report every metric to four decimal places.
print(f"Stacking Ensemble Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")


In [None]:
import joblib
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.neural_network import MLPClassifier

# Load saved models
# NOTE(review): .pkl files are unpickled on load — only use files from a trusted source.
rf_model = joblib.load("3-3DRdkit_RF_model.pkl")
xgb_model = joblib.load("3-3DRdkit_xgb_model.pkl")
svm_model = joblib.load("3-3DRdkit_svm_model.pkl")

# Define base models for stacking
base_models = [
    ("rf", rf_model),
    ("xgb", xgb_model),
    ("svm", svm_model)
]

# Use a Neural Network (MLP) as meta-model: two hidden layers (64 and 32 units),
# at most 500 training iterations.
# random_state is pinned to the same seed used for the train/test split and the
# random-forest meta-model; without it the MLP is initialised differently on
# every run and the metrics printed below are not reproducible.
meta_model = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=500, random_state=42)

# Create StackingClassifier
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,  # Meta-model for final prediction
    passthrough=True,  # Pass original features along with base model predictions
    cv=5,  # 5-fold cross-validation for robustness
    n_jobs=-1
)

# Train the stacking model
stacked_model.fit(X_train, y_train)

# Make predictions
y_pred_stacked = stacked_model.predict(X_test)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred_stacked)
precision = precision_score(y_test, y_pred_stacked)
recall = recall_score(y_test, y_pred_stacked)
f1 = f1_score(y_test, y_pred_stacked)
roc_auc = roc_auc_score(y_test, stacked_model.predict_proba(X_test)[:, 1])  # AUC for probabilities

# Print evaluation results
print(f"Stacking Ensemble Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")


In [None]:
import joblib
import numpy as np
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from xgboost import XGBClassifier

# Unpickle the three previously saved base estimators.
# NOTE(review): .pkl files are unpickled on load — only use files from a trusted source.
rf_model = joblib.load("3-3DRdkit_RF_model.pkl")
xgb_model = joblib.load("3-3DRdkit_xgb_model.pkl")
svm_model = joblib.load("3-3DRdkit_svm_model.pkl")

# Base learners for the stack, keyed by short name.
base_models = [("rf", rf_model), ("xgb", xgb_model), ("svm", svm_model)]

# Meta-model: XGBoost classifier with 100 estimators and a 0.05 learning rate.
meta_model = XGBClassifier(n_estimators=100, learning_rate=0.05)

# Stack the base learners under the meta-model.
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    passthrough=True,  # meta-model also receives the original features
    cv=5,              # 5-fold cross-validation inside the stack
    n_jobs=-1,
)

# Fit on the training split, then predict hard labels for the held-out split.
stacked_model.fit(X_train, y_train)
y_pred_stacked = stacked_model.predict(X_test)

# Label-based metrics computed from the hard predictions.
accuracy = accuracy_score(y_test, y_pred_stacked)
precision = precision_score(y_test, y_pred_stacked)
recall = recall_score(y_test, y_pred_stacked)
f1 = f1_score(y_test, y_pred_stacked)

# ROC AUC is scored on the second probability column rather than on hard labels.
stacked_proba = stacked_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, stacked_proba)

# Report every metric to four decimal places.
print(f"Stacking Ensemble Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")
