In [9]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.naive_bayes import GaussianNB #Example additional base model
from sklearn.preprocessing import StandardScaler

# Read the source CSV into a DataFrame.
# Point `csv_path` at your own copy of the file before running.
csv_path = 'C:/Users/abdulssekyanzi/EDA Dataset.csv/100.csv'
data = pd.read_csv(csv_path)

# Assuming your target variable is in a separate file or column.
# For demonstration purposes, I'll create a dummy target.
# You'll need to replace this with your actual target variable.
# Example: If your target is in a 'target.csv' file:
# target_data = pd.read_csv('target.csv')
# y = target_data['target_column']



In [10]:
# Placeholder target -- replace with your actual target!
# The labels are random binary values, so they carry no signal: any model
# trained on them is expected to score about 50% accuracy. They exist only so
# the pipeline can run end to end. The generator is seeded so that the labels
# (and therefore every downstream metric) are reproducible across runs.
rng = np.random.default_rng(42)
y = rng.integers(0, 2, size=len(data))  # Binary classification example.

# Select features (time_ms, MLII, V5)
feature_columns = ['time_ms', 'MLII', 'V5']
X = data[feature_columns]

# Split BEFORE scaling so the test rows never influence the scaler.
# The split depends only on the row count and random_state, so the same rows
# land in train/test as when the split was performed on pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same
# transformation to the held-out rows (avoids train/test leakage).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any cell that still references X_scaled keeps working; it is now
# scaled with statistics learned from the training rows only.
X_scaled = scaler.transform(X)



In [None]:
# Level-0 (base) learners of the stacking ensemble.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
gb = GradientBoostingClassifier(random_state=42)
nb = GaussianNB()  # Example additional model

# Out-of-fold class predictions on the training set (5-fold CV): every
# training row is predicted by a model that did not see it during fitting.
rf_oof_predictions, gb_oof_predictions, nb_oof_predictions = (
    cross_val_predict(base_model, X_train, y_train, cv=5)
    for base_model in (rf, gb, nb)
)

# Meta-features: one column of out-of-fold predictions per base learner.
oof_columns = ['rf_predictions', 'gb_predictions', 'nb_predictions']
oof_values = [rf_oof_predictions, gb_oof_predictions, nb_oof_predictions]
meta_features = pd.DataFrame(dict(zip(oof_columns, oof_values)))



In [4]:
# Level-1 learner: logistic regression fitted on the base models'
# out-of-fold predictions.
meta_model = LogisticRegression(random_state=42).fit(meta_features, y_train)

# Refit each base model on the full training set, then predict the test set.
# (cross_val_predict works on clones, so rf/gb/nb are still unfitted here.)
rf_test_predictions = rf.fit(X_train, y_train).predict(X_test)
gb_test_predictions = gb.fit(X_train, y_train).predict(X_test)
nb_test_predictions = nb.fit(X_train, y_train).predict(X_test)

# Test-set meta-features: same column names and order as used to fit the
# meta-model.
test_meta_features = pd.DataFrame(dict(
    rf_predictions=rf_test_predictions,
    gb_predictions=gb_test_predictions,
    nb_predictions=nb_test_predictions,
))

# Final stacked predictions from the meta-model.
final_predictions = meta_model.predict(test_meta_features)



In [5]:
# Score the stacked ensemble on the held-out test set.
stacking_report = classification_report(y_test, final_predictions)
stacking_accuracy = accuracy_score(y_test, final_predictions)

print("Stacking Model Results:")
print(stacking_report)
print("Accuracy:", stacking_accuracy)



Stacking Model Results:
              precision    recall  f1-score   support

           0       0.50      0.73      0.60     64859
           1       0.50      0.27      0.35     65141

    accuracy                           0.50    130000
   macro avg       0.50      0.50      0.47    130000
weighted avg       0.50      0.50      0.47    130000

Accuracy: 0.5009461538461538


In [6]:
# Optional: score the Random Forest base model alone, for comparison with
# the stacked ensemble.
rf_report = classification_report(y_test, rf_test_predictions)
rf_accuracy = accuracy_score(y_test, rf_test_predictions)

print("\nBase Model Results (Random Forest):")
print(rf_report)
print("Accuracy:", rf_accuracy)



Base Model Results (Random Forest):
              precision    recall  f1-score   support

           0       0.50      0.51      0.50     64859
           1       0.50      0.50      0.50     65141

    accuracy                           0.50    130000
   macro avg       0.50      0.50      0.50    130000
weighted avg       0.50      0.50      0.50    130000

Accuracy: 0.5016615384615385


In [7]:

# Score the Gradient Boosting base model alone on the held-out test set.
gb_report = classification_report(y_test, gb_test_predictions)
gb_accuracy = accuracy_score(y_test, gb_test_predictions)

print("\nBase Model Results (Gradient Boosting):")
print(gb_report)
print("Accuracy:", gb_accuracy)




Base Model Results (Gradient Boosting):
              precision    recall  f1-score   support

           0       0.50      0.67      0.57     64859
           1       0.50      0.33      0.40     65141

    accuracy                           0.50    130000
   macro avg       0.50      0.50      0.48    130000
weighted avg       0.50      0.50      0.48    130000

Accuracy: 0.4996846153846154


In [8]:
# Score the Naive Bayes base model alone on the held-out test set.
nb_report = classification_report(y_test, nb_test_predictions)
nb_accuracy = accuracy_score(y_test, nb_test_predictions)

print("\nBase Model Results (Naive Bayes):")
print(nb_report)
print("Accuracy:", nb_accuracy)


Base Model Results (Naive Bayes):
              precision    recall  f1-score   support

           0       0.50      0.15      0.23     64859
           1       0.50      0.85      0.63     65141

    accuracy                           0.50    130000
   macro avg       0.50      0.50      0.43    130000
weighted avg       0.50      0.50      0.43    130000

Accuracy: 0.500523076923077
