In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import  GradientBoostingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load your dataset
data = pd.read_csv("water_potability.csv")

# Handle missing values by filling with column means
numerical_cols = data.select_dtypes(include=['number']).columns
data[numerical_cols] = data[numerical_cols].fillna(data[numerical_cols].mean())

# Assuming 'Potability' is the target variable, and other columns are features
X = data.drop(columns=['Potability'])
y = data['Potability']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base models

gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
lr_model = LogisticRegression()

# Fit the base models

gb_model.fit(X_train, y_train)
lr_model.fit(X_train, y_train)

# Make predictions on the test set

gb_preds = gb_model.predict(X_test)
lr_preds = lr_model.predict(X_test)

# Weighted Averaging
weighted_avg_preds = ( 0.4 * gb_preds + 0.2 * lr_preds)

# Stacking
stacked_model = StackingClassifier(estimators=[ ('gb', gb_model), ('lr', lr_model)])
stacked_model.fit(X_train, y_train)
stacked_preds = stacked_model.predict(X_test)

# Blending
blend_preds = ( gb_preds) / 2

# Boosting (using GradientBoostingClassifier as an example)
boosted_model = GradientBoostingClassifier(n_estimators=100, random_state=42)
boosted_model.fit(X_train, y_train)
boosted_preds = boosted_model.predict(X_test)

import numpy as np  # Import NumPy for array operations

# ...

# Convert weighted_avg_preds to binary predictions
threshold = 0.5  # You can adjust the threshold as needed
binary_weighted_avg_preds = np.where(weighted_avg_preds >= threshold, 1, 0)

# Convert blend_preds to binary predictions
binary_blend_preds = np.where(blend_preds >= threshold, 1, 0)

# ...

# Evaluate the models with binary predictions
print("Weighted Averaging Accuracy:", accuracy_score(y_test, binary_weighted_avg_preds))
print("Stacking Accuracy:", accuracy_score(y_test, stacked_preds))
print("Blending Accuracy:", accuracy_score(y_test, binary_blend_preds))
print("Boosting Accuracy:", accuracy_score(y_test, boosted_preds))



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Weighted Averaging Accuracy: 0.7125
Stacking Accuracy: 0.75
Blending Accuracy: 0.7375
Boosting Accuracy: 0.7375
