In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.utils import resample
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from scipy.stats import mode
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [19]:
def telco_preprocess():
    dataFrame = pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')
    

    dataFrame['TotalCharges'] = pd.to_numeric(dataFrame['TotalCharges'], errors='coerce')
    

    dataFrame['TotalCharges'] = dataFrame['TotalCharges'].fillna(dataFrame['TotalCharges'].mean())
    

    dataFrame = dataFrame.dropna(subset=['TotalCharges'])
    

    labels = dataFrame['Churn'].map({'Yes': 1, 'No': 0})
    

    features = dataFrame.drop(columns=['Churn'])
    

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=1, stratify=labels
    )
    

    scaler = StandardScaler()
    

    numeric_columns = X_train.select_dtypes(include=['int64', 'float64']).columns
    X_train[numeric_columns] = scaler.fit_transform(X_train[numeric_columns])
    X_test[numeric_columns] = scaler.transform(X_test[numeric_columns])
    

    X_train = pd.get_dummies(X_train, drop_first=True)
    X_test = pd.get_dummies(X_test, drop_first=True)
    

    X_train, X_test = X_train.align(X_test, join='left', axis=1, fill_value=0)
    
 
    X_train = X_train.values.astype(np.float64)
    X_test = X_test.values.astype(np.float64)
    y_train = y_train.values.astype(np.float64)
    y_test = y_test.values.astype(np.float64)
    
    return X_train, y_train, X_test, y_test


In [20]:
def creditcard():
            dataFrame=pd.read_csv('creditcard.csv')
           
            features=dataFrame.drop(columns=['Class'])
            labels=dataFrame['Class']
            
            X_train, X_test, y_train, y_test = train_test_split(features, labels,
                                                    test_size=0.2,
                                                    random_state=1,
                                                    stratify=labels)
            scaler=StandardScaler()
            columns=X_train.columns
            X_train[columns]=scaler.fit_transform(X_train[columns])
            X_test[columns]=scaler.transform(X_test[columns])
            return X_train,y_train,X_test,y_test


In [21]:
class LogisticRegression():
    
    def __init__(self):
        self.weights = None
    
    def sigmoid(self, z):
        #print("z's value in sigmoid func ")
        #print(z)
        return 1 / (1 + np.exp(-z))  
    
    def computecost(self, X, y):
        h = self.sigmoid(np.dot(X, self.weights))
        epsilon = 1e-5 
        cost = -(1 / len(y)) * np.sum(y * np.log(h + epsilon) + (1 - y) * np.log(1 - h + epsilon))
        return cost
    
    def gradient_descent(self, X, y, learning_rate, iterations):
        m = len(y)
        cost_history = []
        
        for i in range(iterations):
            h = self.sigmoid(np.dot(X, self.weights))
            gradient = np.dot(X.T, (h - y)) / m
            self.weights -= learning_rate * gradient  
            cost = self.computecost(X, y)
            cost_history.append(cost)
        
        return self.weights, cost_history
    
    def fit(self, X, y, learning_rate=0.01, iterations=1000):
        if X.ndim == 1:
            X = X.reshape(-1, 1)  
        self.weights = np.zeros(X.shape[1], dtype=float) 
        self.weights, cost_history = self.gradient_descent(X, y, learning_rate, iterations)
        return self.weights
    
    def predict(self, X):
        predictions = self.sigmoid(np.dot(X, self.weights))
        return np.where(predictions >= 0.5, 1, 0)



In [22]:
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_recall_curve, auc

def calculate_metrics(y_true, y_pred, y_prob):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    
    specificity = tn / (tn + fp)
    precision = tp / (tp + fp)
    accuracy = (tp + tn) / (tp + tn + fp + fn)
    sensitivity = tp / (tp + fn)
    f1 = 2 * (precision * sensitivity) / (precision + sensitivity) if (precision + sensitivity) > 0 else 0
    auc_score = roc_auc_score(y_true, y_prob)

    precision_vals, recall_vals, _ = precision_recall_curve(y_true, y_prob)
    aupr = auc(recall_vals, precision_vals)

    return accuracy, sensitivity, specificity, precision, f1, auc_score, aupr


In [23]:
def bagging():
    
    try:
        
        X_train, y_train, X_test, y_test = creditcard()

        
        base_models = []
        metrics = []
        
        
        for i in range(9):
            lr=LogisticRegression()
            X_sample, y_sample = resample(X_train, y_train, replace=True, random_state=i)
            
            lr.fit(X_sample, y_sample)
            
            base_models.append(lr)
            
            y_pred = lr.predict(X_test)
            
            y_pred_proba = lr.sigmoid(np.dot(X_test, lr.weights))
            
            model_metrics = calculate_metrics(y_test, y_pred, y_pred_proba)
            metrics.append(model_metrics)
            
    except Exception as e:
        print(f"Error occurred in preprocessing: {e}")
    
    avg_metrics = np.mean(metrics, axis=0)
    
    print("Returning models and test data")
    return base_models, X_train, X_test, y_train, y_test, avg_metrics


In [24]:
base_models, X_train, X_test, y_train, y_test, avg_metrics = bagging()

if avg_metrics is not None:
    print("Average Performance Metrics:")
    print(f"Accuracy: {avg_metrics[0]}")
    print(f"Sensitivity: {avg_metrics[1]}")
    print(f"Specificity: {avg_metrics[2]}")
    print(f"Precision: {avg_metrics[3]}")
    print(f"F1-score: {avg_metrics[4]}")
    print(f"AUC: {avg_metrics[5]}")
    print(f"AUPR: {avg_metrics[6]}")


KeyboardInterrupt: 

In [13]:
from sklearn.metrics import confusion_matrix, roc_auc_score, precision_recall_curve, auc

def stacking(base_models, X_train, y_train, X_test, y_test):
    try:
        meta_train = np.zeros((X_train.shape[0], len(base_models)))  
        meta_test = np.zeros((X_test.shape[0], len(base_models)))   

        for idx, model in enumerate(base_models):
            meta_train[:, idx] = model.predict(X_train)
            
            meta_test[:, idx] = model.predict(X_test)

        print(f"meta_train shape: {meta_train.shape}, meta_test shape: {meta_test.shape}")

        meta_model = LogisticRegression()  
        meta_model.fit(meta_train, y_train)

        final_predictions = meta_model.predict(meta_test)

        
        y_prob = meta_model.predict_proba(meta_test)[:, 1]  

        
        accuracy, sensitivity, specificity, precision, f1, auc_score, aupr = calculate_metrics(y_test, final_predictions, y_prob)


        return meta_model, final_predictions, accuracy, sensitivity, specificity, precision, f1, auc_score, aupr
    
    except Exception as e:
        print(f"Error in stacking: {e}")



In [14]:

def base_prediction():
        
        meta_model, final_predictions, accuracy, sensitivity, specificity, precision, f1, auc_score, aupr = stacking(base_models, X_train, y_train, X_test, y_test)
    
   
        print(f'Accuracy (from stacking): {accuracy}')
        
        print(f"Sensitivity: {sensitivity}")
        print(f"Specificity: {specificity}")
        print(f"Precision: {precision}")
        print(f"F1-score: {f1}")
        print(f"AUC: {auc_score}")
        print(f"AUPR: {aupr}")

        return accuracy

In [15]:
def majority_voting(base_models, X_test):
    base_predictions = np.column_stack([model.predict(X_test) for model in base_models])
    
    final_predictions, _ = mode(base_predictions, axis=1)
    
    return final_predictions.flatten()

In [16]:
def voting_prediction():
    voting_predictions = majority_voting(base_models, X_test)
    
    voting_probabilities = np.mean([model.predict_proba(X_test)[:, 1] for model in base_models], axis=0)

    accuracy, sensitivity, specificity, precision, f1, auc_score, aupr = calculate_metrics(y_test, voting_predictions, voting_probabilities)

    print(f'Accuracy: {accuracy}')
    print(f'Sensitivity: {sensitivity}')
    print(f'Specificity: {specificity}')
    print(f'Precision: {precision}')
    print(f'F1-score: {f1}')
    print(f'AUC: {auc_score}')
    print(f'AUPR: {aupr}')

    return accuracy, sensitivity, specificity, precision, f1, auc_score, aupr

In [17]:
#acc=voting_prediction()
base_accuracy=base_prediction()


meta_train shape: (227845, 9), meta_test shape: (56962, 9)
Error in stacking: 'LogisticRegression' object has no attribute 'predict_proba'


TypeError: cannot unpack non-iterable NoneType object

In [96]:
voting_accuracy, voting_sensitivity, voting_specificity, voting_precision, voting_f1, voting_auc, voting_aupr = voting_prediction() 

Accuracy: 0.9993328885923949
Sensitivity: 0.7040816326530612
Specificity: 0.9998417276308385
Precision: 0.8846153846153846
F1-score: 0.7840909090909091
AUC: 0.9796372368587276
AUPR: 0.8168181747899139
