In [3]:
from sklearn.metrics import brier_score_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import numpy as np
import pymc as pm
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:

def print_ensemble(e_prob, y_test, title="------Ensemble 3 classifier-------"):
    """Print Brier score, accuracy and F1 for a vector of ensemble probabilities.

    Parameters
    ----------
    e_prob : array-like
        Predicted probabilities, one per test sample. Values above 0.5 are
        labelled 1, everything else 0.
    y_test : array-like
        True labels for the same samples (expected to be 0/1, since the
        predictions are thresholded to 0/1).
    title : str, optional
        Header line printed before the metrics. The default is the original
        three-classifier banner, so existing two-argument calls print the
        same header as before.

    Returns
    -------
    None
        The metrics are only printed.
    """
    print(title)
    print("Test BS:", round(brier_score_loss(y_test, e_prob), 3))
    print()

    # Threshold on a plain array; no intermediate Series is needed for this.
    y_pre_ens = (np.asarray(e_prob) > 0.5).astype(int)

    # scikit-learn metrics are declared as (y_true, y_pred). Passing them in
    # that order keeps any "ill-defined precision/recall" warning pointing at
    # the right quantity and matches the other helpers in this notebook.
    print("Test Accuracy:", round(accuracy_score(y_test, y_pre_ens), 3))
    print()
    # Rounded like the two metrics above so the report is uniform.
    print("Test F1-score:", round(f1_score(y_test, y_pre_ens), 3))
    
def print_base_ensemble(probs, y_test):
    """Report metrics for the unweighted mean of the lgr, rfc and xgb probabilities.

    ``probs`` is indexed with the keys ``'lgr'``, ``'rfc'`` and ``'xgb'``; the
    three entries are averaged with equal weight and the result is passed,
    together with ``y_test``, to ``print_ensemble``.
    """
    lgr_prob, rfc_prob, xgb_prob = probs['lgr'], probs['rfc'], probs['xgb']
    mean_prob = (lgr_prob + rfc_prob + xgb_prob) / 3
    print_ensemble(mean_prob, y_test)

In [None]:

def bayesian_base(probs, y_test, f1_rfc, f1_xgb, f1_lgr, f1_svm, uniform_prior=False, random_seed=None):
    """Report a fixed-weight and a Dirichlet-weighted ensemble of four classifiers.

    Two results are printed:

    1. A fixed weighted average of the rfc, xgb, lgr and svm probabilities,
       with weights proportional to the supplied scores (or all equal when
       ``uniform_prior`` is true), reported through ``print_ensemble``.
    2. A PyMC model in which the four weights get a Dirichlet prior whose
       concentration is the same un-normalised weight vector, and ``y_test``
       is modelled as Bernoulli with the weighted average as its probability.
       Posterior-predictive draws of the labels are averaged per sample,
       thresholded at 0.5, and their accuracy and F1 are printed.

    NOTE(review): the weight posterior is conditioned on ``y_test`` and the
    resulting labels are then scored against that same ``y_test``, so the
    "Bayesian Ensemble" numbers are an in-sample fit rather than a held-out
    estimate.

    Parameters
    ----------
    probs : mapping
        Indexed with ``'lgr'``, ``'rfc'``, ``'xgb'`` and ``'svm'``. Each entry
        is assumed to be a 1-D vector of positive-class probabilities aligned
        with ``y_test`` -- TODO confirm against the caller.
    y_test : array-like
        Observed labels; used both as the Bernoulli observations and as the
        reference for the printed metrics.
    f1_rfc, f1_xgb, f1_lgr, f1_svm : float
        Per-model scores used as un-normalised weights (the original comment
        describes them as validation scores). Ignored when ``uniform_prior``
        is true.
    uniform_prior : bool, optional
        Use ``[1, 1, 1, 1]`` instead of the supplied scores.
    random_seed : optional
        Forwarded to ``pm.sample`` and ``pm.sample_posterior_predictive``.
        The default ``None`` leaves sampling unseeded, as before.

    Raises
    ------
    ValueError
        If any weight is non-finite or not strictly positive. Such values
        cannot be normalised and are not valid Dirichlet concentrations.

    Returns
    -------
    None
        Results are only printed.
    """
    # Positional order used everywhere below: rfc, xgb, lgr, svm.
    # Plain arrays keep the sums positional instead of pandas index-aligned
    # and avoid mixing pandas objects with PyTensor variables.
    p_rfc = np.asarray(probs['rfc'])
    p_xgb = np.asarray(probs['xgb'])
    p_lgr = np.asarray(probs['lgr'])
    p_svm = np.asarray(probs['svm'])

    # The original notes say accuracy- or Brier-based weights were also tried
    # and gave the same result; only the F1-based / uniform variants remain.
    if uniform_prior:
        raw_weights = np.ones(4)
    else:
        raw_weights = np.array([f1_rfc, f1_xgb, f1_lgr, f1_svm], dtype=float)

    if not np.all(np.isfinite(raw_weights)) or np.any(raw_weights <= 0):
        raise ValueError(
            "ensemble weights must be finite and strictly positive, got "
            f"{raw_weights.tolist()}"
        )

    fixed_weights = raw_weights / raw_weights.sum()

    bayesian_ensemble_proba = (
        fixed_weights[0] * p_rfc
        + fixed_weights[1] * p_xgb
        + fixed_weights[2] * p_lgr
        + fixed_weights[3] * p_svm
    )
    print("Naive Bayes Ensemble Result")
    print_ensemble(bayesian_ensemble_proba, y_test)

    with pm.Model():
        # Prior over the simplex of model weights; a distinct name keeps the
        # random variable apart from the fixed weights computed above.
        weight_rv = pm.Dirichlet('weights', a=raw_weights)

        # Ensemble probability as a deterministic function of the weights.
        model_prediction = pm.Deterministic(
            'prediction',
            weight_rv[0] * p_rfc
            + weight_rv[1] * p_xgb
            + weight_rv[2] * p_lgr
            + weight_rv[3] * p_svm,
        )

        # Likelihood of the observed labels under the ensemble probability.
        pm.Bernoulli('obs', p=model_prediction, observed=np.asarray(y_test))

        trace = pm.sample(2000, return_inferencedata=True, random_seed=random_seed)
        ppc = pm.sample_posterior_predictive(trace, random_seed=random_seed)

    # Average the simulated labels over every chain and draw to get one
    # value per test sample, then threshold it.
    obs_draws = ppc['posterior_predictive']['obs']
    mean_prediction = np.asarray(obs_draws.mean(dim=('chain', 'draw')))
    final_predictions = (mean_prediction > 0.5).astype(int)

    print("Bayesian Ensemble Accuracy:", accuracy_score(y_test, final_predictions))
    print("Bayesian Ensemble F1 Score:", f1_score(y_test, final_predictions))


In [None]:
def svm(x_train, y_train, x_test, y_test):
    """Fit a scaled, probability-calibrated linear SVM and score it on the test split.

    The estimator is ``StandardScaler`` followed by ``CalibratedClassifierCV``
    wrapping ``LinearSVC(random_state=1, max_iter=10000)``. The calibration
    wrapper is what provides ``predict_proba``.

    Parameters
    ----------
    x_train, y_train
        Training features and labels passed to ``pipeline.fit``.
    x_test, y_test
        Test features and true labels used for the printed F1 score.

    Returns
    -------
    tuple
        ``(f1_svm, y_pre_proba_svm)``. ``f1_svm`` is the F1 score of the
        labels obtained by thresholding column 1 of the probabilities at 0.5.
        ``y_pre_proba_svm`` is the full ``predict_proba`` output (one column
        per class), not just the positive-class column.
    """
    # Named differently from the enclosing function so the local does not
    # shadow ``svm`` inside its own body.
    linear_svc = LinearSVC(random_state=1, max_iter=10000)  # raise max_iter if convergence warnings appear

    # Scaling and calibration live in one pipeline so the scaler is fitted
    # on the training data only and reused unchanged at predict time.
    pipeline = make_pipeline(StandardScaler(), CalibratedClassifierCV(linear_svc))
    pipeline.fit(x_train, y_train)

    # Full class-probability matrix; column 1 is treated as the positive class.
    y_pre_proba_svm = pipeline.predict_proba(x_test)
    y_pre_svm = (y_pre_proba_svm[:, 1] > 0.5).astype(int)

    # (y_true, y_pred) order, matching scikit-learn's signature and ``stack``.
    f1_svm = f1_score(y_test, y_pre_svm)
    print("Test F1-score:", round(f1_svm, 3))

    return f1_svm, y_pre_proba_svm


In [2]:
def stack(X_train, y_train, X_test, y_test, lgr, xgb, rfc):
    """Train a stacking classifier on three base models and score it on the test split.

    The base estimators are registered under the names ``'lgr'``, ``'xgb'``
    and ``'rfc'``; a ``LogisticRegression`` is the final estimator, with
    ``passthrough=True`` and ``cv=5``. Labels come from thresholding column 1
    of ``predict_proba`` at 0.5. Accuracy and F1 are printed.

    Returns
    -------
    tuple
        ``(f1_stack, y_pred_proba)``: the F1 score and the full
        ``predict_proba`` output for ``X_test``.
    """
    base_learners = [('lgr', lgr), ('xgb', xgb), ('rfc', rfc)]
    stacker = StackingClassifier(
        estimators=base_learners,
        final_estimator=LogisticRegression(),
        passthrough=True,
        cv=5,
    )
    stacker.fit(X_train, y_train)

    # Probabilities for every class; column 1 drives the hard labels.
    y_pred_proba = stacker.predict_proba(X_test)
    positive_prob = y_pred_proba[:, 1]
    y_pred = (positive_prob > 0.5).astype(int)

    f1_stack = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Stacking Model Accuracy: {accuracy:.3f}")
    print(f"Stacking Model F1: {f1_stack:.3f}")
    return f1_stack, y_pred_proba



In [None]:
def gaussian_process():
    # an attempt at the gaussian process 