In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_predict, cross_val_score, RandomizedSearchCV, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import recall_score, precision_score, log_loss, accuracy_score, roc_auc_score, confusion_matrix, classification_report
from imblearn.over_sampling import RandomOverSampler, SMOTE
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    Normalizer,
    PowerTransformer,
    QuantileTransformer,
    RobustScaler,
    StandardScaler,
    minmax_scale,
)
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.svm import LinearSVC,NuSVC,SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import SimpleImputer
from sklearn.calibration import CalibratedClassifierCV
from joblib import dump, load
from imblearn.pipeline import make_pipeline
from imblearn.over_sampling import SMOTE
from joblib import dump
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the liver dataset from the parquet file sitting next to the notebook.
df = pd.read_parquet(path='liver.parquet')

In [None]:
# Shared 5-fold stratified splitter, reused by every cross-validation call
# below. Shuffling is left off (the library default), so folds are deterministic.
skf = StratifiedKFold(n_splits=5, shuffle=False)

In [None]:
# Build the feature matrix and target from the frame loaded above (`df`).
# The previous code referenced `Liver_disease`, a name that is never defined
# in this notebook, so the cell failed on a fresh kernel.
DROPPED_COLUMNS = [
    'Direct_Bilirubin',
    'Aspartate_Aminotransferase',
    'Total_Protiens',
    'Albumin',
    'Dataset',
]
X = df.drop(columns=DROPPED_COLUMNS)
# Target is the last column of the frame.
# NOTE(review): presumably this is 'Dataset' (it is excluded from X) - confirm.
y = df.iloc[:, -1]

In [None]:
# Hold out 25% of the rows for final evaluation; the seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Balance the classes on the training split only; the test split is left as-is.
ros = RandomOverSampler(random_state=42)
X_train_ros, y_train_ros = ros.fit_resample(X_train, y_train)

# Normalizer rescales each sample (row) to unit norm. It is fitted on the
# resampled training data and then applied unchanged to the test data.
scaler = Normalizer().fit(X_train_ros)
X_train_scaled = scaler.transform(X_train_ros)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate base learners and their cross-validated ROC AUC.
#
# `results['models']` is later passed as the `estimators` of the voting
# ensemble and `results['mean_score']` as its weights, so both lists must stay
# in the same order. Stochastic estimators are seeded so that a
# Restart & Run All reproduces the same scores.
results = {
    'models' : [
        ('mlpc', MLPClassifier(random_state=42)),
        ('lsvc', SVC(kernel='linear', probability=True, random_state=42)),
        ('rfc', RandomForestClassifier(random_state=42)),
        ('hgbc', HistGradientBoostingClassifier(random_state=42)),
        ('lda', LinearDiscriminantAnalysis()),
        ],
    'mean_score' :[],
    'std_dev_score' :[]
}
for _, model in results['models']:
    # Model selection must not touch the held-out test set, so the scores are
    # computed on the training split. Oversampling and normalisation are placed
    # inside an imblearn pipeline so that each fold resamples only its own
    # training part (the sampler is skipped at prediction time), which avoids
    # duplicated rows leaking into the validation part of a fold.
    candidate = make_pipeline(
        RandomOverSampler(random_state=42),
        Normalizer(),
        model,
    )
    scores = cross_val_score(candidate,
                            X_train,
                            y_train,
                            scoring = 'roc_auc',
                            cv = skf,
                            n_jobs = -1)
    results['mean_score'].append(scores.mean())
    results['std_dev_score'].append(scores.std())

    name = type(model).__name__
    print(f'{name} - Roc AUC score: {scores.mean():.4f} ± {scores.std():.4f}')

MLPClassifier - Roc AUC score: 0.4871 ± 0.0865
SVC - Roc AUC score: 0.6035 ± 0.1485
RandomForestClassifier - Roc AUC score: 0.7345 ± 0.1024
HistGradientBoostingClassifier - Roc AUC score: 0.7223 ± 0.1103
LinearDiscriminantAnalysis - Roc AUC score: 0.7089 ± 0.1135


In [None]:
# Soft-voting ensemble over the candidate models: predicted probabilities are
# averaged, each model weighted by its mean cross-validated ROC AUC.
voting = VotingClassifier(
    estimators=results['models'],
    weights=results['mean_score'],
    voting='soft',
    n_jobs=-1,
    verbose=True,
)

In [None]:
# Cross-validated ROC AUC of the ensemble, estimated on the training split.
# The held-out test set is reserved for the final evaluation further down, so
# it must not be used here. Oversampling and normalisation run inside the
# pipeline, i.e. they are re-fitted on the training part of every fold.
# cross_val_score works on clones, so `voting` itself remains unfitted.
voting_cv = make_pipeline(
    RandomOverSampler(random_state=42),
    Normalizer(),
    voting,
)
scores = cross_val_score(voting_cv,
                        X_train,
                        y_train,
                        scoring = 'roc_auc',
                        cv = skf,
                        n_jobs = -1)

print(f'Roc AUC score: {scores.mean():.4f} ± {scores.std():.4f}')

Roc AUC score: 0.7417 ± 0.1095


In [None]:
# Fit the final ensemble on the oversampled, normalised training data.
voting.fit(X=X_train_scaled, y=y_train_ros)

In [None]:
# Class predictions for the held-out test rows (normalised, not resampled).
predictions = voting.predict(X=X_test_scaled)

In [None]:
# Hold-out evaluation: per-class precision/recall/F1 followed by the confusion
# matrix. classification_report and confusion_matrix are already imported in
# the notebook's import cell, so they are not re-imported here.
print(classification_report(y_test,predictions))
print("\n")
print(confusion_matrix(y_test,predictions))

              precision    recall  f1-score   support

           0       0.50      0.55      0.52        49
           1       0.75      0.71      0.73        93

    accuracy                           0.65       142
   macro avg       0.62      0.63      0.63       142
weighted avg       0.66      0.65      0.66       142



[[27 22]
 [27 66]]


In [None]:
# Persist the fitted normaliser and the fitted ensemble with joblib.
dump(value=scaler, filename='scaler_liver.joblib')
dump(value=voting, filename='voting_liver.joblib')

['scaler_liver.joblib']