In [None]:
import pandas as pd
from pycaret.classification import setup, compare_models, tune_model, finalize_model, predict_model

# Load the preprocessed Android system-log dataset.
file_path = 'dataset/system-logs/multiple-system-log-dataset/preprocessed-data/Android_preprocessed.csv'
df = pd.read_csv(file_path)

# Check class distribution of the 'error' target before modelling.
print("Class distribution in Android:")
print(df['error'].value_counts())

# Setup the data in PyCaret without automatic imbalance handling
# (fix_imbalance=False). session_id fixes the random seed so the train/test
# split and cross-validation folds are reproducible; fold=10 requests
# 10-fold cross-validation.
setup_data = setup(data=df, target='error', session_id=42, fold=10, fix_imbalance=False, verbose=True)

# Compare candidate models, then tune the top-ranked one.
# NOTE(review): compare_models() ranks by Accuracy by default, whereas the
# tuning step below optimizes AUC. Confirm this mismatch is intended,
# especially if the target classes are imbalanced.
best_model = compare_models()
tuned_best_model = tune_model(best_model, optimize='AUC', n_iter=30)

# Score the tuned model on the hold-out split BEFORE finalizing.
# finalize_model() refits on the whole dataset (hold-out included), so calling
# predict_model() on the finalized model would evaluate it on rows it was
# trained on and report optimistically biased metrics.
print("Final tuned model performance for Android:")
holdout_predictions = predict_model(tuned_best_model)

# Refit the tuned pipeline on the full dataset for downstream use.
final_model = finalize_model(tuned_best_model)

# Keep the hold-out predictions as the cell's last expression so a notebook
# still renders them, as the original cell did.
holdout_predictions


Class distribution in Android:
0    1400773
1     154232
Name: error, dtype: int64


Unnamed: 0,Description,Value
0,Session id,42
1,Target,error
2,Target type,Binary
3,Original data shape,"(1555005, 5)"
4,Transformed data shape,"(1555005, 5)"
5,Transformed train set shape,"(1088503, 5)"
6,Transformed test set shape,"(466502, 5)"
7,Numeric features,1
8,Categorical features,3
9,Preprocess,True


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9604,0.0,0.6007,1.0,0.7506,0.7305,0.7586,7.166
dt,Decision Tree Classifier,0.9604,0.0,0.6007,1.0,0.7506,0.7305,0.7586,1.14
svm,SVM - Linear Kernel,0.9604,0.9903,0.6007,1.0,0.7506,0.7305,0.7586,1.202
rf,Random Forest Classifier,0.9604,0.0,0.6007,1.0,0.7506,0.7305,0.7586,2.557
ada,Ada Boost Classifier,0.9604,0.8004,0.6007,1.0,0.7506,0.7305,0.7586,1.171
gbc,Gradient Boosting Classifier,0.9604,0.8262,0.6007,1.0,0.7506,0.7305,0.7586,3.361
et,Extra Trees Classifier,0.9604,0.0,0.6007,1.0,0.7506,0.7305,0.7586,2.12
lightgbm,Light Gradient Boosting Machine,0.9604,0.0,0.6007,1.0,0.7506,0.7305,0.7586,1.47
lr,Logistic Regression,0.9603,0.9901,0.601,0.9987,0.7504,0.7303,0.7582,1.372
lda,Linear Discriminant Analysis,0.9562,0.9847,0.5634,0.9918,0.7186,0.6968,0.7297,1.168


Processing:   0%|          | 0/61 [00:00<?, ?it/s]

Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 30 candidates, totalling 300 fits
