In [1]:
import pandas as pd
from pycaret.classification import setup, compare_models, finalize_model, predict_model, pull
import gc

# Function to free up memory
def free_memory():
    gc.collect()

# Load Windows dataset
file_path = 'dataset/system-logs/multiple-system-log-dataset/preprocessed-data/Windows_preprocessed.csv'
df = pd.read_csv(file_path)

# Reduce the dataset size significantly for faster processing (if necessary)
df = df.sample(frac=0.05, random_state=42)  # Use 5% of the data

# Check class distribution
print("Class distribution in Windows:")
print(df['error'].value_counts())

# Setup the data in PyCaret with reduced verbosity and fewer folds
try:
    setup_data = setup(data=df, target='error', session_id=42, fold=3, fix_imbalance=False, verbose=False)
except MemoryError:
    print("Memory error during setup. Please try reducing the data size further.")
    exit()

# Compare all models
try:
    best_models = compare_models(n_select=3)  # Select top 3 models
except MemoryError:
    print("Memory error during model comparison. Exiting.")
    exit()

# Iterate through each model, finalize, and make predictions
for model in best_models:
    try:
        # Finalize the model
        final_model = finalize_model(model)

        # Free memory before making predictions
        free_memory()

        # Display the final model performance
        print(f"Final tuned model performance for {model}:")
        predictions = predict_model(final_model)
        print(predictions)
        
        # Pull and print the metrics
        metrics = pull()
        print(metrics)

    except MemoryError:
        print(f"Memory error during processing model {model}. Skipping.")
        continue

# Additional memory management
free_memory()


Class distribution in Windows:
0    5704647
1      16468
Name: error, dtype: int64


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,42.1633
dt,Decision Tree Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,8.3033
rf,Random Forest Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,15.7833
ada,Ada Boost Classifier,0.9999,0.9866,0.9732,1.0,0.9864,0.9864,0.9865,15.2967
gbc,Gradient Boosting Classifier,0.9999,0.9793,0.9732,1.0,0.9864,0.9864,0.9865,20.9833
et,Extra Trees Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,5.9267
lightgbm,Light Gradient Boosting Machine,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,5.19
lr,Logistic Regression,0.9998,0.9993,0.9305,1.0,0.9639,0.9638,0.9645,27.0533
svm,SVM - Linear Kernel,0.9989,0.9996,0.6156,1.0,0.762,0.7615,0.7842,9.81
lda,Linear Discriminant Analysis,0.9989,0.9967,0.6209,1.0,0.7661,0.7655,0.7875,3.8167


Processing:   0%|          | 0/67 [00:00<?, ?it/s]

Final tuned model performance for KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=5, p=2,
                     weights='uniform'):


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,K Neighbors Classifier,1.0,0.9929,0.9858,1.0,0.9929,0.9928,0.9929


                     timestamp  \
65904285   2016-02-15 03:04:57   
21875662   2015-05-19 03:01:20   
37568917   2015-09-14 03:12:43   
50721232   2015-11-16 03:15:57   
108496704  2017-03-16 03:02:44   
...                        ...   
46443425   2015-10-19 03:24:39   
108950892  2017-03-21 03:10:56   
1388595    2016-10-17 03:17:43   
24403704   2015-06-15 03:03:19   
17642834   2015-05-10 15:16:40   

65904285   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
21875662   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
37568917   [',', 'Info', 'CBS', 'Appl:', 'Selfupdate,', '...        0   
50721232   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
108496704  [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
...                                                      ...      ...   
46443425   [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
108950892  [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
1388595

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Decision Tree Classifier,1.0,0.9929,0.9858,1.0,0.9929,0.9928,0.9929


                     timestamp  \
65904285   2016-02-15 03:04:57   
21875662   2015-05-19 03:01:20   
37568917   2015-09-14 03:12:43   
50721232   2015-11-16 03:15:57   
108496704  2017-03-16 03:02:44   
...                        ...   
46443425   2015-10-19 03:24:39   
108950892  2017-03-21 03:10:56   
1388595    2016-10-17 03:17:43   
24403704   2015-06-15 03:03:19   
17642834   2015-05-10 15:16:40   

65904285   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
21875662   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
37568917   [',', 'Info', 'CBS', 'Appl:', 'Selfupdate,', '...        0   
50721232   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
108496704  [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
...                                                      ...      ...   
46443425   [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
108950892  [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
1388595

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,1.0,0.9929,0.9858,1.0,0.9929,0.9928,0.9929


                     timestamp  \
65904285   2016-02-15 03:04:57   
21875662   2015-05-19 03:01:20   
37568917   2015-09-14 03:12:43   
50721232   2015-11-16 03:15:57   
108496704  2017-03-16 03:02:44   
...                        ...   
46443425   2015-10-19 03:24:39   
108950892  2017-03-21 03:10:56   
1388595    2016-10-17 03:17:43   
24403704   2015-06-15 03:03:19   
17642834   2015-05-10 15:16:40   

65904285   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
21875662   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
37568917   [',', 'Info', 'CBS', 'Appl:', 'Selfupdate,', '...        0   
50721232   [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
108496704  [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
...                                                      ...      ...   
46443425   [',', 'Info', 'CBS', 'Appl:', 'detect', 'Paren...        0   
108950892  [',', 'Info', 'CBS', 'Applicability(ComponentA...        0   
1388595