In [None]:
import pandas as pd
from pycaret.classification import setup, compare_models, finalize_model, predict_model, pull
import gc

# Function to free up memory
def free_memory() -> None:
    """Trigger a garbage-collection pass via ``gc.collect()``.

    The number of collected objects reported by ``gc.collect()`` is
    discarded; the function always returns ``None``.
    """
    gc.collect()

# Read the preprocessed Windows log CSV and keep only a reproducible 5%
# random sample of its rows (fixed seed) so later steps process less data.
file_path = 'dataset/system-logs/multiple-system-log-dataset/preprocessed-data/Windows_preprocessed.csv'
df = pd.read_csv(file_path).sample(frac=0.05, random_state=42)

# Show how many sampled rows carry each value of the 'error' target column.
print("Class distribution in Windows:")
class_counts = df['error'].value_counts()
print(class_counts)

# Initialise the PyCaret experiment on the sampled frame: 'error' is the
# target, 3-fold cross-validation, fixed session seed, no class rebalancing,
# and quiet output.
try:
    setup_data = setup(data=df, target='error', session_id=42, fold=3, fix_imbalance=False, verbose=False)
except MemoryError:
    print("Memory error during setup. Please try reducing the data size further.")
    # Raise SystemExit instead of calling exit(): exit() is an interactive
    # helper that is not guaranteed to exist outside the REPL, and it would
    # report success (status 0) for what is a failure.
    raise SystemExit(1)

# Train and cross-validate the candidate models and keep the three
# best-ranked ones.
# NOTE(review): ranking uses compare_models' default metric; with a heavily
# imbalanced target, consider passing sort='F1' (or 'Recall') -- confirm
# against the goals of this experiment.
try:
    best_models = compare_models(n_select=3)  # Select top 3 models
except MemoryError:
    print("Memory error during model comparison. Exiting.")
    raise SystemExit(1)

# Evaluate each selected model on the hold-out split, then finalize it.
# compare_models may hand back a single estimator rather than a list (e.g.
# when only one model is available), so normalise to a list before looping.
candidate_models = best_models if isinstance(best_models, list) else [best_models]

for model in candidate_models:
    try:
        # Free memory before making predictions
        free_memory()

        # Score on the hold-out split BEFORE finalizing. finalize_model refits
        # the estimator on the complete dataset (hold-out included), so
        # scoring the finalized model on that same hold-out would report
        # leaked, over-optimistic metrics.
        print(f"Hold-out performance for {model}:")
        predictions = predict_model(model)
        print(predictions)

        # Pull and print the metrics table produced by the predict_model call
        metrics = pull()
        print(metrics)

        # Refit on all available data now that the honest score is recorded.
        final_model = finalize_model(model)

    except MemoryError:
        # Best effort: report the failure and move on to the next candidate.
        print(f"Memory error during processing model {model}. Skipping.")

# Additional memory management
free_memory()


Class distribution in Windows:
0    5704647
1      16468
Name: error, dtype: int64


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
knn,K Neighbors Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,25.1467
dt,Decision Tree Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,3.5
rf,Random Forest Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,8.5967
ada,Ada Boost Classifier,0.9999,0.9866,0.9732,1.0,0.9864,0.9864,0.9865,3.3433
gbc,Gradient Boosting Classifier,0.9999,0.9793,0.9732,1.0,0.9864,0.9864,0.9865,14.76
et,Extra Trees Classifier,0.9999,0.0,0.9732,1.0,0.9864,0.9864,0.9865,5.7867
lr,Logistic Regression,0.9998,0.9993,0.9305,1.0,0.9639,0.9638,0.9645,3.8233
svm,SVM - Linear Kernel,0.9989,0.9996,0.6156,1.0,0.762,0.7615,0.7842,3.6433
lda,Linear Discriminant Analysis,0.9989,0.9967,0.6209,1.0,0.7661,0.7655,0.7875,3.43
ridge,Ridge Classifier,0.9988,0.9968,0.5933,1.0,0.7447,0.7442,0.7698,3.3767


Processing:   0%|          | 0/63 [00:00<?, ?it/s]