In [None]:
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
from pycaret.classification import setup, compare_models, tune_model, finalize_model, predict_model

# Map each operating system to the path of its preprocessed log CSV.
# The files share one directory and naming scheme, so the paths are derived
# from the system name; nothing is read from disk here.
datasets = {
    system: f'dataset/system-logs/multiple-system-log-dataset/preprocessed-data/{system}_preprocessed.csv'
    for system in ('Android', 'Linux', 'Mac', 'Windows')
}

# Train, tune and evaluate a soft-voting ensemble independently for each
# system's preprocessed log dataset listed in `datasets`.
for system_name, file_path in datasets.items():
    df = pd.read_csv(file_path)

    # Print the label counts first: setup() below is called with
    # fix_imbalance=False, so whatever skew shows up here is left as-is.
    print(f"Class distribution in {system_name}:")
    print(df['error'].value_counts())

    # Initialise the PyCaret experiment on this dataset: 'error' is the
    # target, 10 cross-validation folds, fixed session_id.
    setup_data = setup(data=df, target='error', session_id=42, fold=10, fix_imbalance=False, verbose=True)

    # Base learners for the ensemble.
    # NOTE(review): MultinomialNB rejects negative feature values - confirm
    # the preprocessed features are non-negative after setup()'s pipeline.
    mnb = MultinomialNB()
    lr = LogisticRegression(max_iter=1000)
    dt = DecisionTreeClassifier()
    qda = QuadraticDiscriminantAnalysis()

    # Soft voting combines the members' predicted class probabilities, so
    # every member must provide predict_proba.
    ensemble_model = VotingClassifier(estimators=[
        ('mnb', mnb),
        ('lr', lr),
        ('dt', dt),
        ('qda', qda)
    ], voting='soft')

    # Search 30 candidate configurations, selecting on AUC.
    # NOTE(review): a plain scikit-learn VotingClassifier is passed without
    # custom_grid - confirm the installed PyCaret version has a search space
    # for it.
    tuned_ensemble_model = tune_model(ensemble_model, optimize='AUC', n_iter=30)

    # Score on the hold-out split BEFORE finalizing. finalize_model() refits
    # on the entire dataset (hold-out included), so calling predict_model()
    # on the finalized model would evaluate on rows it was trained on and
    # report inflated metrics.
    print(f"Final tuned model performance for {system_name}:")
    predict_model(tuned_ensemble_model)

    # Refit on all available data; this is the model to keep for deployment.
    final_model = finalize_model(tuned_ensemble_model)


In [None]:
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier
from pycaret.classification import setup, compare_models, tune_model, finalize_model, predict_model

# Paths of the preprocessed per-system log CSVs, keyed by system name.
# Only the file-name prefix varies, so each path is assembled from the shared
# directory plus the system name.
datasets = dict(
    (
        name,
        'dataset/system-logs/multiple-system-log-dataset/preprocessed-data/'
        + name
        + '_preprocessed.csv',
    )
    for name in ['Android', 'Linux', 'Mac', 'Windows']
)

# For every system in `datasets`: load its CSV, build a soft-voting ensemble,
# tune it, report hold-out performance, then finalize the model.
for system_name, file_path in datasets.items():
    df = pd.read_csv(file_path)

    # Show how the 'error' labels are distributed; imbalance is deliberately
    # not corrected below (fix_imbalance=False).
    print(f"Class distribution in {system_name}:")
    print(df['error'].value_counts())

    # Set up the PyCaret classification experiment for this dataset
    # (target 'error', 10 CV folds, session_id fixed at 42).
    setup_data = setup(data=df, target='error', session_id=42, fold=10, fix_imbalance=False, verbose=True)

    # Individual estimators that make up the ensemble.
    # NOTE(review): MultinomialNB requires non-negative inputs - verify the
    # transformed features satisfy that.
    mnb = MultinomialNB()
    lr = LogisticRegression(max_iter=1000)
    dt = DecisionTreeClassifier()
    qda = QuadraticDiscriminantAnalysis()

    # 'soft' voting predicts from the members' predicted probabilities
    # rather than from their hard class votes.
    ensemble_model = VotingClassifier(estimators=[
        ('mnb', mnb),
        ('lr', lr),
        ('dt', dt),
        ('qda', qda)
    ], voting='soft')

    # Hyperparameter search: 30 iterations, optimizing AUC.
    # NOTE(review): no custom_grid is supplied for this scikit-learn
    # VotingClassifier - confirm tune_model accepts it in the PyCaret version
    # in use.
    tuned_ensemble_model = tune_model(ensemble_model, optimize='AUC', n_iter=30)

    # Evaluate the tuned (not yet finalized) model on the hold-out split.
    # This must happen before finalize_model(): finalizing retrains on the
    # full dataset including the hold-out rows, so scoring the finalized
    # model on them would leak training data into the reported metrics.
    print(f"Final tuned model performance for {system_name}:")
    predict_model(tuned_ensemble_model)

    # Retrain on the complete dataset to obtain the model to ship.
    final_model = finalize_model(tuned_ensemble_model)
