# Overview

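In this notebook we detect outliers with PyCaret using several anomaly-detection models (Isolation Forest, KNN, LOF, COF, and One-Class SVM). We then collect each model's anomaly labels side by side in a single table so the results can be compared.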

### Detecting Outliers Using PyCaret

In [None]:
from pycaret.anomaly import AnomalyExperiment
from pycaret.datasets import get_data

More model options are available; see `create_model` in the PyCaret anomaly API reference:
https://pycaret.readthedocs.io/en/stable/api/anomaly.html#pycaret.anomaly.create_model

In [2]:
# Fetch PyCaret's bundled 'anomaly' sample dataset
data = get_data('anomaly')

# Create the experiment and configure preprocessing: a fixed session_id keeps
# runs repeatable, and both normalization and feature transformation are on
exp = AnomalyExperiment()
exp.setup(
    data,
    session_id=123,
    normalize=True,
    transformation=True,
)

# Fit one detector per algorithm, in this order: Isolation Forest, KNN, LOF,
# Connectivity-Based Local Outlier Factor, One-Class SVM
iforest, knn, lof, cof, svm = (
    exp.create_model(model_id)
    for model_id in ('iforest', 'knn', 'lof', 'cof', 'svm')
)

# Label the dataset with each fitted detector (same order as above)
iforest_results, knn_results, lof_results, cof_results, svm_results = (
    exp.assign_model(detector)
    for detector in (iforest, knn, lof, cof, svm)
)

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6,Col7,Col8,Col9,Col10
0,0.263995,0.764929,0.138424,0.935242,0.605867,0.51879,0.912225,0.608234,0.723782,0.733591
1,0.546092,0.653975,0.065575,0.227772,0.845269,0.837066,0.272379,0.331679,0.429297,0.367422
2,0.336714,0.538842,0.192801,0.553563,0.074515,0.332993,0.365792,0.861309,0.899017,0.0886
3,0.092108,0.995017,0.014465,0.176371,0.24153,0.514724,0.562208,0.158963,0.073715,0.208463
4,0.325261,0.805968,0.957033,0.331665,0.307923,0.355315,0.501899,0.558449,0.885169,0.182754


Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(1000, 10)"
2,Transformed data shape,"(1000, 10)"
3,Numeric features,10
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,Transformation,True
9,Transformation method,yeo-johnson


In [3]:
# Combine anomaly assignments into a single table: the original features plus
# one '<Model>_Anomaly' label column per detector. `assign` returns a new
# frame, so `data` itself is left untouched and this cell is safe to re-run.
# The COF column uses the same capitalised prefix as its siblings.
anomaly_df = data.assign(
    IForest_Anomaly=iforest_results['Anomaly'],
    KNN_Anomaly=knn_results['Anomaly'],
    LOF_Anomaly=lof_results['Anomaly'],
    COF_Anomaly=cof_results['Anomaly'],
    SVM_Anomaly=svm_results['Anomaly'],
)

In [5]:
# Write the combined comparison table to disk as CSV, omitting the row index
anomaly_df.to_csv(path_or_buf='pycaret_anomaly_comparison.csv', index=False)