# LOW CODE ASSIGNMENTS PART 2

## **PyCaret: Anomaly Detection**

#### Installing required libraries

In [None]:
# Install the notebook's dependencies into the *kernel's* environment.
# `%pip` (rather than `!pip`) guarantees the packages land where this kernel
# imports from; `-q` keeps the install log out of the notebook.
# pycaret is pinned to the release this notebook was recorded with.
%pip install -q pycaret==3.3.2 kaggle

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m767.7 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn>1.4.0 (from pycaret)
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.2.tar.gz (165 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.8/165.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl.metadata 

<br>
<br>
<br>

### **Downloading the Pima Indians Diabetes Dataset**

In [None]:
# Download the dataset archive with the Kaggle CLI. The file is saved as
# pima-indians-diabetes-database.zip (in /content for the recorded run).
# NOTE(review): the Kaggle CLI normally needs API credentials to be configured
# first; no such setup is visible in this notebook — confirm before a fresh run.
!kaggle datasets download -d uciml/pima-indians-diabetes-database

Dataset URL: https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database
License(s): CC0-1.0
Downloading pima-indians-diabetes-database.zip to /content
  0% 0.00/8.91k [00:00<?, ?B/s]
100% 8.91k/8.91k [00:00<00:00, 5.57MB/s]


In [None]:
# Extract diabetes.csv from the downloaded archive.
# `-o` overwrites an existing copy without prompting, so re-running this cell
# (or Restart & Run All) does not stall on unzip's interactive "replace?" prompt.
!unzip -o pima-indians-diabetes-database.zip

Archive:  pima-indians-diabetes-database.zip
  inflating: diabetes.csv            


### **Loading required libraries**

In [None]:
import numpy as np
import pandas as pd
from pycaret.anomaly import *

<br>
<br>
<br>


### **Loading the Pima Indians Diabetes Dataset**

In [None]:
# Read the CSV extracted from the Kaggle archive into a DataFrame and preview
# the first five rows (eight measurement columns plus `Outcome`).
print("Loading Kaggle Pima Indians Diabetes dataset...")
data = pd.read_csv('diabetes.csv')
data.head()

Loading Kaggle Pima Indians Diabetes dataset...


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### **Setting up PyCaret**

In [None]:
# Initialise the PyCaret anomaly-detection experiment on the loaded frame.
# session_id fixes the random seed for the experiment.
# NOTE(review): no columns are excluded, so `Outcome` is treated as a feature
# alongside the measurements — confirm this is intended.
clf_setup = setup(data=data, session_id=123)

Unnamed: 0,Description,Value
0,Session id,123
1,Original data shape,"(768, 9)"
2,Transformed data shape,"(768, 9)"
3,Numeric features,9
4,Preprocess,True
5,Imputation type,simple
6,Numeric imputation,mean
7,Categorical imputation,mode
8,CPU Jobs,-1
9,Use GPU,False


### **Creating Models**

**Training an unsupervised anomaly detection model**

In [None]:
# Fit an Isolation Forest detector on the data registered in setup().
# `fraction` is the assumed share of outliers; 0.05 is PyCaret's default and is
# spelled out here so the contamination level is visible in the code.
iforest = create_model("iforest", fraction=0.05)
iforest

Processing:   0%|          | 0/3 [00:00<?, ?it/s]

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
    random_state=123, verbose=0)

**Listing the available anomaly detection models**

In [None]:
# Show the table of available detectors; the `ID` column holds the strings
# accepted by create_model() (e.g. 'iforest').
models()

Unnamed: 0_level_0,Name,Reference
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
abod,Angle-base Outlier Detection,pyod.models.abod.ABOD
cluster,Clustering-Based Local Outlier,pycaret.internal.patches.pyod.CBLOFForceToDouble
cof,Connectivity-Based Local Outlier,pyod.models.cof.COF
iforest,Isolation Forest,pyod.models.iforest.IForest
histogram,Histogram-based Outlier Detection,pyod.models.hbos.HBOS
knn,K-Nearest Neighbors Detector,pyod.models.knn.KNN
lof,Local Outlier Factor,pyod.models.lof.LOF
svm,One-class SVM detector,pyod.models.ocsvm.OCSVM
pca,Principal Component Analysis,pyod.models.pca.PCA
mcd,Minimum Covariance Determinant,pyod.models.mcd.MCD


**Assigning anomaly labels to training data**

In [None]:
# Attach the fitted detector's results to the data used in setup(): the
# returned frame carries two extra columns, `Anomaly` and `Anomaly_Score`.
# NOTE(review): in the rows shown, Anomaly == 1 coincides with a positive
# score — presumably 1 marks an outlier; confirm against the PyCaret docs.
iforest_anomalies = assign_model(iforest)
iforest_anomalies

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Anomaly,Anomaly_Score
0,6,148,72,35,0,33.599998,0.627,50,1,0,-0.092087
1,1,85,66,29,0,26.600000,0.351,31,0,0,-0.145203
2,8,183,64,0,0,23.299999,0.672,32,1,0,-0.061087
3,1,89,66,23,94,28.100000,0.167,21,0,0,-0.150138
4,0,137,40,35,168,43.099998,2.288,33,1,1,0.019710
...,...,...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.900002,0.171,63,0,1,0.000010
764,2,122,70,27,0,36.799999,0.340,27,0,0,-0.160647
765,5,121,72,23,112,26.200001,0.245,30,0,0,-0.143632
766,1,126,60,0,0,30.100000,0.349,47,1,0,-0.091137


### **Visualizations**

In [None]:
# Render PyCaret's 'tsne' plot for the fitted Isolation Forest detector.
plot_model(iforest, plot="tsne")

### **Predicting on the dataset (same data used for training)**

In [None]:
# Score rows with the fitted detector and display the labelled frame.
# NOTE: `data` is the same frame that was passed to setup(), so this re-scores
# the training rows rather than a held-out set.
iforest_pred = predict_model(iforest, data=data)
iforest_pred


X has feature names, but IsolationForest was fitted without feature names


X has feature names, but IsolationForest was fitted without feature names



Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome,Anomaly,Anomaly_Score
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,1.0,0,-0.092087
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,0.0,0,-0.145203
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,1.0,0,-0.061087
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,0.0,0,-0.150138
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,1.0,1,0.019710
...,...,...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,0.0,1,0.000010
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,0.0,0,-0.160647
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,0.0,0,-0.143632
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,1.0,0,-0.091137


### **Saving Model**

In [None]:
# Persist the preprocessing pipeline together with the trained model.
# PyCaret appends the extension, so this writes 'iforest_pipeline.pkl'.
save_model(iforest, 'iforest_pipeline')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['Pregnancies', 'Glucose',
                                              'BloodPressure', 'SkinThickness',
                                              'Insulin', 'BMI',
                                              'DiabetesPedigreeFunction', 'Age',
                                              'Outcome'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=[],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('trained_model',
                  IForest(behaviour='new', bootstrap=False, contamination=0.05,
     max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
     random_state=123, verbose=0))]),
 'iforest_pipeline.pkl')

### **Loading the saved model**

In [None]:
# Reload the saved pipeline by its base name (no '.pkl' suffix) and display it.
# NOTE(review): the artifact is a .pkl file — presumably a pickle, so only load
# pipelines that come from a trusted source.
loaded_iforest_pipeline = load_model('iforest_pipeline')
loaded_iforest_pipeline

Transformation Pipeline and Model Successfully Loaded
