In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
import statsmodels as stm

from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest

%matplotlib inline

In [3]:
# Raise the row display limit so frames of up to 151 rows print without truncation
# (presumably sized for the 150-row iris data — confirm).
pd.options.display.max_rows = 151

#### **Datasets**

In [4]:
# Load the two datasets used throughout the notebook via the sklearn.datasets loaders.
cancer_dataset = load_breast_cancer()
iris_dataset = load_iris()

##### **1. Cancer Dataset**
##### **Segregating Features and Labels**

In [5]:
# Feature matrix: one column per measurement, labelled with the dataset's own feature names.
X_cancer_df = pd.DataFrame(
    data=cancer_dataset.data,
    columns=cancer_dataset.feature_names,
)

# Targets kept as a single-column frame named 'Label'.
y_cancer_df = pd.DataFrame(
    data=cancer_dataset.target,
    columns=['Label'],
)

In [6]:
# Sanity check on the feature frame's dimensions (rows = samples, columns = features).
X_cancer_df.shape

(569, 30)

In [7]:
# Human-readable class names; per the scikit-learn docs, the position in this array
# corresponds to the integer value stored in the 'Label' column.
cancer_dataset.target_names

array(['malignant', 'benign'], dtype='<U9')

In [8]:
# Label frame dimensions together with the per-class row counts (class balance).
y_cancer_df.shape, y_cancer_df.value_counts()

((569, 1),
 Label
 1        357
 0        212
 dtype: int64)

##### **2. Iris Dataset**
##### **Segregating Features and Labels**

In [9]:
# Feature matrix: one column per measurement, labelled with the dataset's own feature names.
X_iris_df = pd.DataFrame(
    data=iris_dataset.data,
    columns=iris_dataset.feature_names,
)

# Targets kept as a single-column frame named 'Label'.
y_iris_df = pd.DataFrame(
    data=iris_dataset.target,
    columns=['Label'],
)

In [10]:
# Dimensions of the iris feature frame plus a preview of its first five rows.
X_iris_df.shape, X_iris_df.head()

((150, 4),
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                5.1               3.5                1.4               0.2
 1                4.9               3.0                1.4               0.2
 2                4.7               3.2                1.3               0.2
 3                4.6               3.1                1.5               0.2
 4                5.0               3.6                1.4               0.2)

In [11]:
# Shape of the raw label vector and the names of the classes it encodes.
iris_dataset.target.shape, iris_dataset.target_names

((150,), array(['setosa', 'versicolor', 'virginica'], dtype='<U10'))

In [12]:
# Label frame dimensions together with the per-class row counts (class balance).
y_iris_df.shape, y_iris_df.value_counts()

((150, 1),
 Label
 2        50
 1        50
 0        50
 dtype: int64)

In [41]:
# Deliberately small isolation forest so the per-tree attributes inspected in the
# following cells stay short enough to read.
iso_for = IsolationForest(
    n_estimators=15,     # number of trees in the ensemble
    max_samples=25,      # rows drawn for each tree
    max_features=2,      # columns drawn for each tree
    bootstrap=True,      # draw rows with replacement
    contamination=0.05,  # expected share of outliers; sets the decision threshold
    random_state=41,     # fixed seed so the fitted forest is reproducible
)

In [42]:
# Fit on the iris features only; the labels in y_iris_df are not passed to the model.
iso_for.fit(X_iris_df)

IsolationForest(bootstrap=True, contamination=0.05, max_features=2,
                max_samples=25, n_estimators=15, random_state=41)

In [43]:
# Inspect the individual fitted trees that make up the forest.
iso_for.estimators_

[ExtraTreeRegressor(max_depth=5, max_features=1, random_state=716905170),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=180789943),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1315178973),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1681872075),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=280069627),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1055549073),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1764538871),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=405848271),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1224622560),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=2088199739),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1220789893),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=271136312),
 ExtraTreeRegressor(max_depth=5, max_features=1, random_state=1321442735),
 ExtraTreeRegressor(max_depth=

In [44]:
# Column indices of X_iris_df that each tree was given (one array per tree).
iso_for.estimators_features_

[array([1, 0]),
 array([0, 1]),
 array([0, 3]),
 array([1, 0]),
 array([2, 0]),
 array([2, 3]),
 array([1, 2]),
 array([2, 1]),
 array([2, 3]),
 array([0, 2]),
 array([3, 2]),
 array([2, 3]),
 array([0, 2]),
 array([2, 3]),
 array([3, 0])]

In [45]:
# Row indices of X_iris_df drawn for each tree (one array per tree); because
# bootstrap=True was requested, an index may appear more than once within a tree.
iso_for.estimators_samples_

[array([ 88,  29, 146,  21,  26,  26,  52,  90,  36, 141,  47, 141,  77,
         67, 115, 120, 133, 120,   3, 121,  41, 118, 136, 149, 130]),
 array([  8, 113,  84,  71,  13, 108,  54, 101,  37, 102,   3,  73, 115,
         23, 104,  98,  95,  93,  69,  53,  93,  44, 135,  55,   5]),
 array([  4,  15,  36,  27,  25,  40, 138, 116, 127,  25,  64,  86, 120,
        116,  33,  31, 125,  80, 102, 102, 145, 120,  43,  97,  50]),
 array([ 95, 122, 144,  20, 101,   5,  39, 109,  30, 144, 136,  39, 143,
         40,  38, 127, 107,  98,  39,   3,  98,  31,  88, 140,  14]),
 array([ 67,  82,  87,  56,  18,  14,  73,  47,  40, 118,  23,  35,  35,
        142, 139,  48, 125, 132, 112,  56,  60,  73, 110, 107,  41]),
 array([ 62,  42, 112, 142, 130,   2,  91,  41,  76,   4,  74,  62,  90,
         31, 145, 102, 147,  10,  41,  39,   6, 131,  25, 136, 127]),
 array([ 53, 102,  45, 116, 123,  78, 115,  22, 148,  22,  93,  12,  72,
        139,  59,  98,  63,  77,  72,  31, 104,  59,  60, 129,  52]),

In [46]:
# Number of rows drawn for the first tree; should equal the max_samples setting.
iso_for.estimators_samples_[0].shape

(25,)

In [47]:
# Fitted offset; per the scikit-learn docs, decision_function = score_samples - offset_,
# with the value derived from the contamination setting.
iso_for.offset_

-0.6031896746731866