In [None]:
#the goals of this assignment include:
#1. write isolation forest algorithm from scratch
#2. apply the algorithm to a dataset
#3. evaluate the performance of the algorithm
#4. write a report on the results

#the sources for this assignment are as follows:
#1. assignment7.pdf (the assignment handout)
#2. https://en.wikipedia.org/wiki/Isolation_forest
#3. https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf (Liu, Ting & Zhou, "Isolation Forest", ICDM 2008)

#the structure of the code is as follows:
#1. import libraries
#2. define classes
#3. define functions
#4. run the code



In [None]:
#1. import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
#defining the class for the isolation forest
class IsolationForest:
    def __init__(self, X, n_trees, sample_size):
        self.X = X
        self.n_trees = n_trees
        self.sample_size = sample_size
        self.trees = []
        self.fit()
        
    def fit(self):
        for i in range(self.n_trees):
            X = self.X
            X = X.sample(self.sample_size)
            tree = IsolationTree(X, 0)
            self.trees.append(tree)
            
    def anomaly_score(self, X):
        scores = np.array([tree.anomaly_score(X) for tree in self.trees])
        return scores.mean(axis=0)
    
    def anomaly_label(self, X, threshold):
        scores = self.anomaly_score(X)
        return scores > threshold
    
#defining the class for the isolation tree
class IsolationTree:
    def __init__(self, X, height):
        self.X = X
        self.size = len(X)
        self.height = height
        self.split(X)
        
    def split(self, X):
        if self.height >= 10 or len(X) <= 1:
            self.left = None
            self.right = None
            self.split_attribute = None
            self.split_value = None
            return
        n_features = X.shape[1]
        self.split_attribute = np.random.choice(n_features)
        self.split_value = np.random.uniform(X.iloc[:, self.split_attribute].min(), X.iloc[:, self.split_attribute].max())
        left = X[X.iloc[:, self.split_attribute] < self.split_value]
        right = X[X.iloc[:, self.split_attribute] >= self.split_value]
        self.left = IsolationTree(left, self.height + 1)
        self.right = IsolationTree(right, self.height + 1)
        
    def anomaly_score(self, X):
        if self.left is None:
            return self.height
        return 1 + (self.left.anomaly_score(X) if X.iloc[self.split_attribute] < self.split_value else self.right.anomaly_score(X))
    