### Task 1: Introduction to Isolation Forest
**Description**: Install the necessary library and load a sample dataset.

**Steps**:
1. Install scikit-learn
2. Load a sample dataset using Python

In [None]:
# write your code from here
# Step 1: scikit-learn has to be installed in the environment the kernel uses.
# Uncomment the next line to install it from inside the notebook.
# !pip install scikit-learn

# Step 2: load a small sample dataset (iris) and wrap it in a DataFrame
import pandas as pd
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preview the first five rows to confirm the data loaded as expected
print(df.head(n=5))


### Task 2: Building an Isolation Forest
**Description**: Initialize an Isolation Forest model and fit it to the Boston dataset.

**Steps**:
1. Initialize Isolation Forest
2. Fit model

In [None]:
# write your code from here
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest

# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so a
# plain import raises ImportError on current releases. It is still used when
# available; otherwise the same 13-feature table is rebuilt from the original
# StatLib file, following the recipe in scikit-learn's deprecation notice.
BOSTON_URL = "http://lib.stat.cmu.edu/datasets/boston"
BOSTON_FEATURE_NAMES = [
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
]

# Load Boston Housing dataset
try:
    from sklearn.datasets import load_boston
except ImportError:
    # After a 22-line header, every record in the raw file is split across two
    # physical lines: 11 values on the first, 3 on the second (two more
    # features followed by the MEDV target, which is not needed here).
    raw = pd.read_csv(BOSTON_URL, sep=r"\s+", skiprows=22, header=None)
    features = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
    X = pd.DataFrame(features, columns=BOSTON_FEATURE_NAMES)
else:
    boston = load_boston()
    X = pd.DataFrame(boston.data, columns=boston.feature_names)

# Initialize Isolation Forest model (fixed seed so results are reproducible)
iso_forest = IsolationForest(random_state=42)

# Fit the model to the data
iso_forest.fit(X)


### Task 3: Detecting Anomalies
**Description**: Use the fitted Isolation Forest model to predict anomalies.

**Steps**:
1. Predict anomalies
2. Display anomaly counts

In [None]:
# write your code from here
import numpy as np

# Predict anomalies (-1 for anomaly, 1 for normal)
anomaly_labels = iso_forest.predict(X)

# Count anomalies and normal points
unique, counts = np.unique(anomaly_labels, return_counts=True)

# Convert the NumPy scalars to built-in ints: under NumPy 2 a dict of NumPy
# scalars prints as {np.int64(-1): np.int64(...), ...}, which obscures the
# counts. Lookups such as anomaly_counts[-1] behave the same either way.
anomaly_counts = {int(label): int(count) for label, count in zip(unique, counts)}

print("Anomaly counts:", anomaly_counts)


### Task 4: Visualizing Anomalies
**Description**: Visualize the results to see which samples are considered anomalies.

**Steps**:
1. Draw a scatter plot of two features, coloring each point by its predicted anomaly label

In [None]:
# write your code from here
import matplotlib.pyplot as plt

# X has more than two columns, so only the first two features are plotted.
# X is a pandas DataFrame: NumPy-style `X[:, 0]` is interpreted as a column
# label lookup and raises, so columns are selected by position with `.iloc`.
first_feature = X.iloc[:, 0]
second_feature = X.iloc[:, 1]

fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    first_feature,
    second_feature,
    c=anomaly_labels,  # -1 / 1 labels drive the color of each point
    cmap='coolwarm',
    edgecolor='k',
    s=50,
)
ax.set_title('Isolation Forest Anomaly Detection')
# Include the real column names so the figure can be read on its own.
ax.set_xlabel(f'Feature 1 ({first_feature.name})')
ax.set_ylabel(f'Feature 2 ({second_feature.name})')
fig.colorbar(points, ax=ax, label='Anomaly Label (1=Normal, -1=Anomaly)')
plt.show()


### Task 5: Interpret Contamination Parameter
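**Description**: Experiment with different contamination levels (the assumed proportion of outliers in the data) and observe how the number of detected anomalies changes.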

In [None]:
# write your code from here
from sklearn.ensemble import IsolationForest
import numpy as np

# Define different contamination levels to test
contamination_levels = [0.01, 0.05, 0.1, 0.2]

for contamination in contamination_levels:
    # Use a dedicated name for each trial model. Rebinding `iso_forest` here
    # would replace the model fitted earlier in the notebook, so re-running
    # the prediction or plotting cells afterwards would silently use the last
    # contamination level instead of the original model.
    trial_forest = IsolationForest(contamination=contamination, random_state=42)
    preds = trial_forest.fit(X).predict(X)

    # predict() marks anomalies with -1; count how many rows were flagged
    n_anomalies = int(np.sum(preds == -1))
    print(f"Contamination: {contamination:.2f} -> Detected anomalies: {n_anomalies}")
