# **Isolation Forests**

In [1]:
from sklearn.ensemble import IsolationForest
from sklearn.datasets import make_classification
import numpy as np

# Create a synthetic dataset with anomalies
X, y = make_classification(n_samples=1000, n_features=10, n_informative=8, n_redundant=2, random_state=42)
# Introduce anomalies by replacing values in some instances with extreme values
X[5, :] = np.random.normal(loc=100, scale=10, size=10)

# Fit Isolation Forest model
clf_if = IsolationForest(contamination=0.05, random_state=42)
clf_if.fit(X)

# Predict outliers
y_pred_if = clf_if.predict(X)

# Print predicted outliers
print("Isolation Forest Predicted Outliers:", np.where(y_pred_if == -1))


Isolation Forest Predicted Outliers: (array([  5,  38,  58,  92,  94, 109, 115, 126, 148, 186, 218, 220, 239,
       252, 254, 290, 319, 333, 377, 389, 404, 416, 437, 473, 477, 490,
       520, 529, 557, 563, 567, 578, 583, 595, 614, 652, 682, 694, 707,
       767, 791, 838, 852, 855, 856, 889, 908, 914, 941, 987], dtype=int64),)


# **One-Class SVM**

In [2]:
from sklearn.svm import OneClassSVM

# Fit One-Class SVM model
clf_ocsvm = OneClassSVM(nu=0.05)
clf_ocsvm.fit(X)

# Predict outliers
y_pred_ocsvm = clf_ocsvm.predict(X)

# Print predicted outliers
print("One-Class SVM Predicted Outliers:", np.where(y_pred_ocsvm == -1))


One-Class SVM Predicted Outliers: (array([  5,  38,  58,  92,  94, 115, 126, 159, 186, 220, 252, 254, 263,
       290, 307, 319, 377, 389, 404, 439, 459, 473, 529, 557, 563, 567,
       578, 579, 583, 595, 618, 652, 666, 672, 682, 692, 694, 707, 767,
       791, 797, 816, 852, 855, 856, 908, 914, 967, 973, 987], dtype=int64),)


# **Autoencoders**

In [3]:
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import StandardScaler

# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define Autoencoder model
input_layer = Input(shape=(10,))
encoded = Dense(8, activation='relu')(input_layer)
decoded = Dense(10, activation='sigmoid')(encoded)

autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Fit Autoencoder model
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=32, shuffle=True, validation_split=0.2)

# Predict reconstruction errors
X_pred = autoencoder.predict(X_scaled)
mse = np.mean(np.square(X_scaled - X_pred), axis=1)

# Set a threshold for anomaly detection
threshold = np.percentile(mse, 95)

# Predict outliers
y_pred_autoencoder = (mse > threshold).astype(int)

# Print predicted outliers
print("Autoencoder Predicted Outliers:", np.where(y_pred_autoencoder == 1))





Epoch 1/50

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Autoencoder Predicted Outliers: (array([  5,  50,  58,  62,  94, 115, 126, 148, 186, 218, 254, 263, 307,
       319, 377, 389, 404, 416, 437, 439, 473, 477, 520, 529, 557, 563,
       567, 578, 583, 595, 666, 682, 692, 694, 767, 791, 797, 816, 827,
       838, 852, 856, 889, 890, 908, 946, 962, 967, 973, 987], dtype=int64),)


### **Explanation**


```python
from keras.layers import Input, Dense
from keras.models import Model
from sklearn.preprocessing import StandardScaler
```

- Import necessary modules:
  - `Input` and `Dense` are layers from Keras for building neural networks.
  - `Model` is used to create a Keras model.
  - `StandardScaler` from scikit-learn is imported for data standardization.

```python
# Standardize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
```

- Standardize the input data using `StandardScaler`:
  - Create a `StandardScaler` object.
  - Fit the scaler to the data (`X`) and transform the data to have zero mean and unit variance.

```python
# Define Autoencoder model
input_layer = Input(shape=(10,))
encoded = Dense(8, activation='relu')(input_layer)
decoded = Dense(10, activation='sigmoid')(encoded)
autoencoder = Model(input_layer, decoded)
```

- Define the structure of the autoencoder model:
  - `Input` layer with 10 neurons, representing the input features.
  - `Dense` layer with 8 neurons and ReLU activation for encoding.
  - Another `Dense` layer with 10 neurons and Sigmoid activation for decoding.
  - Create an instance of the `Model` class using the defined input and output layers.

```python
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
```

- Compile the autoencoder model:
  - Use the Adam optimizer.
  - Use mean squared error (MSE) as the loss function.

```python
# Fit Autoencoder model
autoencoder.fit(X_scaled, X_scaled, epochs=50, batch_size=32, shuffle=True, validation_split=0.2)
```

- Train the autoencoder model:
  - Fit the model to the standardized data (`X_scaled`).
  - Use 50 epochs, a batch size of 32, and shuffle the training data.
  - Use 20% of the data for validation.

```python
# Predict reconstruction errors
X_pred = autoencoder.predict(X_scaled)
mse = np.mean(np.square(X_scaled - X_pred), axis=1)
```

- Predict reconstruction errors:
  - Use the trained autoencoder to predict reconstructed data (`X_pred`).
  - Calculate the mean squared error (MSE) between the original and reconstructed data for each sample.

```python
# Set a threshold for anomaly detection
threshold = np.percentile(mse, 95)
```

- Set a threshold for anomaly detection:
  - Determine a threshold based on the 95th percentile of the MSE values.

```python
# Predict outliers
y_pred_autoencoder = (mse > threshold).astype(int)
```

- Predict outliers:
  - Classify instances as outliers based on whether their MSE is above the threshold.
  - Convert the boolean predictions to integers.

```python
# Print predicted outliers
print("Autoencoder Predicted Outliers:", np.where(y_pred_autoencoder == 1))
```

- Print the predicted outliers:
  - Display the indices where the predicted outliers are found based on the threshold.