<a href="https://colab.research.google.com/github/niteshchoudhary12445/-Anomaly-detection/blob/main/Anomaly_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Anomaly Detection

## 1. Data Import and Exploration

In [None]:
!pip install tensorflow==2.16.1

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the worksheet at position 2 (integer sheet_name is a zero-based position)
# from the assignment workbook.
data = pd.read_excel("/content/AssignmentData.xlsx",sheet_name=2)

In [None]:
# Preview the raw frame as loaded.
data

In [None]:
# Column dtypes and non-null counts of the raw frame.
data.info()

In [None]:
# Number of rows per value of the Class column.
data['Class'].value_counts()

In [None]:
# One histogram per numeric column. plt.show() renders the figure and keeps the
# array-of-Axes repr out of the cell output.
data.hist(bins=50, figsize=(30, 20))
plt.show()

## 2. Feature Engineering

In [None]:
from sklearn.preprocessing import RobustScaler

# Work on a copy so the raw `data` frame stays untouched.
new_df = data.copy()

# Keep the fitted scaler in a variable (instead of discarding it) so the very
# same transform can later be applied to unseen transactions.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split further down, so test-set statistics influence the scaling.
amount_scaler = RobustScaler()
new_df["Amount"] = amount_scaler.fit_transform(
    new_df["Amount"].values.reshape(-1, 1)
).ravel()

In [None]:
# Inspect the frame after scaling Amount.
new_df

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale Time with a min-max scaler; the fitted scaler stays available as
# `minmax_scaler`.
minmax_scaler = MinMaxScaler()
time_scaled = minmax_scaler.fit_transform(new_df[["Time"]])
new_df[["Time"]] = time_scaled

In [None]:
# Inspect the frame after scaling Time.
new_df

In [None]:
# Columns to force to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce').
col_to_convert = ["V2", "V7", "V9", "V24"]
new_df = new_df.assign(
    **{name: pd.to_numeric(new_df[name], errors='coerce') for name in col_to_convert}
)

In [None]:
# Check dtypes and non-null counts after the numeric conversion.
new_df.info()

In [None]:
# Drop every row that has a missing value in any of the converted columns.
new_df = new_df.dropna(subset=col_to_convert, axis=0)

In [None]:
# Confirm that no missing values remain in the converted columns.
new_df.info()

## 3. Visualizing the Anomalies

In [None]:
from sklearn.decomposition import PCA

# Project the feature columns onto two principal components for plotting.
# Any existing PCA1/PCA2 columns are excluded from the input so that re-running
# this cell does not feed an earlier projection back into the fit, and
# random_state pins the result when scikit-learn selects its randomized solver.
pca = PCA(n_components=2, random_state=42)
pca_input = new_df.drop(columns=['Class', 'PCA1', 'PCA2'], errors='ignore')
new_df_pca = pca.fit_transform(pca_input)

# Add the PCA features to the dataframe
new_df['PCA1'] = new_df_pca[:, 0]
new_df['PCA2'] = new_df_pca[:, 1]

# Visualize the data in 2D
sns.scatterplot(x='PCA1', y='PCA2', hue='Class', data=new_df, palette={0: 'blue', 1: 'red'})
plt.title('PCA of Transactions')
plt.show()

In [None]:
# Split the dataset
from sklearn.model_selection import train_test_split

# Stratify on the label: with so few positive rows, an unstratified random split
# can leave the train and test sets with noticeably different fraud ratios.
X_train, X_test, y_train, y_test = train_test_split(
    new_df.drop("Class", axis=1),
    new_df["Class"],
    test_size=0.2,
    random_state=42,
    stratify=new_df["Class"],
)

In [None]:
# Shapes of the training features and labels.
X_train.shape,y_train.shape

In [None]:
# Shapes of the test features and labels.
X_test.shape,y_test.shape

In [None]:
# Class balance of the training labels.
y_train.value_counts()

In [None]:
# Class balance of the test labels.
y_test.value_counts()

## 4. Anomaly Detection Model

In [None]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, roc_auc_score

# Initialize Isolation Forest
# contamination is the expected proportion of fraudulent transactions (0.17%);
# random_state makes the randomly built trees reproducible across runs.
iso_forest_model = IsolationForest(contamination=0.0017, random_state=42)
iso_forest_model.fit(X_train)

# Predict anomalies: scikit-learn returns 1 for inliers and -1 for outliers,
# remapped here to 0 (normal) and 1 (anomaly) to match the Class labels.
y_pred = iso_forest_model.predict(X_test)
y_pred = pd.Series(y_pred).map({1: 0, -1: 1})

# score_samples is higher for normal points, so negate it to obtain a
# continuous score that grows with anomalousness.
iso_forest_scores = -iso_forest_model.score_samples(X_test)

# Evaluation
# The report uses the hard labels; ROC-AUC is computed from the continuous
# scores, because thresholded labels collapse the ROC curve to a single point.
print(classification_report(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, iso_forest_scores))


This model's anomaly predictions score only 19%, and its ROC-AUC score is 0.59031126768893388, which means it is basically guessing.

In [None]:
# Tally how many test transactions received each predicted label.
predicted_labels = pd.Series(y_pred)
counts = predicted_labels.value_counts()
print(counts)

In [None]:
import pickle as pkl

# Persist the fitted Isolation Forest to a pickle file in the working directory.
with open("iso_forest_model.pkl", mode="wb") as model_file:
    pkl.dump(iso_forest_model, model_file)


In [None]:
import tensorflow as tf

# Prepare the train_dataset: pair each feature row with its label, then batch
# and prefetch.
train_features = tf.data.Dataset.from_tensor_slices(X_train)
train_labels = tf.data.Dataset.from_tensor_slices(y_train)
train_pairs = tf.data.Dataset.zip((train_features, train_labels))
train_dataset = train_pairs.batch(1024).prefetch(tf.data.AUTOTUNE)

In [None]:
# Prepare the test_dataset: pair each feature row with its label, then batch
# and prefetch.
test_features = tf.data.Dataset.from_tensor_slices(X_test)
test_labels = tf.data.Dataset.from_tensor_slices(y_test)
test_pairs = tf.data.Dataset.zip((test_features, test_labels))
test_dataset = test_pairs.batch(1024).prefetch(tf.data.AUTOTUNE)

In [None]:
# Number of training labels.
y_train.shape

In [None]:
# Number of feature columns; used as the width of the network's input layer.
input_dims = X_train.shape[1]
input_dims

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.models import Model

# NOTE: despite the "autoencoder" name, the network ends in a single sigmoid
# unit, so it emits one value per transaction rather than a reconstruction of
# the input.
inputs = layers.Input(shape=(input_dims,), name="Input_layer")

# Encoder: progressively narrower ReLU layers, followed by dropout.
encoded = inputs
for units in (128, 64, 32):
    encoded = layers.Dense(units, activation="relu")(encoded)
drop_layer = layers.Dropout(0.5)(encoded)

# Latent space (bottleneck)
latent_space = layers.Dense(16, activation="relu", name="Latent_space")(drop_layer)

# Decoder: mirrors the encoder widths, followed by dropout.
decoded = latent_space
for units in (32, 64, 128):
    decoded = layers.Dense(units, activation="relu")(decoded)
drop_layer = layers.Dropout(0.5)(decoded)
outputs = layers.Dense(1, activation="sigmoid")(drop_layer)

# Autoencoder model
autoencoder_model = Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer architecture and parameter counts.
autoencoder_model.summary()

In [None]:
# Compile the model with the Adam optimizer and binary cross-entropy loss.
autoencoder_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
)

In [None]:
# Path the ModelCheckpoint callback writes the best model to during training.
file_path = "/content/Models/autoencoder_model5.keras"

In [None]:
import tensorflow as tf

# Set up the ModelCheckpoint callback: writes the model to `file_path` only
# when the validation loss improves.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=file_path,
    monitor="val_loss",
    save_best_only=True,
    verbose=1,
)

# Set up the ReduceLROnPlateau callback: halves the learning rate after 5
# epochs without improvement in validation loss, never going below 1e-6, and
# prints a message on each reduction.
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=5,
    verbose=1,
    min_lr=1e-6,
)

training_callbacks = [checkpoint_callback, reduce_lr_callback]

# Fit the autoencoder models
# NOTE(review): the test set doubles as validation data here, so the checkpoint
# kept by save_best_only is selected on test-set loss.
history = autoencoder_model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=50,
    callbacks=training_callbacks,
)


In [None]:
# Loss of the model in its final-epoch state on the test set.
autoencoder_model.evaluate(test_dataset)

In [None]:
# Reload the best checkpoint from the path the ModelCheckpoint callback wrote
# to, reusing `file_path` rather than repeating the path literal.
load_model = tf.keras.models.load_model(file_path)

In [None]:
# Loss of the reloaded checkpoint on the test set.
load_model.evaluate(test_dataset)

In [None]:
# Build the batched, feature-only test pipeline directly from X_test so the
# cell can be re-run without batching an already batched dataset.
test_features = (
    tf.data.Dataset.from_tensor_slices(X_test)
    .batch(1024)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Show the dataset object (element spec) of the feature-only test pipeline.
test_features

In [None]:
# Raw sigmoid outputs of the reloaded model for every test row.
autoencoder_y_pred = load_model.predict(X_test)

In [None]:
# Round the outputs to hard labels and drop size-1 dimensions; this overwrites
# the raw outputs stored under the same name.
autoencoder_y_pred = tf.squeeze(tf.round(autoencoder_y_pred))

In [None]:
# Report how often each distinct predicted label occurs.
unique_val, counts = np.unique(autoencoder_y_pred, return_counts=True)
for position in range(len(unique_val)):
    print(f"Value: {unique_val[position]}  Count: {counts[position]}\n")

In [None]:
# classification_report needs hard labels, but ROC-AUC should be computed from
# continuous scores. autoencoder_y_pred already holds rounded labels at this
# point, so the sigmoid outputs are recomputed for the AUC.
autoencoder_y_prob = np.ravel(load_model.predict(X_test))
print(classification_report(y_test, autoencoder_y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, autoencoder_y_prob))

This model predicts anomalies with 82% accuracy.

In [None]:
# Save a copy of the reloaded model under a relative path (current working
# directory).
load_model.save("Autoencoder_model_streamlit.keras")

In [None]:
# Colab-only: trigger a browser download of the saved model file.
# NOTE(review): the absolute /content path matches the relative save above only
# if the working directory is /content — confirm.
from google.colab import files
files.download("/content/Autoencoder_model_streamlit.keras")

## 5. Write a function that accepts a new dataset of credit card transactions and the trained anomaly detection model, and returns the transactions classified as fraudulent.

In [None]:
# Ensure the new data has the same features as the model was trained on
def detect_fraudulent_transactions(new_data: pd.DataFrame, model, threshold: float = 0.5) -> pd.DataFrame:
    """
    Detects fraudulent transactions from a new dataset using a trained anomaly detection model.

    The raw output of ``model.predict`` is turned into a fraud flag according to
    the kind of model:

    * scikit-learn outlier detectors (e.g. IsolationForest) label outliers as
      ``-1`` and inliers as ``1``; rows predicted ``-1`` are flagged.
    * any other model is assumed to return a fraud probability or a 0/1 label
      per row (e.g. a Keras model with one sigmoid unit); rows whose value is
      strictly greater than ``threshold`` are flagged.

    Parameters:
    - new_data: pd.DataFrame. The new dataset containing credit card transactions.
      It must be preprocessed the same way as the training data.
    - model: A trained anomaly detection model exposing ``predict``.
    - threshold: float. Decision threshold for probability/label outputs
      (ignored for scikit-learn outlier detectors). Defaults to 0.5.

    Returns:
    - fraudulent_transactions: pd.DataFrame. A DataFrame containing only the transactions classified as fraudulent.

    Raises:
    - ValueError: if a feature column the model was fitted on is missing from
      ``new_data``, or if the model does not return exactly one prediction per row.
    """
    # Nothing to score; also avoids calling predict on an empty batch.
    if new_data.empty:
        return new_data.copy()

    # When the model recorded its training columns (scikit-learn estimators
    # fitted on a DataFrame do), validate them and feed them in the same order.
    expected_columns = getattr(model, "feature_names_in_", None)
    if expected_columns is not None:
        missing = [name for name in expected_columns if name not in new_data.columns]
        if missing:
            raise ValueError(f"new_data is missing feature columns: {missing}")
        features = new_data[list(expected_columns)]
    else:
        features = new_data

    predictions = np.asarray(model.predict(features))
    if predictions.size != len(new_data):
        raise ValueError(
            "model.predict must return exactly one value per row; "
            f"got shape {predictions.shape} for {len(new_data)} rows"
        )
    # Flatten (n, 1) outputs so the mask is one-dimensional.
    predictions = predictions.reshape(-1)

    # scikit-learn is optional here: without it, no model can be an outlier
    # detector in the scikit-learn sense.
    try:
        from sklearn.base import OutlierMixin
        outlier_convention = isinstance(model, OutlierMixin)
    except ImportError:
        outlier_convention = False

    if outlier_convention:
        is_fraud = predictions == -1
    else:
        is_fraud = predictions > threshold

    fraudulent_transactions = new_data.loc[is_fraud]

    return fraudulent_transactions


In [None]:
# Feature-only view of the cleaned frame (label column removed).
dataset = new_df.drop(columns=["Class"])

In [None]:
# Score every transaction with the reloaded network and preview the flagged rows.
Fraud_transaction = detect_fraudulent_transactions(
    new_data=dataset,
    model=load_model,
)
Fraud_transaction.head()

## Link for application: https://gc9hzmafjcithzgv4ycxrp.streamlit.app/

Note: Please ensure that the data does not contain null values or string values (use the data provided with this Google link).