<a href="https://colab.research.google.com/github/sravansai-10/beee_LCD/blob/master/Sravan_VAE_anamoly_Detectiom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

import tensorflow as tf
from tensorflow.keras import layers, models, backend as K


In [None]:
# Location of the raw CSV (Colab's default upload directory); change it here if the
# file lives elsewhere.
DATA_PATH = '/content/creditcard.csv'

data = pd.read_csv(DATA_PATH)

# Leave the frame as the cell's last expression so the notebook renders it as a
# table instead of the wrapped plain-text dump that print() produces.
data.head()


   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [None]:
# NOTE(review): `X` (feature frame) and `y` (labels, 0 = normal / 1 = fraud) are not
# defined in any visible cell -- presumably they are derived from `data` earlier.
# Confirm they exist after a kernel restart.

# Use only normal data for training
X_normal = X[y == 0]
X_fraud = X[y == 1]

# Split train/test: 80% of the normal rows train the model; the remaining normal rows
# plus every fraud row form the evaluation set.
X_train, X_test_normal = train_test_split(X_normal, test_size=0.2, random_state=42)
X_test = pd.concat([X_test_normal, X_fraud])
y_test = np.concatenate([np.zeros(len(X_test_normal)), np.ones(len(X_fraud))])

# Standardize every column, with statistics learned from the training rows only.
#  * Fitting on the full frame (as before) leaks test/fraud statistics into training.
#  * Scaling only 'Amount' leaves any other raw-scale column dominating the
#    squared-error loss, which can blow up to NaN during training.
#  * Transforming the split copies leaves `X` itself untouched, so the cell can be
#    re-run safely.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train).astype('float32')
X_test = scaler.transform(X_test).astype('float32')

# Fail loudly here rather than silently training on NaN/inf values.
if not (np.isfinite(X_train).all() and np.isfinite(X_test).all()):
    raise ValueError("Non-finite values found in the features; clean the data before training.")

In [None]:
# Number of latent variables the encoder compresses each row into.
latent_dim = 2

# Number of feature columns per row (the trailing axis of the 2-D training matrix);
# the encoder input and decoder output are both this wide.
input_dim = X_train.shape[-1]

# Encoder
def build_encoder():
    """Build the encoder half of the VAE.

    Reads the module-level ``input_dim`` and ``latent_dim``.

    Returns
    -------
    tf.keras.Model
        Model named ``'encoder'`` that maps a batch of ``input_dim``-wide rows to
        the list ``[z_mean, z_log_var, z]``, where ``z`` is a random draw obtained
        with the reparameterization trick.
    """

    def _reparameterize(stats):
        # z = mean + std * noise, with std = exp(0.5 * log-variance).
        mu, log_var = stats
        noise = tf.random.normal(shape=tf.shape(mu))
        return mu + tf.exp(0.5 * log_var) * noise

    encoder_in = layers.Input(shape=(input_dim,))
    hidden = layers.Dense(16, activation='relu')(encoder_in)

    # Two parallel linear heads: the mean and the log-variance of the latent Gaussian.
    z_mean = layers.Dense(latent_dim)(hidden)
    z_log_var = layers.Dense(latent_dim)(hidden)

    z = layers.Lambda(_reparameterize)([z_mean, z_log_var])

    return tf.keras.Model(
        inputs=encoder_in,
        outputs=[z_mean, z_log_var, z],
        name='encoder',
    )

# Decoder
def build_decoder():
    """Build the decoder half of the VAE.

    Reads the module-level ``latent_dim`` and ``input_dim``.

    Returns
    -------
    tf.keras.Model
        Model named ``'decoder'`` that maps ``latent_dim``-wide latent vectors back
        to ``input_dim``-wide reconstructions.
    """
    latent_inputs = layers.Input(shape=(latent_dim,))
    h = layers.Dense(16, activation='relu')(latent_inputs)
    # Linear output: the features contain negative values, which a sigmoid
    # (confined to the open interval (0, 1)) could never reproduce, so the
    # reconstruction error would stay large no matter how long the model trained.
    outputs = layers.Dense(input_dim, activation=None)(h)
    return tf.keras.Model(latent_inputs, outputs, name='decoder')


In [None]:
class VAE(tf.keras.Model):
    """Variational autoencoder combining an encoder and a decoder model.

    The loss is the summed squared reconstruction error plus the KL divergence
    between the encoder's Gaussian and a standard normal, each averaged over the
    batch. The same loss is used for training and for validation/evaluation.

    Parameters
    ----------
    encoder : tf.keras.Model
        Maps a batch of inputs to ``[z_mean, z_log_var, z]``.
    decoder : tf.keras.Model
        Maps latent vectors back to input space.
    **kwargs
        Forwarded to ``tf.keras.Model``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Running means, so the progress bar shows epoch averages instead of the
        # value of whichever batch happened to run last.
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="reconstruction_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers Keras resets at the start of every epoch / evaluation run."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def call(self, inputs):
        """Encode ``inputs``, sample a latent vector and return its reconstruction."""
        z_mean, z_log_var, z = self.encoder(inputs)
        return self.decoder(z)

    @staticmethod
    def _unpack(data):
        """Return the feature tensor whether Keras passed ``x`` or a tuple ``(x, ...)``."""
        if isinstance(data, tuple):
            data = data[0]
        return data

    def _compute_losses(self, data, training):
        """Return ``(total_loss, recon_loss, kl_loss)`` for one batch."""
        z_mean, z_log_var, z = self.encoder(data, training=training)
        reconstruction = self.decoder(z, training=training)

        # Reconstruction loss: squared error summed over features, averaged over rows.
        recon_loss = tf.reduce_mean(
            tf.reduce_sum(tf.square(data - reconstruction), axis=1))

        # KL divergence between N(z_mean, exp(z_log_var)) and N(0, I).
        kl_loss = -0.5 * tf.reduce_mean(tf.reduce_sum(
            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1))

        return recon_loss + kl_loss, recon_loss, kl_loss

    def _update_and_report(self, total_loss, recon_loss, kl_loss):
        """Fold one batch's losses into the trackers and return the running means."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(recon_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """Run one optimization step on a batch and return the running loss means."""
        data = self._unpack(data)

        with tf.GradientTape() as tape:
            total_loss, recon_loss, kl_loss = self._compute_losses(data, training=True)

        # The loss is differentiated directly; it does not need to be registered
        # through add_loss().
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        return self._update_and_report(total_loss, recon_loss, kl_loss)

    def test_step(self, data):
        """Evaluate one batch without updating weights.

        Needed because the model is compiled without a ``loss``: the default
        ``test_step`` would raise "No loss to compute" as soon as ``fit`` is given
        validation data or ``evaluate`` is called.
        """
        data = self._unpack(data)
        total_loss, recon_loss, kl_loss = self._compute_losses(data, training=False)
        return self._update_and_report(total_loss, recon_loss, kl_loss)


In [None]:
# Seed Python, NumPy and TensorFlow so weight initialization, shuffling and the
# latent sampling noise are repeatable across runs.
tf.keras.utils.set_random_seed(42)

# Build fresh sub-models here instead of relying on `encoder` / `decoder` objects
# left over in the kernel from an earlier session.
encoder = build_encoder()
decoder = build_decoder()

vae = VAE(encoder, decoder)
vae.compile(optimizer='adam')

# Keep the History object for later inspection (assigning it also suppresses its repr).
# validation_split holds out the last 10% of X_train and evaluates it after each epoch.
history = vae.fit(X_train, epochs=30, batch_size=128, validation_split=0.1)


Epoch 1/30
[1m1592/1600[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - kl_loss: nan - loss: nan - reconstruction_loss: nan

ValueError: No loss to compute. Provide a `loss` argument in `compile()`.

In [None]:
# Attach the user's Google Drive to the Colab VM's filesystem (prompts for
# authorization the first time it runs).
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
def encode_decode(model, X, batch_size=1024):
    """Encode ``X`` and decode from the latent mean (no sampling noise).

    Decoding from ``z_mean`` rather than a random draw ``z`` makes the anomaly
    score of a row deterministic, so repeated runs rank rows identically.

    Returns
    -------
    tuple of np.ndarray
        ``(z_mean, z_log_var, z, reconstruction)``.
    """
    X = np.asarray(X, dtype='float32')
    z_mean, z_log_var, z = model.encoder.predict(X, batch_size=batch_size, verbose=0)
    reconstruction = model.decoder.predict(z_mean, batch_size=batch_size, verbose=0)
    return z_mean, z_log_var, z, reconstruction


def row_mse(X, X_hat):
    """Mean squared error of each row of ``X`` against its reconstruction ``X_hat``."""
    return np.mean(np.square(np.asarray(X, dtype='float32') - X_hat), axis=1)


# Reconstruct test data
z_mean, z_log_var, z, X_test_reconstructed = encode_decode(vae, X_test)
reconstruction_error = row_mse(X_test, X_test_reconstructed)

# Reference errors on the (normal-only) training rows, used to set the threshold.
train_reconstruction_error = row_mse(X_train, encode_decode(vae, X_train)[3])

# NaN scores compare False against any threshold, which would silently label every
# row "normal"; stop instead so a diverged model is noticed.
if not (np.isfinite(reconstruction_error).all()
        and np.isfinite(train_reconstruction_error).all()):
    raise ValueError("Reconstruction error contains NaN/inf - the model did not train properly.")

# Threshold: 95th percentile of the error on normal training data, so the cut-off
# is chosen without looking at the test set.
threshold = np.percentile(train_reconstruction_error, 95)
y_pred = (reconstruction_error > threshold).astype(int)

# Evaluation
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
# Threshold-independent view of how well the error separates fraud from normal rows.
print(f"ROC AUC: {roc_auc_score(y_test, reconstruction_error):.4f}")


              precision    recall  f1-score   support

         0.0       0.99      1.00      1.00     56863
         1.0       0.00      0.00      0.00       492

    accuracy                           0.99     57355
   macro avg       0.50      0.50      0.50     57355
weighted avg       0.98      0.99      0.99     57355



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
