<a href="https://colab.research.google.com/github/snehhitvverma/Cyfuture-assignment/blob/main/Untitled5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
1️⃣ Install Dependencies

Ensure you have the required libraries:

pip install numpy pandas scikit-learn fastapi uvicorn streamlit requests torch torchvision tensorflow transformers langchain openai nltk spacy joblib polars

2️⃣ Load & Preprocess Data

We will use a sample fraud transaction dataset from Kaggle/UCI ML Repository.

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("fraud_transactions.csv")  # Replace with actual dataset path

# Selecting relevant features (assuming 'Class' column is 1 for fraud, 0 for normal)
features = [col for col in df.columns if col != 'Class']
X = df[features]
y = df['Class']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used to transform the training set
# (data leakage), which makes evaluation on X_test optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features using statistics learned from the training rows only,
# then apply the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept so that any later cell relying on the fully scaled matrix still works.
X_scaled = scaler.transform(X)

# Save scaler for later use (the API reloads it to transform incoming requests)
import joblib
joblib.dump(scaler, "scaler.pkl")

3️⃣ Train Isolation Forest (Anomaly Detection Model)

This unsupervised model detects anomalies based on deviation from normal transactions.

from sklearn.ensemble import IsolationForest
import joblib

# Hyper-parameters for the Isolation Forest: 100 trees, an expected anomaly
# share of 2%, and a fixed seed so repeated runs build the same forest.
forest_params = {
    "n_estimators": 100,
    "contamination": 0.02,
    "random_state": 42,
}

# Fit the unsupervised detector on the training features only (labels unused).
model = IsolationForest(**forest_params)
model.fit(X_train)

# Persist the fitted detector so the API can load it without retraining.
joblib.dump(model, "fraud_detector.pkl")

print("Model trained and saved successfully!")

4️⃣ Train Autoencoder (Deep Learning Anomaly Model)

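A deep learning-based alternative: the autoencoder learns to reconstruct transactions, and a transaction with a high reconstruction error is flagged as a potential fraud.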

import torch
import torch.nn as nn
import torch.optim as optim

# Define Autoencoder model
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 32 -> 16 -> 8 -> 16 -> 32 -> input_dim.

    The encoder compresses each input row to an 8-dimensional code and the
    decoder maps that code back to the original feature space.

    Args:
        input_dim: Number of features in each input row.
    """

    # NOTE: the constructor must be spelled ``__init__`` (double underscores).
    # With ``_init_`` Python never calls it, so ``Autoencoder(input_dim)``
    # raises TypeError and no layers are ever registered.
    def __init__(self, input_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8)
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim)
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (same trailing dimension as the input)."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

# Convert the scaled feature matrices to float32 tensors up front
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Build the network sized to the number of feature columns, together with the
# reconstruction loss (mean squared error) and the Adam optimiser.
input_dim = X_train.shape[1]
autoencoder = Autoencoder(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=1e-3)

# Full-batch training: every epoch is a single gradient step over all of
# X_train_tensor, with the input itself serving as the reconstruction target.
epochs = 50
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = autoencoder(X_train_tensor)
    loss = criterion(outputs, X_train_tensor)
    loss.backward()
    optimizer.step()

    # Report progress on epochs 0, 10, 20, ...
    if not epoch % 10:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

# Persist only the learned weights; the class definition is needed to reload them
torch.save(autoencoder.state_dict(), "autoencoder.pth")
print("Autoencoder trained and saved!")

5️⃣ Deploy API with FastAPI

Create a FastAPI server to serve predictions.

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
import joblib
import torch

# Load trained models
# NOTE: joblib/torch artifacts are pickle-based; only load files produced by
# the training steps above, never files from an untrusted source.
scaler = joblib.load("scaler.pkl")
isolation_forest = joblib.load("fraud_detector.pkl")

# Load Autoencoder
# The API runs as its own process, so the training-time ``X_train`` does not
# exist here. The fitted scaler records how many features it was trained on,
# which is exactly the autoencoder's input width.
# NOTE(review): the Autoencoder class must be defined in or imported into this module.
input_dim = scaler.n_features_in_
autoencoder = Autoencoder(input_dim)
# map_location keeps loading working on CPU-only hosts regardless of where
# the weights were saved.
autoencoder.load_state_dict(torch.load("autoencoder.pth", map_location="cpu"))
autoencoder.eval()  # inference mode

# Initialize API
app = FastAPI()

class TransactionInput(BaseModel):
    """Request body for the ``/predict`` endpoint: one transaction's feature values."""
    # NOTE(review): a bare ``list`` annotation presumably does not constrain the
    # element types; consider a parameterized list of floats — confirm against
    # the pydantic version in use.
    features: list  # List of numerical transaction features

@app.post("/predict")
async def predict(transaction: TransactionInput):
    """Score one transaction with both the Isolation Forest and the autoencoder.

    Args:
        transaction: Request body whose ``features`` list holds the raw
            (unscaled) feature values of a single transaction.

    Returns:
        A dict with the keys ``"IsolationForest Prediction"`` and
        ``"Autoencoder Prediction"``, each ``"Fraudulent"`` or ``"Legitimate"``.

    Raises:
        HTTPException: 400 when the features are non-numeric or their count
            does not match what the scaler was fitted on; 500 when scoring
            itself fails.
    """
    # Input validation happens OUTSIDE the broad try/except below: HTTPException
    # is itself an Exception, so raising a 400 inside that block would be
    # re-wrapped as a 500.
    try:
        transaction_array = np.asarray(transaction.features, dtype=float).reshape(1, -1)
    except (TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=400, detail=f"Features must be numeric: {exc}"
        ) from exc

    # Fitted scikit-learn estimators expose the training feature count; skip the
    # check if the attribute is unavailable (older scikit-learn versions).
    expected_features = getattr(scaler, "n_features_in_", None)
    if expected_features is not None and transaction_array.shape[1] != expected_features:
        raise HTTPException(
            status_code=400,
            detail=(
                f"Expected {expected_features} features, "
                f"got {transaction_array.shape[1]}"
            ),
        )

    try:
        transaction_scaled = scaler.transform(transaction_array)

        # Isolation Forest Prediction (-1 marks an outlier)
        isolation_pred = isolation_forest.predict(transaction_scaled)
        isolation_result = "Fraudulent" if isolation_pred[0] == -1 else "Legitimate"

        # Autoencoder Prediction: mean squared reconstruction error of the row
        with torch.no_grad():
            transaction_tensor = torch.tensor(transaction_scaled, dtype=torch.float32)
            reconstructed = autoencoder(transaction_tensor)
            reconstruction_error = torch.mean((transaction_tensor - reconstructed) ** 2).item()

        # Threshold for fraud detection (tune as needed)
        autoencoder_result = "Fraudulent" if reconstruction_error > 0.1 else "Legitimate"

        return {
            "IsolationForest Prediction": isolation_result,
            "Autoencoder Prediction": autoencoder_result
        }

    except Exception as e:
        # Top-level boundary: anything failing during scoring is a server error.
        raise HTTPException(status_code=500, detail=str(e)) from e

# Run API
# The guard needs double underscores: ``_name_`` is an undefined name and
# raises NameError as soon as the module is executed.
if __name__ == "__main__":
    import uvicorn
    # uvicorn can only spawn multiple workers when the application is given as
    # an import string ("module:attribute"), because each worker process has to
    # import the app itself. This assumes the file is saved as fraud_api.py.
    uvicorn.run("fraud_api:app", host="0.0.0.0", port=8000, workers=4)

6️⃣ Create a Streamlit UI

A Streamlit app for real-time predictions.

import streamlit as st
import requests

st.title("Fraudulent Transaction Detector")

# Number of input fields; must match the feature count the models were trained on.
NUM_FEATURES = 30  # Assuming 30 features in dataset
API_URL = "http://localhost:8000/predict"

# Input fields
st.subheader("Enter transaction details:")
features = [
    st.number_input(f"Feature {i+1}", min_value=-100.0, max_value=100.0, value=0.0)
    for i in range(NUM_FEATURES)
]

# Predict button
if st.button("Detect Fraud"):
    data = {"features": features}
    try:
        # A timeout keeps the page from hanging forever if the API is stalled.
        response = requests.post(API_URL, json=data, timeout=10)
    except requests.RequestException as exc:
        # API not running, connection refused, timeout, ...: show an error
        # in the page instead of crashing the script with a traceback.
        st.error(f"Could not reach the prediction API: {exc}")
    else:
        if response.status_code == 200:
            result = response.json()
            st.write(f"Isolation Forest Prediction: *{result['IsolationForest Prediction']}*")
            st.write(f"Autoencoder Prediction: *{result['Autoencoder Prediction']}*")
        else:
            st.error("Error processing request.")

7️⃣ Run the Application

Start the FastAPI server

python fraud_api.py

Run the Streamlit UI

streamlit run fraud_ui.py

Scalability Features
	•	Batch inference: Modify FastAPI to handle multiple transactions.
	•	GPU acceleration: Move autoencoder inference to CUDA.
	•	Asynchronous processing: Use Celery & Redis for distributed prediction.
	•	Kubernetes & Docker: Deploy API at scale.

This end-to-end project implements fraud detection with machine learning & deep learning, a FastAPI server, and a Streamlit UI.

Would you like batch inference or Docker deployment?