<a href="https://colab.research.google.com/github/mukulre/Projects/blob/main/Differential_Privacy_Trade_offs_in_Blockchain_Secured_Ransomware_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score
from opacus import PrivacyEngine  # For DP (requires PyTorch integration)

# Load BitcoinHeist dataset (adjust path as needed)
data = pd.read_csv("bitcoinheist.csv")  # Replace with actual path
X = data[["income", "neighbors", "weight", "length", "count"]]
# Binary target: 1 = ransomware (any label other than "white"), 0 = benign
y = (data["label"] != "white").astype(int)

# Hold out a test set so the metrics below are not measured on the very rows
# the model was fitted on (which would only report training fit).
# Imported here because this cell's import header does not bring it in.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the non-DP XGBoost baseline (no privacy mechanism is applied here)
model = xgb.XGBClassifier(max_depth=5, learning_rate=0.05, n_estimators=200)
model.fit(X_train, y_train)

# For DP, integrate with PyTorch (or custom DP noise)
# Example: Add noise to gradients (pseudo-code)
epsilon = 1.0  # Placeholder privacy budget; not used by the baseline above
# Use Opacus or custom DP-SGD to perturb gradients during training

# Evaluate on the held-out split
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
print(f"Precision: {precision_score(y_test, y_pred):.3f}")
print(f"Recall: {recall_score(y_test, y_pred):.3f}")

# Log model updates to blockchain (simulated)
# Use web3.py to interact with Ethereum or Hyperledger

In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from opacus import PrivacyEngine
from web3 import Web3
import matplotlib.pyplot as plt
import json
import time

# 1. Load and Preprocess BitcoinHeist Dataset
def load_data(file_path="bitcoinheist.csv"):
    """Load the BitcoinHeist CSV and return scaled features with binary labels.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A tuple ``(X_scaled, y)`` where ``X_scaled`` is the output of
        ``StandardScaler.fit_transform`` over the six selected feature
        columns and ``y`` is a pandas Series holding 1 for every label other
        than ``'white'`` and 0 for ``'white'``. Returns ``(None, None)`` when
        the file does not exist.
    """
    try:
        data = pd.read_csv(file_path)
    except FileNotFoundError:
        # Report the path that was actually requested; with the default
        # argument the message is identical to the previous hard-coded one.
        print(f"Error: {file_path} not found. Please download from UCI.")
        return None, None

    # Select features and label
    features = ['income', 'neighbors', 'weight', 'length', 'count', 'looped']
    # Binary target: ransomware (1) vs. benign 'white' addresses (0)
    y = (data['label'] != 'white').astype(int)

    X = data[features]

    # Handle missing values by imputing each column with its mean
    X = X.fillna(X.mean())

    # Scale features.
    # NOTE(review): the scaler is fitted on the full dataset, and main() splits
    # into train/test only afterwards, so test-set statistics influence the
    # scaling. Fixing that would require returning the unscaled features.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# 2. Define Neural Network Model
class SimpleNN(nn.Module):
    """Fully connected binary classifier: input -> 128 -> 64 -> 1 with sigmoid output.

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        # Sub-modules are registered in the same order as before so that
        # parameter initialisation and state_dict layout do not change.
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated output of the network for batch ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return self.sigmoid(logits)

# 3. Train XGBoost with Differential Privacy (Custom DP Approximation)
def train_xgboost_with_dp(X_train, X_test, y_train, y_test, epsilon):
    """Train an XGBoost classifier and evaluate it under a simulated privacy budget.

    The model itself is trained without any privacy mechanism. When
    ``epsilon > 0`` the privacy cost is *simulated* by adding zero-mean
    Gaussian noise with standard deviation ``1 / (epsilon + 1e-6)`` to the
    predicted positive-class probabilities before thresholding, so a smaller
    epsilon degrades the predictions more. This is a heuristic output
    perturbation, not a formal differential-privacy guarantee.

    The previous implementation passed feature names to ``Booster.set_param``,
    which sets (unknown) booster parameters rather than altering the trees,
    so epsilon never influenced the predictions.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels (0/1).
        y_test: Test labels (0/1).
        epsilon: Simulated privacy budget. Values <= 0 disable the noise.

    Returns:
        A tuple ``(model, metrics)`` where ``model`` is the fitted
        ``XGBClassifier`` and ``metrics`` is a dict with keys ``'accuracy'``,
        ``'precision'`` and ``'recall'`` computed on the test set.
    """
    # Initialize and train the (non-private) model
    model = xgb.XGBClassifier(max_depth=5, learning_rate=0.05, n_estimators=200, objective='binary:logistic')
    model.fit(X_train, y_train)

    if epsilon > 0:
        noise_scale = 1.0 / (epsilon + 1e-6)  # Lower epsilon -> more noise
        # Positive-class probability for every test row
        scores = model.predict_proba(X_test)[:, 1]
        noisy_scores = scores + np.random.normal(0.0, noise_scale, size=scores.shape)
        # Threshold the perturbed scores at 0.5 to obtain class labels
        y_pred = (noisy_scores > 0.5).astype(int)
    else:
        y_pred = model.predict(X_test)

    # Evaluate
    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0)
    }

    return model, metrics

# 4. Train Neural Network with Differential Privacy
def train_nn_with_dp(X_train, X_test, y_train, y_test, epsilon, device='cpu',
                     epochs=50, batch_size=32, delta=1e-5):
    """Train ``SimpleNN`` with DP-SGD (Opacus) for a target privacy budget.

    The noise multiplier is calibrated by Opacus from the requested
    ``(epsilon, delta)`` and the number of epochs via
    ``PrivacyEngine.make_private_with_epsilon``. The previous code passed
    ``noise_multiplier = 1 / epsilon``, which does not correspond to an
    epsilon guarantee.

    Args:
        X_train: Training feature matrix (array-like, 2-D).
        X_test: Test feature matrix (array-like, 2-D).
        y_train: Training labels (0/1); pandas Series or array-like.
        y_test: Test labels (0/1); pandas Series or array-like.
        epsilon: Target privacy budget; must be > 0.
        device: Torch device on which tensors and the model are placed.
        epochs: Number of passes over the training loader.
        batch_size: Batch size handed to the training ``DataLoader``.
        delta: Target delta of the (epsilon, delta)-DP guarantee.

    Returns:
        A tuple ``(model, metrics)`` where ``model`` is the module returned by
        Opacus and ``metrics`` is a dict with keys ``'accuracy'``,
        ``'precision'`` and ``'recall'`` computed on the test set.

    Raises:
        ValueError: If ``epsilon`` is not strictly positive.
    """
    if epsilon <= 0:
        raise ValueError(f"epsilon must be > 0 for DP training, got {epsilon}")

    # Convert to PyTorch tensors; np.asarray accepts both Series and ndarrays
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(np.asarray(y_train), dtype=torch.float32).reshape(-1, 1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(np.asarray(y_test), dtype=torch.float32).reshape(-1, 1).to(device)

    # Initialize model
    model = SimpleNN(input_size=X_train.shape[1]).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor),
        batch_size=batch_size
    )

    # Apply Differential Privacy: let Opacus derive the noise level needed to
    # spend at most `epsilon` over `epochs` epochs.
    privacy_engine = PrivacyEngine()
    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        target_epsilon=epsilon,
        target_delta=delta,
        epochs=epochs,
        max_grad_norm=1.0
    )

    # Training loop
    model.train()
    for _ in range(epochs):
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()

    # Evaluate
    model.eval()
    with torch.no_grad():
        # Flatten the (N, 1) columns so the metrics receive plain 1-D label arrays
        y_pred = model(X_test_tensor).round().cpu().numpy().ravel()
        y_test_np = y_test_tensor.cpu().numpy().ravel()
        metrics = {
            'accuracy': accuracy_score(y_test_np, y_pred),
            'precision': precision_score(y_test_np, y_pred, zero_division=0),
            'recall': recall_score(y_test_np, y_pred, zero_division=0)
        }

    return model, metrics

# 5. Simulate Blockchain Logging
def _first_attr(obj, *names):
    """Return the first attribute of ``obj`` that exists among ``names``."""
    for name in names:
        if hasattr(obj, name):
            return getattr(obj, name)
    raise AttributeError(f"{type(obj).__name__} has none of the attributes {names}")


def log_to_blockchain(model_params, web3_instance, contract_address, account,
                      private_key='YOUR_PRIVATE_KEY', abi_path='contract_abi.json'):
    """Store a JSON-encoded model update through a deployed smart contract.

    Args:
        model_params: JSON-serialisable object describing the model update.
        web3_instance: Connected ``Web3`` instance.
        contract_address: Address of the deployed contract exposing
            ``storeModelUpdate(string)``.
        account: Address the transaction is sent from.
        private_key: Key used to sign the transaction. The default is a
            placeholder; pass a local test key (e.g. from Ganache) and never
            hard-code a real key.
        abi_path: Path to the JSON file holding the contract ABI.

    Returns:
        A tuple ``(storage_cost, tx_hash)``: the estimated cost in ETH
        (gas estimate * gas price / 1e18) and the transaction hash as a hex
        string.
    """
    # Convert model parameters to JSON string
    params_str = json.dumps(model_params)

    # Connect to smart contract (assumes deployed contract)
    with open(abi_path, 'r', encoding='utf-8') as f:
        contract_abi = json.load(f)
    contract = web3_instance.eth.contract(address=contract_address, abi=contract_abi)
    store_call = contract.functions.storeModelUpdate(params_str)
    eth = web3_instance.eth

    # web3.py v6+ renamed the camelCase methods to snake_case; resolve whichever
    # spelling the installed version provides so the function works on both.
    estimate_gas = _first_attr(store_call, 'estimate_gas', 'estimateGas')
    build_transaction = _first_attr(store_call, 'build_transaction', 'buildTransaction')
    get_transaction_count = _first_attr(eth, 'get_transaction_count', 'getTransactionCount')
    sign_transaction = _first_attr(eth.account, 'sign_transaction', 'signTransaction')
    send_raw_transaction = _first_attr(eth, 'send_raw_transaction', 'sendRawTransaction')

    # Estimate storage cost. The gas price is read once so the reported cost
    # uses the same price as the transaction that is actually sent.
    gas_estimate = estimate_gas()
    gas_price = eth.gas_price
    storage_cost = gas_estimate * gas_price / 1e18  # Convert wei to ETH

    # Send transaction
    tx = build_transaction({
        'from': account,
        'nonce': get_transaction_count(account),
        'gas': gas_estimate,
        'gasPrice': gas_price
    })
    signed_tx = sign_transaction(tx, private_key=private_key)
    raw_tx = _first_attr(signed_tx, 'raw_transaction', 'rawTransaction')
    tx_hash = send_raw_transaction(raw_tx)

    return storage_cost, tx_hash.hex()

# 6. Main Experiment
def main():
    """Run the privacy-utility experiment end to end.

    Loads the dataset, splits it 80/20, then for each epsilon trains the
    XGBoost and neural-network models, records their test metrics, and logs
    the XGBoost accuracy through the blockchain contract. Finally plots every
    metric against epsilon (saved to ``tradeoffs.png``) and prints the
    per-epsilon storage costs. Returns early if the dataset cannot be loaded.
    """
    features, labels = load_data()
    if features is None:
        return

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Blockchain connection (Ganache default endpoint, first account)
    w3 = Web3(Web3.HTTPProvider('http://127.0.0.1:7545'))
    account = w3.eth.accounts[0]
    contract_address = 'YOUR_CONTRACT_ADDRESS'  # Replace with deployed contract address

    epsilons = [0.5, 1.0, 5.0, 10.0]
    metric_names = ('accuracy', 'precision', 'recall')
    # One list of observations per (epsilon, metric) for each model family
    xgb_metrics = {eps: {name: [] for name in metric_names} for eps in epsilons}
    nn_metrics = {eps: {name: [] for name in metric_names} for eps in epsilons}
    storage_costs = []

    for eps in epsilons:
        print(f"Training with epsilon = {eps}")

        # XGBoost run
        _, xgb_m = train_xgboost_with_dp(X_train, X_test, y_train, y_test, eps)
        for name, history in xgb_metrics[eps].items():
            history.append(xgb_m[name])

        # Neural-network run
        _, nn_m = train_nn_with_dp(X_train, X_test, y_train, y_test, eps)
        for name, history in nn_metrics[eps].items():
            history.append(nn_m[name])

        # Only the XGBoost accuracy is logged on-chain
        update = {'epsilon': eps, 'accuracy': xgb_m['accuracy']}
        cost, tx_hash = log_to_blockchain(update, w3, contract_address, account)
        storage_costs.append(cost)
        print(f"Blockchain tx hash: {tx_hash}, Storage cost: {cost:.6f} ETH")

    # One XGBoost curve and one NN curve per metric
    plt.figure(figsize=(12, 8))
    for name in metric_names:
        xgb_curve = [np.mean(xgb_metrics[eps][name]) for eps in epsilons]
        nn_curve = [np.mean(nn_metrics[eps][name]) for eps in epsilons]
        plt.plot(epsilons, xgb_curve, label=f'XGBoost {name}', marker='o')
        plt.plot(epsilons, nn_curve, label=f'NN {name}', marker='x')

    plt.xlabel('Epsilon (Privacy Budget)')
    plt.ylabel('Metric Value')
    plt.title('Privacy-Utility Trade-offs in Ransomware Detection')
    plt.legend()
    plt.grid(True)
    plt.savefig('tradeoffs.png')
    plt.show()

    # Storage-cost summary
    print("\nBlockchain Storage Costs (ETH):")
    for eps, cost in zip(epsilons, storage_costs):
        print(f"Epsilon {eps}: {cost:.6f} ETH")

# Run the experiment only when this module is executed directly, not on import.
if __name__ == "__main__":
    main()