In [None]:
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import torch
import numpy as np
from sklearn import preprocessing
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# GAT Layer implementation
class GATLayer(nn.Module):
    """Single-head graph attention layer backed by a dense ``N x N`` matrix.

    Node features are linearly projected, a score is computed for every edge
    from the concatenated projections of its two endpoints, the scores are
    normalised per source row with a softmax restricted to that row's edges,
    and the projected features are aggregated with the resulting weights.

    The attention matrix is materialised densely, so memory grows with the
    square of the number of rows in ``x``.

    Args:
        in_features: Size of each input feature vector.
        out_features: Size of each output feature vector.
        dropout: Dropout probability applied to the attention weights.
        alpha: Negative slope of the LeakyReLU applied to raw edge scores.
        concat: When true the aggregated features go through ELU; otherwise
            the raw aggregation is returned.
    """

    def __init__(self, in_features, out_features, dropout=0.6, alpha=0.2, concat=True):
        super().__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat

        # Shared linear projection applied to every row of the input.
        self.W = nn.Linear(in_features, out_features, bias=False)

        # Scores one edge from the concatenation of its two projected endpoints.
        self.a = nn.Linear(2 * out_features, 1, bias=False)

        self.dropout_layer = nn.Dropout(dropout)
        self.leakyrelu = nn.LeakyReLU(self.alpha)

    def forward(self, x, edge_index):
        """Aggregate projected features along the given edges.

        Args:
            x: Tensor with one feature row per node, ``in_features`` columns.
            edge_index: Integer tensor whose row 0 holds the source index and
                row 1 the target index of every edge; all indices must be
                valid row indices of ``x``.

        Returns:
            Tensor with one row per row of ``x`` and ``out_features`` columns.
            Rows that are not the source of any edge aggregate to zero.
        """
        h = self.W(x)
        num_nodes = h.size(0)
        src, dst = edge_index[0], edge_index[1]

        # One raw score per edge; squeeze only the trailing unit dimension so
        # a single edge still yields a 1-D tensor.
        edge_h = torch.cat((h[src], h[dst]), dim=1)
        edge_e = self.leakyrelu(self.a(edge_h)).squeeze(-1)

        # Non-edges start at the most negative finite value so the softmax
        # gives them zero weight. (Filling with 0 would hand every non-edge
        # a real share of the attention.) A finite fill is used instead of
        # -inf so that rows without any edge do not turn into NaN.
        scores = torch.full(
            (num_nodes, num_nodes),
            torch.finfo(h.dtype).min,
            dtype=h.dtype,
            device=h.device,
        )
        scores[src, dst] = edge_e

        edge_mask = torch.zeros(
            (num_nodes, num_nodes), dtype=torch.bool, device=h.device
        )
        edge_mask[src, dst] = True

        attention = F.softmax(scores, dim=1)
        # Rows with no edges come out uniform from the softmax; zero them so
        # an isolated row does not attend to arbitrary other rows.
        attention = attention.masked_fill(~edge_mask, 0.0)
        attention = self.dropout_layer(attention)

        h_prime = torch.matmul(attention, h)
        return F.elu(h_prime) if self.concat else h_prime

# Generator with GAT layers
class GATGenerator(nn.Module):
    """Generator that maps latent vectors to feature rows via two GAT stages.

    The latent input is projected to ``hidden_size``, passed through two
    multi-head attention stages whose head outputs are concatenated back to
    ``hidden_size``, and finally projected to ``output_size``.

    Args:
        latent_size: Number of columns of the latent input ``z``.
        hidden_size: Width of the hidden representation; must be a positive
            multiple of ``num_heads``.
        output_size: Number of columns of the generated rows.
        num_heads: Number of attention heads per stage.
        dropout: Dropout probability forwarded to every ``GATLayer``.

    Raises:
        ValueError: If ``hidden_size`` is not divisible by ``num_heads``.
    """

    def __init__(self, latent_size, hidden_size, output_size, num_heads=4, dropout=0.6):
        super().__init__()

        # Each head emits hidden_size // num_heads columns; if that division
        # truncates, the concatenated heads no longer add up to hidden_size
        # and the next stage fails with a shape mismatch at forward time.
        if num_heads < 1 or hidden_size % num_heads != 0:
            raise ValueError(
                f"hidden_size ({hidden_size}) must be divisible by "
                f"num_heads ({num_heads})"
            )

        self.num_heads = num_heads
        self.fc_z = nn.Linear(latent_size, hidden_size)

        head_size = hidden_size // num_heads

        # First multi-head attention stage.
        self.gat1 = nn.ModuleList([
            GATLayer(hidden_size, head_size, dropout=dropout)
            for _ in range(num_heads)
        ])

        # Second multi-head attention stage.
        self.gat2 = nn.ModuleList([
            GATLayer(hidden_size, head_size, dropout=dropout)
            for _ in range(num_heads)
        ])

        # Projection from the hidden representation to the output rows.
        self.fc_out = nn.Linear(hidden_size, output_size)

    def forward(self, z, edge_index):
        """Generate one output row per row of ``z``.

        Args:
            z: Latent tensor with ``latent_size`` columns.
            edge_index: Edge tensor passed unchanged to every attention head.

        Returns:
            Tensor with one row per row of ``z`` and ``output_size`` columns.
        """
        x = F.relu(self.fc_z(z))

        # Heads are concatenated along the feature axis. ELU is applied again
        # on top of whatever activation the heads already applied.
        x = torch.cat([att(x, edge_index) for att in self.gat1], dim=1)
        x = F.elu(x)

        x = torch.cat([att(x, edge_index) for att in self.gat2], dim=1)
        x = F.elu(x)

        return self.fc_out(x)

# Discriminator with GAT layers
class GATDiscriminator(nn.Module):
    """Critic that scores feature rows using two multi-head GAT stages.

    Args:
        input_size: Number of columns of the input rows.
        hidden_size: Width of the hidden representation; must be a positive
            multiple of ``num_heads``.
        num_heads: Number of attention heads per stage.
        dropout: Dropout probability forwarded to every ``GATLayer``.

    Raises:
        ValueError: If ``hidden_size`` is not divisible by ``num_heads``.
    """

    def __init__(self, input_size, hidden_size, num_heads=4, dropout=0.6):
        super().__init__()

        # Each head emits hidden_size // num_heads columns; if that division
        # truncates, the concatenated heads no longer add up to hidden_size
        # and the second stage / output layer fail with a shape mismatch.
        if num_heads < 1 or hidden_size % num_heads != 0:
            raise ValueError(
                f"hidden_size ({hidden_size}) must be divisible by "
                f"num_heads ({num_heads})"
            )

        self.num_heads = num_heads
        head_size = hidden_size // num_heads

        # First stage consumes the raw input rows.
        self.gat1 = nn.ModuleList([
            GATLayer(input_size, head_size, dropout=dropout)
            for _ in range(num_heads)
        ])

        # Second stage consumes the concatenated heads of the first stage.
        self.gat2 = nn.ModuleList([
            GATLayer(hidden_size, head_size, dropout=dropout)
            for _ in range(num_heads)
        ])

        # One unbounded score per row (no sigmoid is applied).
        self.fc_out = nn.Linear(hidden_size, 1)

    def forward(self, x, edge_index):
        """Return one score per row of ``x``.

        Args:
            x: Tensor with ``input_size`` columns.
            edge_index: Edge tensor passed unchanged to every attention head.

        Returns:
            Tensor with one row per row of ``x`` and a single column.
        """
        # Heads are concatenated along the feature axis. ELU is applied again
        # on top of whatever activation the heads already applied.
        x = torch.cat([att(x, edge_index) for att in self.gat1], dim=1)
        x = F.elu(x)

        x = torch.cat([att(x, edge_index) for att in self.gat2], dim=1)
        x = F.elu(x)

        return self.fc_out(x)

# Data preprocessing functions
def df_label_encoder(df, columns):
    """Overwrite the given columns of ``df`` with integer label codes.

    Every listed column is cast to ``str`` and then encoded independently
    with a ``LabelEncoder``. The frame is modified in place and also
    returned for convenience.

    Args:
        df: DataFrame containing the columns to encode.
        columns: Iterable of column names to encode.

    Returns:
        The same DataFrame object, with the listed columns replaced.
    """
    for name in columns:
        as_text = df[name].astype(str)
        # Each column is fitted from scratch, so a fresh encoder per column
        # is equivalent to refitting a shared one.
        df[name] = preprocessing.LabelEncoder().fit_transform(as_text)
    return df

def preprocess(df):
    """Encode categoricals, scale ``amt`` and derive graph node identifiers.

    Steps, in order:
      1. Label-encode ``merchant``, ``category``, ``city``, ``state``, ``job``.
      2. Min-max scale ``amt`` (all zeros when ``amt`` is constant).
      3. Add ``node_from`` (``cc_num`` as string) and ``node_to`` (the
         already label-encoded ``merchant`` as string).
      4. Sort rows by ``node_from``.

    Args:
        df: Transaction DataFrame; it is modified in place by the encoding
            and column assignments before being sorted into a new frame.

    Returns:
        Tuple ``(df, node_list)`` where ``df`` is the sorted frame and
        ``node_list`` holds the unique values found in ``node_from`` and
        ``node_to``, in order of first appearance.
    """
    df = df_label_encoder(df, ['merchant', 'category', 'city', 'state', 'job'])

    amt_min = df['amt'].min()
    amt_range = df['amt'].max() - amt_min
    if amt_range == 0:
        # A constant column would otherwise become 0 / 0 = NaN everywhere.
        df['amt'] = 0.0
    else:
        df['amt'] = (df['amt'] - amt_min) / amt_range

    # NOTE(review): both identifiers live in one string namespace, so a
    # cc_num whose text equals a merchant code would map to the same node.
    df['node_from'] = df['cc_num'].astype(str)
    df['node_to'] = df['merchant'].astype(str)

    df = df.sort_values(by=['node_from'])
    node_list = pd.concat([df['node_from'], df['node_to']]).unique()
    return df, node_list

def create_graph_data(df, node_list):
    """Build feature, edge and label tensors from a preprocessed frame.

    Args:
        df: DataFrame with columns ``node_from``, ``node_to``, ``amt``,
            ``category``, ``city``, ``state`` and ``is_fraud``.
        node_list: Sequence of node identifiers; a node's position in this
            sequence becomes its integer index in ``edge_index``.

    Returns:
        Tuple ``(node_features, edge_index, labels)``:
          * ``node_features``: float tensor, one row per row of ``df``, built
            from ``amt``, ``category``, ``city``, ``state``.
          * ``edge_index``: long tensor of shape ``(2, len(df))``; row 0 holds
            source indices and row 1 target indices.
          * ``labels``: long tensor taken from ``is_fraud``.

    Raises:
        KeyError: If a value of ``node_from`` / ``node_to`` is missing from
            ``node_list``.
    """
    node_map = {node: idx for idx, node in enumerate(node_list)}

    # Build the two rows directly so the result is (2, E) even when E == 0;
    # transposing an empty (0,) array would lose the leading dimension.
    sources = [node_map[node] for node in df['node_from']]
    targets = [node_map[node] for node in df['node_to']]
    edge_index = torch.tensor([sources, targets], dtype=torch.long)

    # NOTE(review): features and labels have one row per row of df, while
    # edge_index indexes positions in node_list — confirm that consumers
    # really intend to combine the two.
    node_features = torch.tensor(
        df[['amt', 'category', 'city', 'state']].values, dtype=torch.float
    )
    labels = torch.tensor(df['is_fraud'].values, dtype=torch.long)

    return node_features, edge_index, labels

# Training function for GAT-WGAN
def train_gat_wgan(generator, discriminator, node_features, edge_index, labels,
                   num_epochs=16, batch_size=32, critic_iterations=5,
                   latent_size=None):
    """Train a generator/critic pair with the weight-clipped WGAN objective.

    Only rows of ``node_features`` whose label equals 1 are used as real
    data. Every epoch runs ``critic_iterations`` critic updates (each followed
    by clamping all critic parameters to ``[-0.01, 0.01]``) and then a single
    generator update. Progress is printed once per epoch.

    Args:
        generator: Module called as ``generator(z, edge_index)``.
        discriminator: Module called as ``discriminator(x, edge_index)``.
        node_features: Feature tensor, one row per sample.
        edge_index: Edge tensor passed unchanged to both modules.
        labels: Tensor aligned with ``node_features``; rows equal to 1 are
            the real samples.
        num_epochs: Number of generator updates.
        batch_size: Currently unused; every step uses all label-1 rows.
        critic_iterations: Critic updates per generator update (>= 1).
        latent_size: Width of the noise fed to the generator. When omitted it
            is read from ``generator.fc_z.in_features``.

    Returns:
        Tuple ``(generator, discriminator)`` — the same objects, trained.

    Raises:
        ValueError: If the latent size cannot be determined, if
            ``critic_iterations`` is below 1, or if no row has label 1.
    """
    # Resolve the noise width from the generator instead of relying on a
    # module-level global, which does not exist when this file is imported.
    if latent_size is None:
        latent_size = getattr(getattr(generator, 'fc_z', None), 'in_features', None)
        if latent_size is None:
            raise ValueError(
                "latent_size was not given and generator has no "
                "'fc_z.in_features' to infer it from"
            )
    if critic_iterations < 1:
        raise ValueError("critic_iterations must be at least 1")

    optimizer_g = optim.Adam(generator.parameters(), lr=0.0001, betas=(0.5, 0.9))
    optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0001, betas=(0.5, 0.9))

    # NOTE(review): real_data is a row subset while edge_index is passed
    # through unchanged — confirm the indices still refer to these rows.
    real_data = node_features[labels == 1]
    num_real = real_data.size(0)
    if num_real == 0:
        raise ValueError("no samples with label 1 to train on")
    device = real_data.device

    for epoch in range(num_epochs):
        # Critic updates.
        for _ in range(critic_iterations):
            optimizer_d.zero_grad()

            d_real = discriminator(real_data, edge_index)

            # The generator is not updated here, so skip building its graph.
            z = torch.randn(num_real, latent_size, device=device)
            with torch.no_grad():
                fake_data = generator(z, edge_index)
            d_fake = discriminator(fake_data, edge_index)

            # Wasserstein critic loss: push real scores up, fake scores down.
            loss_d = -torch.mean(d_real) + torch.mean(d_fake)
            loss_d.backward()
            optimizer_d.step()

            # Weight clipping applied after every critic step.
            with torch.no_grad():
                for p in discriminator.parameters():
                    p.clamp_(-0.01, 0.01)

        # Generator update.
        optimizer_g.zero_grad()
        z = torch.randn(num_real, latent_size, device=device)
        fake_data = generator(z, edge_index)
        loss_g = -torch.mean(discriminator(fake_data, edge_index))
        loss_g.backward()
        optimizer_g.step()

        print(f'Epoch [{epoch}/{num_epochs}], Loss D: {loss_d.item():.4f}, Loss G: {loss_g.item():.4f}')

    return generator, discriminator

# Classifier for evaluation
class Classifier(nn.Module):
    """Two-class classifier: one attention layer followed by a linear head.

    Args:
        input_size: Number of columns of the input rows.
        hidden_size: Output width of the attention layer and input width of
            the linear head.
        num_heads: Accepted for signature compatibility but not used; a
            single ``GATLayer`` is built.
    """

    def __init__(self, input_size, hidden_size, num_heads=4):
        super().__init__()
        self.gat = GATLayer(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size, 2)

    def forward(self, x, edge_index=None):
        """Return two logits per row of ``x``.

        Args:
            x: Input feature tensor.
            edge_index: Optional edge tensor. When omitted the attention
                layer is skipped and ``x`` goes straight to ReLU and the head.

        Returns:
            Tensor with one row per row of ``x`` and two columns.
        """
        # NOTE(review): without edge_index the head receives x unprojected,
        # which only fits when input_size == hidden_size.
        hidden = x if edge_index is None else self.gat(x, edge_index)
        return self.fc(F.relu(hidden))

# Main execution
if __name__ == "__main__":
    # End-to-end run: load transactions, train the GAT-WGAN on fraud rows,
    # oversample the fraud class with generated rows, then train and evaluate
    # a classifier on the augmented data.

    # Load and preprocess data
    df = pd.read_csv('creditcard/fraudTrain.csv')
    df, node_list = preprocess(df)
    node_features, edge_index, labels = create_graph_data(df, node_list)

    # Model parameters (kept at module level: the training function may read
    # latent_size from here).
    input_size = node_features.shape[1]
    hidden_size = 128
    latent_size = 64
    num_heads = 4

    # Initialize models
    generator = GATGenerator(latent_size, hidden_size, input_size, num_heads=num_heads)
    discriminator = GATDiscriminator(input_size, hidden_size, num_heads=num_heads)

    # Train models
    generator, discriminator = train_gat_wgan(
        generator, discriminator, node_features, edge_index, labels
    )

    # Number of synthetic fraud rows needed to balance the two classes, as a
    # plain non-negative int (a 0-dim tensor is not a reliable size argument,
    # and the difference is negative when label 1 is already the majority).
    num_samples = max(
        int((labels == 0).sum().item()) - int((labels == 1).sum().item()), 0
    )

    # Generate samples without dropout and without autograd tracking, so the
    # synthetic rows are plain data and the classifier's backward pass does
    # not reach into the generator's graph.
    # NOTE(review): the attention layers allocate a dense
    # num_samples x num_samples matrix — this may need batching.
    generator.eval()
    with torch.no_grad():
        z = torch.randn(num_samples, latent_size)
        generated_samples = generator(z, edge_index)

    # Combine real and generated data; generated rows are labelled 1.
    augmented_features = torch.cat([node_features, generated_samples], dim=0)
    augmented_labels = torch.cat([
        labels,
        torch.ones(num_samples, dtype=torch.long)
    ])

    # Split data for classifier training
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(
        augmented_features, augmented_labels, test_size=0.2, random_state=42
    )

    # Train and evaluate classifier
    # NOTE(review): x_train / x_test are shuffled row subsets while
    # edge_index is the original graph — confirm the indices still line up.
    classifier = Classifier(input_size, hidden_size, num_heads)
    optimizer = optim.Adam(classifier.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Full-batch training loop for the classifier
    num_epochs = 30
    for epoch in range(num_epochs):
        classifier.train()
        optimizer.zero_grad()

        outputs = classifier(x_train, edge_index)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        # Periodic evaluation on the held-out split.
        if epoch % 5 == 0:
            classifier.eval()
            with torch.no_grad():
                test_outputs = classifier(x_test, edge_index)
                test_loss = criterion(test_outputs, y_test)
                accuracy = accuracy_score(
                    y_test.cpu().numpy(),
                    test_outputs.argmax(dim=1).cpu().numpy()
                )
                print(f'Epoch {epoch}, Loss: {loss.item():.4f}, Test Acc: {accuracy:.4f}')

    # Final evaluation
    classifier.eval()
    with torch.no_grad():
        final_outputs = classifier(x_test, edge_index)
        y_pred = final_outputs.argmax(dim=1)
        final_accuracy = accuracy_score(y_test.cpu().numpy(), y_pred.cpu().numpy())
        print(f'Final Test Accuracy: {final_accuracy:.4f}')

        # ROC AUC from the softmax probability of class 1
        y_proba = F.softmax(final_outputs, dim=1)[:, 1].cpu().numpy()
        roc_auc = roc_auc_score(y_test.cpu().numpy(), y_proba)
        print(f'ROC AUC Score: {roc_auc:.4f}')

  from .autonotebook import tqdm as notebook_tqdm


Epoch [0/16], Loss D: -0.0685, Loss G: -0.0100
Epoch [1/16], Loss D: -0.1109, Loss G: -0.0100
Epoch [2/16], Loss D: -0.1531, Loss G: -0.0100
Epoch [3/16], Loss D: -0.1972, Loss G: -0.0100
Epoch [4/16], Loss D: -0.2439, Loss G: -0.0100
Epoch [5/16], Loss D: -0.2925, Loss G: -0.0100
Epoch [6/16], Loss D: -0.3428, Loss G: -0.0100
Epoch [7/16], Loss D: -0.3959, Loss G: -0.0101
Epoch [8/16], Loss D: -0.4509, Loss G: -0.0101
Epoch [9/16], Loss D: -0.5068, Loss G: -0.0102
Epoch [10/16], Loss D: -0.5647, Loss G: -0.0103
Epoch [11/16], Loss D: -0.6245, Loss G: -0.0104
Epoch [12/16], Loss D: -0.6860, Loss G: -0.0105
Epoch [13/16], Loss D: -0.7493, Loss G: -0.0107
Epoch [14/16], Loss D: -0.8141, Loss G: -0.0108
Epoch [15/16], Loss D: -0.8803, Loss G: -0.0110
