In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features for anomaly detection
features = df.select_dtypes(include=['number']).dropna()

# Initialize the Isolation Forest model
model = IsolationForest(contamination=0.02, random_state=42)
model.fit(features)

# Detect anomalies
anomaly_scores = model.decision_function(features)
anomalies = model.predict(features)

# Mark anomalies in the dataset
df['Anomaly'] = anomalies

# Trigger alerts for detected anomalies
anomalies_detected = df[df['Anomaly'] == -1]
print(f"Detected {len(anomalies_detected)} anomalies.")

# Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

# Update the model (simulated by refitting with new data)
def update_model(new_data_path):
    new_df = pd.read_csv(new_data_path)
    new_features = new_df.select_dtypes(include=['number']).dropna()
    model.fit(new_features)
    print("Model updated with new data.")

# Example of updating the model when new data arrives
# update_model("NewFraudData.csv")


Detected 11115 anomalies.


In [3]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features for anomaly detection
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna()

# Initialize the Isolation Forest model
model = IsolationForest(contamination=0.02, random_state=42)
model.fit(features)

# Detect anomalies
anomaly_scores = model.decision_function(features)
anomalies = model.predict(features)

# Convert anomalies to binary labels (1 for fraud, 0 for normal)
df['Anomaly'] = [1 if x == -1 else 0 for x in anomalies]

# Assuming the dataset has a true label column named 'is_fraud'
if 'is_fraud' in df.columns:
    y_true = df['is_fraud']  # True labels
    y_pred = df['Anomaly']  # Predicted anomalies
    
    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=1)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
else:
    print("Warning: No 'is_fraud' column found. Cannot compute evaluation metrics.")

# Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

# Update the model (simulated by refitting with new data)
def update_model(new_data_path):
    new_df = pd.read_csv(new_data_path)
    new_features = new_df[numerical_columns].dropna()
    model.fit(new_features)
    print("Model updated with new data.")

# Example of updating the model when new data arrives
# update_model("NewFraudData.csv")


Model Performance Metrics:
Accuracy: 0.9771
Precision: 0.0237
Recall: 0.1226
F1 Score: 0.0397

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99    553574
           1       0.02      0.12      0.04      2145

    accuracy                           0.98    555719
   macro avg       0.51      0.55      0.51    555719
weighted avg       0.99      0.98      0.98    555719



Butterfly

In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Butterfly Optimization Algorithm (BOA)
def butterfly_optimization(features, labels, num_butterflies=20, num_iterations=50, a=0.1, c=0.01):
    np.random.seed(42)
    n_samples, n_features = features.shape
    
    # Initialize butterfly positions (random weights for features)
    butterflies = np.random.rand(num_butterflies, n_features)
    best_butterfly = butterflies[0]
    best_score = -np.inf
    
    for _ in range(num_iterations):
        for i in range(num_butterflies):
            # Generate weights and calculate anomaly scores
            weights = butterflies[i]
            scores = np.dot(features, weights)
            threshold = np.percentile(scores, 98)  # Top 2% as anomalies
            predictions = (scores >= threshold).astype(int)
            
            # Evaluate with F1-score
            score = f1_score(labels, predictions)
            
            if score > best_score:
                best_score = score
                best_butterfly = weights.copy()
            
            # Update butterfly position
            butterflies[i] += a * np.random.rand(n_features) * (best_butterfly - butterflies[i]) + c * np.random.randn(n_features)
    
    return best_butterfly

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna().values

# Normalize features
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# Ensure 'is_fraud' column exists for supervised evaluation
if 'is_fraud' in df.columns:
    labels = df['is_fraud'].values
    
    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    
    # Run Butterfly Optimization Algorithm
    best_weights = butterfly_optimization(X_train, y_train)
    
    # Compute anomaly scores on test set
    test_scores = np.dot(X_test, best_weights)
    threshold = np.percentile(test_scores, 98)
    y_pred = (test_scores >= threshold).astype(int)
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=1)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    
    # Apply best weights to entire dataset
    df['Anomaly'] = (np.dot(features, best_weights) >= threshold).astype(int)
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

print("Anomaly detection completed using Butterfly Optimization Algorithm.")


Model Performance Metrics:
Accuracy: 0.9763
Precision: 0.0045
Recall: 0.0235
F1 Score: 0.0076

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99    110718
           1       0.00      0.02      0.01       426

    accuracy                           0.98    111144
   macro avg       0.50      0.50      0.50    111144
weighted avg       0.99      0.98      0.98    111144

Anomaly detection completed using Butterfly Optimization Algorithm.


In [2]:
conda install -c h2oai h2o


Collecting package metadata (current_repodata.json): done
Solving environment: \ 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/osx-64::numba==0.57.0=py311hdb55bb0_0
  - defaults/noarch::conda-pack==0.6.0=pyhd3eb1b0_0
  - defaults/osx-64::nbclient==0.5.13=py311hecd8cb5_0
  - defaults/osx-64::python-lsp-server==1.7.2=py311hecd8cb5_0
  - defaults/osx-64::aiobotocore==2.5.0=py311hecd8cb5_0
  - defaults/osx-64::conda-build==3.26.1=py311hecd8cb5_0
  - defaults/osx-64::hvplot==0.8.4=py311hecd8cb5_0
  - defaults/osx-64::nbclassic==0.5.5=py311hecd8cb5_0
  - defaults/osx-64::jupyter_server_fileid==0.9.0=py311hecd8cb5_0
  - defaults/osx-64::anaconda-client==1.12.1=py311hecd8cb5_0
  - defaults/osx-64::typing-extensions==4.7.1=py311hecd8cb5_0
  - defaults/osx-64::anaconda-cloud-auth==0.1.3=py311hecd8cb5_0
  - defaults/osx-64::datashader==0.15.2=py311hecd8cb5_0
  - defaults/osx-64::_anaconda_depends==2023

In [3]:
conda install conda=25.3.0

Collecting package metadata (current_repodata.json): done
Solving environment: | 
The environment is inconsistent, please check the package plan carefully
The following packages are causing the inconsistency:

  - defaults/osx-64::numba==0.57.0=py311hdb55bb0_0
  - defaults/noarch::conda-pack==0.6.0=pyhd3eb1b0_0
  - defaults/osx-64::nbclient==0.5.13=py311hecd8cb5_0
  - defaults/osx-64::python-lsp-server==1.7.2=py311hecd8cb5_0
  - defaults/osx-64::aiobotocore==2.5.0=py311hecd8cb5_0
  - defaults/osx-64::conda-build==3.26.1=py311hecd8cb5_0
  - defaults/osx-64::hvplot==0.8.4=py311hecd8cb5_0
  - defaults/osx-64::nbclassic==0.5.5=py311hecd8cb5_0
  - defaults/osx-64::jupyter_server_fileid==0.9.0=py311hecd8cb5_0
  - defaults/osx-64::anaconda-client==1.12.1=py311hecd8cb5_0
  - defaults/osx-64::typing-extensions==4.7.1=py311hecd8cb5_0
  - defaults/osx-64::anaconda-cloud-auth==0.1.3=py311hecd8cb5_0
  - defaults/osx-64::datashader==0.15.2=py311hecd8cb5_0
  - defaults/osx-64::_anaconda_depends==2023

pyqtwebengine-5.15.1 | 137 KB    |                                       |   0% 
gettext-0.21.0       | 3.0 MB    |                                       |   0% [A

libcurl-8.9.1        | 396 KB    |                                       |   0% [A[A


lxml-5.3.0           | 1.3 MB    |                                       |   0% [A[A[A



imbalanced-learn-0.1 | 418 KB    |                                       |   0% [A[A[A[A




conda-token-0.5.0    | 11 KB     |                                       |   0% [A[A[A[A[A





sip-6.7.12           | 593 KB    |                                       |   0% [A[A[A[A[A[A






libxslt-1.1.41       | 241 KB    |                                       |   0% [A[A[A[A[A[A[A







certifi-2025.1.31    | 164 KB    |                                       |   0% [A[A[A[A[A[A[A[A








libarchive-3.7.7     | 801 KB    |                                       |   0% [A[A[A[A[A[A[A[A[A









conda-23.9

libarchive-3.7.7     | 801 KB    | ########1                             |  22% [A[A[A[A[A[A[A[A[A

libcurl-8.9.1        | 396 KB    | ##################################### | 100% [A[A
gettext-0.21.0       | 3.0 MB    | ####################################2 |  98% [A









conda-23.9.0         | 1.3 MB    | 4                                     |   1% [A[A[A[A[A[A[A[A[A[A










xz-5.6.4             | 289 KB    | ##                                    |   6% [A[A[A[A[A[A[A[A[A[A[A
gettext-0.21.0       | 3.0 MB    | ####################################8 |  99% [A









conda-23.9.0         | 1.3 MB    | ##7                                   |   7% [A[A[A[A[A[A[A[A[A[A










xz-5.6.4             | 289 KB    | ######1                               |  17% [A[A[A[A[A[A[A[A[A[A[A









conda-23.9.0         | 1.3 MB    | ######8                               |  19% [A[A[A[A[A[A[A[A[A[A
gettext-0.21.0       | 3.0 M

libmamba-1.5.11      | 1.4 MB    | #####5                                |  15% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | #####                                 |  14% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A













pip-25.0             | 2.9 MB    | ##########2                           |  28% [A[A[A[A[A[A[A[A[A[A[A[A[A[A
















libmamba-1.5.11      | 1.4 MB    | ######7                               |  18% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A













pip-25.0             | 2.9 MB    | ###########8                          |  32% [A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | #####9                                |  16% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














libsolv-0.7.30       | 442 KB    | ########################1             |  65% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A














libsol

openssl-3.0.16       | 4.6 MB    | #2                                    |   3% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | #################################1    |  90% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















openssl-3.0.16       | 4.6 MB    | #5                                    |   4% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | ##################################9   |  95% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A















h5py-3.12.1          | 1.3 MB    | ##################################### | 100% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















openssl-3.0.16       | 4.6 MB    | #7                                    |   5% [A[A[A[A[A[A[A[A[A[A[

openssl-3.0.16       | 4.6 MB    | #####6                                |  15% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















setuptools-75.8.0    | 2.2 MB    | #########################2            |  68% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















openssl-3.0.16       | 4.6 MB    | ######3                               |  17% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A






















 ... (more hidden) ...[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A





















setuptools-75.8.0    | 2.2 MB    | ##########################8           |  72% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A


















openssl-3.0.16       | 4.6 MB    | ######8                               |  18% [A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A






















 ... (more hidden) ...[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[A[

Preparing transaction: done
Verifying transaction: done
Executing transaction: \ 

    Installed package of scikit-learn can be accelerated using scikit-learn-intelex.
    More details are available here: https://intel.github.io/scikit-learn-intelex

    For example:

        $ conda install scikit-learn-intelex
        $ python -m sklearnex my_application.py

    

done

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import h2o
from h2o.automl import H2OAutoML
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Initialize H2O
h2o.init()

# Load dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = df.select_dtypes(include=['number']).columns.tolist()
features = df[numerical_columns].dropna()

# Convert pandas DataFrame to H2OFrame
df_h2o = h2o.H2OFrame(df)

# Ensure 'is_fraud' column exists
if 'is_fraud' in df.columns:
    df_h2o['is_fraud'] = df_h2o['is_fraud'].asfactor()
    
    # Split data into training and testing sets
    train, test = df_h2o.split_frame(ratios=[0.8], seed=42)
    
    # Define features and target
    x = numerical_columns
    y = 'is_fraud'
    
    # Train AutoML model
    aml = H2OAutoML(max_models=20, seed=42, include_algos=["GBM", "DeepLearning", "XGBoost", "StackedEnsemble"])
    aml.train(x=x, y=y, training_frame=train)
    
    # Generate predictions
    predictions = aml.leader.predict(test)
    predictions = predictions.as_data_frame()['predict'].astype(int)
    y_test = test[y].as_data_frame()['is_fraud'].astype(int)
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    
    # Mark detected frauds in dataset
    df.loc[df.index[:len(predictions)], 'Anomaly'] = predictions
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)
print("Anomaly detection completed using AutoML 2.0.")

ModuleNotFoundError: No module named 'h2o'

Few-shot Learning & Meta-learning

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna()
labels = df['is_fraud'] if 'is_fraud' in df.columns else None

# Normalize features
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# Split data into support (few-shot) and query sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.8, random_state=42, stratify=labels)
X_support, y_support = X_train[:10], y_train[:10]  # Few-shot support set

# Convert data to PyTorch tensors
X_support, y_support = torch.tensor(X_support, dtype=torch.float32), torch.tensor(y_support.values, dtype=torch.long)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test.values, dtype=torch.long)

# Define simple embedding model
class FewShotNet(nn.Module):
    def __init__(self, input_dim, embedding_dim=16):
        super(FewShotNet, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, embedding_dim)
        )
    
    def forward(self, x):
        return self.encoder(x)

# Initialize model, loss, and optimizer
input_dim = X_support.shape[1]
model = FewShotNet(input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Compute class prototypes (centroids)
def compute_prototypes(embeddings, labels):
    unique_labels = torch.unique(labels)
    prototypes = torch.stack([embeddings[labels == label].mean(dim=0) for label in unique_labels])
    return prototypes, unique_labels

# Train the embedding model
for epoch in range(100):
    optimizer.zero_grad()
    support_embeddings = model(X_support)
    prototypes, unique_labels = compute_prototypes(support_embeddings, y_support)
    loss = criterion(support_embeddings, y_support)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}")

# Inference on test set
with torch.no_grad():
    test_embeddings = model(X_test)
    distances = torch.cdist(test_embeddings, prototypes)
    predictions = unique_labels[torch.argmin(distances, dim=1)]

# Evaluate performance
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)

print("\nFew-Shot Learning Model Performance:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Epoch 0: Loss = 2.7774
Epoch 10: Loss = 2.4860
Epoch 20: Loss = 2.1894
Epoch 30: Loss = 1.8778
Epoch 40: Loss = 1.5447
Epoch 50: Loss = 1.1936
Epoch 60: Loss = 0.8570
Epoch 70: Loss = 0.5732
Epoch 80: Loss = 0.3682
Epoch 90: Loss = 0.2380

Few-Shot Learning Model Performance:
Accuracy: 0.9961
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna()

# Ensure 'is_fraud' column exists
if 'is_fraud' in df.columns:
    labels = df['is_fraud']
    
    # Normalize features
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)
    
    # Split into training and testing sets with stratification
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)
    
    # Apply SMOTE to balance the dataset
    smote = SMOTE(sampling_strategy=0.2, random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)
    
    # Compute class weights
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weight_dict = {0: class_weights[0], 1: class_weights[1]}
    
    # Define focal loss function
    def focal_loss(alpha=0.25, gamma=2.0):
        def loss(y_true, y_pred):
            bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)
            p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
            return alpha * tf.pow(1 - p_t, gamma) * bce
        return loss
    
    # Build AI-driven SDN model (Simple Neural Network)
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam', loss=focal_loss(), metrics=['accuracy'])
    
    # Train the model with class weights
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test), class_weight=class_weight_dict, verbose=1)
    
    # Adjust threshold for better fraud detection
    threshold = 0.3
    predictions = (model.predict(X_test) > threshold).astype(int)
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, average='binary', zero_division=1)
    recall = recall_score(y_test, predictions, average='binary')
    f1 = f1_score(y_test, predictions, average='binary')
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    
    # Assign predictions to dataset
    df.loc[df.index[:len(predictions)], 'Anomaly'] = predictions
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

print("Anomaly detection completed using AI-driven SDN method.")


Transformer

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import torch

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna().values

# Normalize features
scaler = MinMaxScaler()
features = scaler.fit_transform(features)

# Ensure 'is_fraud' column exists for supervised evaluation
if 'is_fraud' in df.columns:
    labels = df['is_fraud'].values
    
    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    
    # Load transformer model and tokenizer
    model_name = "bert-base-uncased"
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    
    # Convert numerical data into text format for transformer input
    X_test_text = ["Transaction with amount: {:.2f}, location: ({:.4f}, {:.4f})".format(x[0], x[2], x[3]) for x in X_test]
    
    # Generate predictions
    predictions = [1 if classifier(text)[0]['label'] == 'LABEL_1' else 0 for text in X_test_text]
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, zero_division=1)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    
    # Assign predictions to dataset
    df.loc[df.index[:len(predictions)], 'Anomaly'] = predictions
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

print("Anomaly detection completed using a transformer-based model.")


In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna()

# Ensure 'is_fraud' column exists
if 'is_fraud' in df.columns:
    labels = df['is_fraud']
    
    # Normalize features
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)
    
    # Split into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    
    # Initialize AdaBoost classifier
    base_model = DecisionTreeClassifier(max_depth=1)
    model = AdaBoostClassifier(base_model, n_estimators=50, random_state=42, algorithm="SAMME")

    
    # Train the model
    model.fit(X_train, y_train)
    
    # Generate predictions
    predictions = model.predict(X_test)
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions))
    
    # Assign predictions to dataset
    df.loc[df.index[:len(predictions)], 'Anomaly'] = predictions
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

print("Anomaly detection completed using AdaBoost.")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model Performance Metrics:
Accuracy: 0.9962
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00    110718
           1       0.00      0.00      0.00       426

    accuracy                           1.00    111144
   macro avg       0.50      0.50      0.50    111144
weighted avg       0.99      1.00      0.99    111144

Anomaly detection completed using AdaBoost.


In [4]:
import pandas as pd
from pyod.models.knn import KNN  # Import KNN from pyod

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features for anomaly detection
features = df.select_dtypes(include=['number']).dropna()

# Initialize the KNN model
model = KNN(contamination=0.02)  # No 'random_state' needed for KNN
model.fit(features)

# Detect anomalies
anomalies = model.predict(features)

# Mark anomalies in the dataset
df['Anomaly'] = anomalies

# Trigger alerts for detected anomalies
anomalies_detected = df[df['Anomaly'] == 1]  # In PyOD, 1 indicates anomaly
print(f"Detected {len(anomalies_detected)} anomalies.")

# Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)


Detected 5893 anomalies.


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
numerical_columns = ["amt", "zip", "lat", "long", "city_pop", "unix_time", "merch_lat", "merch_long"]
features = df[numerical_columns].dropna()

# Ensure 'is_fraud' column exists
if 'is_fraud' in df.columns:
    labels = df['is_fraud']
    
    # Normalize features
    scaler = MinMaxScaler()
    features = scaler.fit_transform(features)
    
    # Split into training and testing sets with stratification
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42, stratify=labels)
    
    # Build AI-driven SDN model (Lightweight Neural Network)
    model = Sequential([
        Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    
    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)
    
    # Generate predictions
    predictions = (model.predict(X_test) > 0.5).astype(int)
    
    # Evaluate model performance
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, zero_division=1)
    recall = recall_score(y_test, predictions, zero_division=1)
    f1 = f1_score(y_test, predictions, zero_division=1)
    
    print("Model Performance Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, predictions, zero_division=1))
    
    # Assign predictions to dataset
    df.loc[df.index[:len(predictions)], 'Anomaly'] = predictions
    
    # Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

print("Anomaly detection completed using AI-driven SDN method.")


In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
file_path = "FraudTest.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features
features = df.select_dtypes(include=['number']).dropna()
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Split data for training (using only normal data)
X_train, X_test = train_test_split(scaled_features, test_size=0.2, random_state=42)

# Build Autoencoder Model
input_dim = X_train.shape[1]

autoencoder = keras.Sequential([
    keras.layers.Input(shape=(input_dim,)),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(input_dim, activation="linear")  # Output layer reconstructs input
])

autoencoder.compile(optimizer="adam", loss="mse")

# Train the Autoencoder (using only normal data)
autoencoder.fit(X_train, X_train, epochs=50, batch_size=32, validation_data=(X_test, X_test), verbose=1)

# Calculate reconstruction error on all data
reconstructed = autoencoder.predict(scaled_features)
reconstruction_error = np.mean(np.abs(scaled_features - reconstructed), axis=1)

# Define anomaly threshold (e.g., 95th percentile)
threshold = np.percentile(reconstruction_error, 95)
df["Anomaly"] = reconstruction_error > threshold  # Mark anomalies as True/False

# Trigger alerts for detected anomalies
anomalies_detected = df[df["Anomaly"] == True]
print(f"Detected {len(anomalies_detected)} anomalies.")

# Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)


In [2]:
import pandas as pd
from sklearn.ensemble import IsolationForest

# Load the dataset
file_path = "Fraud_Data.csv"
df = pd.read_csv(file_path)

# Select relevant numerical features for anomaly detection
features = df.select_dtypes(include=['number']).dropna()

# Initialize the Isolation Forest model
model = IsolationForest(contamination=0.02, random_state=42)
model.fit(features)

# Detect anomalies
anomaly_scores = model.decision_function(features)
anomalies = model.predict(features)

# Mark anomalies in the dataset
df['Anomaly'] = anomalies

# Trigger alerts for detected anomalies
anomalies_detected = df[df['Anomaly'] == -1]
print(f"Detected {len(anomalies_detected)} anomalies.")

# Save results
df.to_csv("FraudTest_with_anomalies.csv", index=False)

# Update the model (simulated by refitting with new data)
def update_model(new_data_path):
    new_df = pd.read_csv(new_data_path)
    new_features = new_df.select_dtypes(include=['number']).dropna()
    model.fit(new_features)
    print("Model updated with new data.")

# Example of updating the model when new data arrives
# update_model("NewFraudData.csv")


Detected 3023 anomalies.
