## Install the RAPIDS libraries for NVIDIA CUDA GPU acceleration

In [None]:
!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py


Cloning into 'rapidsai-csp-utils'...
remote: Enumerating objects: 562, done.[K
remote: Counting objects: 100% (293/293), done.[K
remote: Compressing objects: 100% (191/191), done.[K
remote: Total 562 (delta 185), reused 145 (delta 100), pack-reused 269 (from 1)[K
Receiving objects: 100% (562/562), 181.33 KiB | 3.18 MiB/s, done.
Resolving deltas: 100% (287/287), done.
Collecting pynvml
  Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)
Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml)
  Downloading nvidia_ml_py-12.560.30-py3-none-any.whl.metadata (8.6 kB)
Downloading pynvml-12.0.0-py3-none-any.whl (26 kB)
Downloading nvidia_ml_py-12.560.30-py3-none-any.whl (40 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.5/40.5 kB 3.0 MB/s eta 0:00:00
Installing collected packages: nvidia-ml-py, pynvml
Successfully installed nvidia-ml-py-12.560.30 pynvml-12.0.0
Installing RAPIDS remaining 24.10.* libraries
Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
C

# RAPIDS imports

In [None]:
import cudf
import cuml
import cugraph
import cuspatial
import cuxfilter

# Support Vector Machine Trained With CUDA #

Model creation and training

In [None]:
from sklearn.model_selection import ParameterGrid
from cuml.svm import SVC as cumlSVC
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import statsmodels.api as sm
import joblib
from scipy.stats import uniform
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import make_blobs
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, precision_score, classification_report, confusion_matrix, make_scorer, f1_score


In [None]:
# Read the synthetic financial dataset CSV into a DataFrame. This raw frame is
# the starting point from which the later cells derive their working copy.
dataset_csv = "/content/Synthetic_Financial_datasets_log.csv"
df_original = pd.read_csv(dataset_csv)


In [None]:
# Fold the 'isFlaggedFraud' column into the label: a row counts as fraud when
# either column marks it. The guard makes this cell safe to re-run -- once the
# flag column has been merged and dropped there is nothing left to do, whereas
# an unguarded second run would raise KeyError on 'isFlaggedFraud'.
if 'isFlaggedFraud' in df_original.columns:
    df_original['isFraud'] = df_original['isFraud'] | df_original['isFlaggedFraud']
    df_original.drop(['isFlaggedFraud'], inplace=True, axis=1)

########### Data Preparation #################

# Removing unnecessary fields. `drop` returns a new frame, so `df_original`
# is left intact. The account-name columns are dropped without being
# label-encoded first: encoding them only to discard the result is wasted work.
df = df_original.drop(columns=[
    'nameOrig',
    'oldbalanceOrg',
    'newbalanceOrig',
    'oldbalanceDest',
    'newbalanceDest',
    'nameDest',
])

# Convert the categorical transaction type to integer codes.
# LabelEncoder numbers the classes in sorted order; assuming the five types
# CASH_IN, CASH_OUT, DEBIT, PAYMENT, TRANSFER these are 0..4, which the min-max
# scaling below maps to 0.0, 0.25, 0.5, 0.75, 1.0.
# NOTE(review): this imposes an ordering on a nominal feature; one-hot
# encoding may suit the SVM better -- confirm before changing the features.
le = LabelEncoder()
df['type'] = le.fit_transform(df['type'])

# Keep the label out of the feature frame so it is never rescaled.
isFraud_field_no_normalise = df['isFraud']
df = df.drop('isFraud', axis = 1)

# separating class variable from the (still unscaled) feature variables
y = isFraud_field_no_normalise

# splitting the data into training and testing set (same split as before:
# the row assignment depends only on the number of rows and random_state)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(df, y, test_size = 0.3, random_state = 42)

# Min-max statistics are taken from the training rows only, so nothing about
# the test set leaks into preprocessing. A zero range (constant column) is
# replaced by 1 to avoid 0/0 = NaN in the division.
train_min = X_train_raw.min()
train_range = (X_train_raw.max() - train_min).replace(0, 1)

# Apply the training-set scaling everywhere. Test rows can fall slightly
# outside [0, 1] when they exceed the range seen in training.
df = (df - train_min) / train_range
X = df
X_train = (X_train_raw - train_min) / train_range
X_test = (X_test_raw - train_min) / train_range


In [None]:
# Define the parameter grid for manual search (cuML does not support GridSearchCV directly)
param_grid = {
    'C': [5, 100],
    'gamma': ['scale', 1, 10],
    'kernel': ['poly', 'rbf'],
    'class_weight': [{0: 1, 1: 10}, {0: 1, 1: 100}]  # Hand-picked weights that up-weight the fraud class (label 1)
}

# Hold out part of the training data for model selection. Choosing the
# hyperparameters by their score on X_test and then reporting on the same
# X_test would make the final report optimistically biased, so the test set is
# only touched once, after the search. Stratified to keep the fraud ratio.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# Perform manual grid search
best_params = None
best_model = None
best_score = -np.inf

for params in ParameterGrid(param_grid):
    print(f"Training with parameters: {params}")
    # The grid keys are exactly the SVC keyword arguments being tuned.
    model = cumlSVC(**params)
    model.fit(X_fit, y_fit)

    # F1 of the fraud class on the validation split. Asking f1_score for
    # pos_label=1 avoids depending on the string key '1' of the
    # classification_report dict; zero_division=0 scores a model that predicts
    # no fraud at all as 0 instead of emitting a warning.
    score = f1_score(y_val, model.predict(X_val), pos_label=1, zero_division=0)

    if score > best_score:
        best_score, best_model, best_params = score, model, params
        print("\nBest Pre_Score:", best_score)
        print("\nBest Pre_Params:", best_params)
        # Checkpoint the current leader so an interrupted search still leaves
        # a usable model on disk.
        joblib.dump(best_model, 'svm_pre_best_model.pkl')

# Display the best parameters and score
print("\nBest Parameters:", best_params)
print(f"Best Validation F1-Score (fraud class): {best_score:.4f}")

# Evaluate the best model on the untouched test set. y_pred is assigned only
# here, so it always refers to the selected model's test predictions.
y_pred = best_model.predict(X_test)
joblib.dump(best_model, 'svm_best_model.pkl')

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Training with parameters: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 'scale', 'kernel': 'poly'}

Best Pre_Score: 0.11152416356877323

Best Pre_Params: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 'scale', 'kernel': 'poly'}
Training with parameters: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 'scale', 'kernel': 'rbf'}

Best Pre_Score: 0.22917398945518452

Best Pre_Params: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 'scale', 'kernel': 'rbf'}
Training with parameters: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 1, 'kernel': 'poly'}
Training with parameters: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 1, 'kernel': 'rbf'}
Training with parameters: {'C': 5, 'class_weight': {0: 1, 1: 10}, 'gamma': 10, 'kernel': 'poly'}


In [None]:
# Show the F1-score of class '1' for the current y_pred. y_pred is whatever
# the grid-search cell last assigned, inside its loop or after it.
metrics_by_class = classification_report(y_test, y_pred, output_dict=True)
fraud_metrics = metrics_by_class['1']
fraud_metrics['f1-score']