## Install Rapids library to work with GPU CUDA Nvidia

In [1]:
#!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
#!python rapidsai-csp-utils/colab/pip-install.py nightlies nightlies

In [2]:
#%%time
#!pip install --target=/kaggle/working --extra-index-url=https://pypi.nvidia.com "cudf-cu12==24.12.*" "cuml-cu12==25.02.*"
#!rm -rf /kaggle/working/numpy*

In [3]:
#import cuml
#cuml.__version__

# RAPIDS imports

In [4]:
#from cuml.ensemble import RandomForestClassifier as DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier as DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterGrid
import numpy as np
#import cupy as cp

# Support Vector Machine Trained With CUDA #

Model creation and training

In [5]:
#import cuml
#from cuml.model_selection import GridSearchCV
from sklearn.metrics import f1_score
#import cudf
import numpy as np
import pandas as pd

from sklearn.model_selection import ParameterGrid
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import statsmodels.api as sm
import joblib
from scipy.stats import uniform
from sklearn import svm
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import make_blobs
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, recall_score, precision_score, classification_report, confusion_matrix, make_scorer, f1_score


In [6]:
df_original = pd.read_csv("/kaggle/input/synthetic-financial-datasets-log/Synthetic_Financial_datasets_log.csv")


In [7]:
df_original['isFraud'] = df_original['isFraud'] | df_original['isFlaggedFraud']
df_original.drop(['isFlaggedFraud'], inplace=True, axis=1)

df = df_original.copy()

#convert categorical data to integers
le = LabelEncoder()
df['nameOrig'] = le.fit_transform(df['nameOrig'])
df['nameDest'] = le.fit_transform(df['nameDest'])
df['type'] = le.fit_transform(df['type']) # 0.0=CASH_IN; 0.25=CASH_OUT; 0.5=DEBIT; 0.75=PAYMENT; 1.0=TRANSFER

########### Data Preparation #################


# Removing Unessesary fields
df.drop(['nameOrig'], inplace=True, axis=1)
df.drop(['oldbalanceOrg'], inplace=True, axis=1)
df.drop(['newbalanceOrig'], inplace=True, axis=1)
df.drop(['oldbalanceDest'], inplace=True, axis=1)
df.drop(['newbalanceDest'], inplace=True, axis=1)
df.drop(['nameDest'], inplace=True, axis=1)
isFraud_field_no_normalise = df['isFraud']
df = df.drop('isFraud', axis = 1)

# normalize numbers in dataframe before joining one-hot encoded values
df = (df - df.min()) / (df.max() - df.min())

# separating feature variables and class variables
X = df
y = isFraud_field_no_normalise

# splitting the data into training and testing set
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 42)


In [8]:
param_grid = {
    'max_depth': [15, 30],
    'max_features': [0.8, 0.9, 1.0],
    'criterion': ['gini', 'entropy'],
    #'n_estimators': [100, 200, 300, 400, 500],
    'min_samples_leaf': [10, 25, 40, 60]
}

# Convert the parameter grid to a list of all combinations
grid = ParameterGrid(param_grid)

# Variable to store the best parameters and best score
best_params = None
best_score = 0

# Iterate through each parameter combination
for params in grid:
    print(f"Training with parameters: {params}")
    
    # Initialize the model with the current parameter combination
    model = DecisionTreeClassifier(
        max_depth=params['max_depth'],
        max_features=params['max_features'],
        criterion=params['criterion'],
        #n_estimators=params['n_estimators'],
        min_samples_leaf=params['min_samples_leaf'],
        random_state=42
    )
    
    # Fit the model
    model.fit(X_train, y_train)
    
    # Predict anomalies
    y_pred = model.predict(X_test)
    
    score = classification_report(y_test, y_pred, output_dict=True)['1']['f1-score']
    print(f"F1 Score for current parameters: {score}")
    
    if score > best_score:
        best_score = score
        best_model = model
        best_params = params
        print("\nBest Pre_Score:", best_score)
        print("\nBest Pre_Params:", best_params)
        joblib.dump(best_model, 'forest_pre_best_model.pkl')

# Display the best parameters and score
print("\nBest Parameters:", best_params)
print(f"Best Weighted F1-Score: {best_score:.4f}")

# Evaluate the best model
y_pred = best_model.predict(X_test)
joblib.dump(best_model, 'forest_best_model.pkl')

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Training with parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.8, 'min_samples_leaf': 10}
F1 Score for current parameters: 0.43256379100850545

Best Pre_Score: 0.43256379100850545

Best Pre_Params: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.8, 'min_samples_leaf': 10}
Training with parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.8, 'min_samples_leaf': 25}
F1 Score for current parameters: 0.3765234124438742
Training with parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.8, 'min_samples_leaf': 40}
F1 Score for current parameters: 0.38419530999036294
Training with parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.8, 'min_samples_leaf': 60}
F1 Score for current parameters: 0.36821952776005107
Training with parameters: {'criterion': 'gini', 'max_depth': 15, 'max_features': 0.9, 'min_samples_leaf': 10}
F1 Score for current parameters: 0.43256379100850545
Training with parameters: {'criterion': 'gini',

In [9]:
classification_report(y_test, y_pred, output_dict=True)['1']['f1-score']

0.5016987542468857