## Importing the CSV file from Google Drive

In [None]:
import os

import gdown

# Google Drive file ID of the dataset. Replace it with the ID of your own file
# to load a different CSV.
file_id = '1fOwmnF25IbW6hh55h4hyWHviw8kR7fVM'
url = f'https://drive.google.com/uc?id={file_id}'
output = 'final_dataset.csv'  # Local filename (with extension) the download is saved as

# The file is large (~165 MB), so reuse an existing local copy instead of
# downloading it again every time the notebook is re-run.
if not os.path.exists(output):
    # NOTE(review): some gdown versions signal a failed download by returning
    # None instead of raising - confirm against the installed version.
    if gdown.download(url, output, quiet=False) is None:
        raise RuntimeError(
            f'Could not download {url}; check the file ID and its sharing settings.'
        )

# Show the local filename as the cell result.
output

Downloading...
From (original): https://drive.google.com/uc?id=1fOwmnF25IbW6hh55h4hyWHviw8kR7fVM
From (redirected): https://drive.google.com/uc?id=1fOwmnF25IbW6hh55h4hyWHviw8kR7fVM&confirm=t&uuid=e80e647a-9c8c-4cf3-b2b7-b877cf70d056
To: /content/final_dataset.csv
100%|██████████| 165M/165M [00:04<00:00, 38.2MB/s]


'final_dataset.csv'

## Dataset preprocessing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Load the CSV saved by the download cell. That cell writes to the working
# directory, so a relative path finds the same file on Colab (/content) and
# also works when the notebook runs anywhere else.
df = pd.read_csv('final_dataset.csv')

categorical_columns = ['transaction_type', 'sender_account_type', 'receiver_account_type']

# Integer-encode each categorical column with its own encoder and keep every
# fitted encoder, so codes can be mapped back later with
# label_encoders[col].inverse_transform(...). Refitting one shared encoder
# would discard the mapping of every column except the last.
# Note: none of these columns is listed in top_features below, so the encoding
# does not influence X.
label_encoders = {}
for col in categorical_columns:
    label_encoder = LabelEncoder()
    df[col] = label_encoder.fit_transform(df[col])
    label_encoders[col] = label_encoder

# Feature columns used by every model in this notebook.
top_features = [
    'depositor_running_balance',
    'sender_before_transaction_balance',
    'amount',
    'sender_debit_amount',
    'withdrawer_running_balance',
]

X = df[top_features]
y = df['is_anomaly']  # binary target scored by the models below

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Decision tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Train a decision tree on the training split. fit() returns the fitted
# estimator, so construction and training are chained.
decision_tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the held-out split.
y_pred_dt = decision_tree_model.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
precision_dt = precision_score(y_test, y_pred_dt)
recall_dt = recall_score(y_test, y_pred_dt)
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)

# One print call, one report line per argument.
print(
    f'Decision Tree Accuracy: {accuracy_dt}',
    f'Decision Tree Precision: {precision_dt}',
    f'Decision Tree Recall: {recall_dt}',
    f'Decision Tree Confusion Matrix:\n {conf_matrix_dt}',
    sep='\n',
)


Decision Tree Accuracy: 0.9387773341007317
Decision Tree Precision: 0.4707126121492657
Decision Tree Recall: 0.5115831314825167
Decision Tree Confusion Matrix:
 [[254370   9262]
 [  7864   8237]]


## Prediction

In [None]:
import ipywidgets as widgets
from IPython.display import display
import numpy as np

# Shared sizing for every numeric input. The default description column is
# narrower than these labels, so 'initial' lets it grow to fit the full text
# instead of truncating it; the overall width is widened to leave room for the
# number field next to the longest label.
layout = widgets.Layout(width='400px')
input_style = {'description_width': 'initial'}

# One numeric input per model feature.
amount = widgets.FloatText(description="Amount:", layout=layout, style=input_style)
sender_debit_amount = widgets.FloatText(
    description="Sender Debit Amount:", layout=layout, style=input_style)
depositor_running_balance = widgets.FloatText(
    description="Depositor Running Balance:", layout=layout, style=input_style)
withdrawer_running_balance = widgets.FloatText(
    description="Withdrawer Running Balance:", layout=layout, style=input_style)
sender_before_transaction_balance = widgets.FloatText(
    description="Sender Balance Before Transaction:", layout=layout, style=input_style)

button = widgets.Button(description="Predict", button_style='success')

# Area that shows the prediction text.
output = widgets.Output()

def on_button_clicked(b):
    """Classify the transaction described by the input widgets and show the result.

    Reads the current value of each feature widget, runs it through
    ``decision_tree_model`` and prints the verdict into the ``output`` widget,
    replacing whatever was shown before.

    Args:
        b: The clicked button, as passed by ``Button.on_click``. Unused.
    """
    feature_values = {
        'depositor_running_balance': depositor_running_balance.value,
        'sender_before_transaction_balance': sender_before_transaction_balance.value,
        'amount': amount.value,
        'sender_debit_amount': sender_debit_amount.value,
        'withdrawer_running_balance': withdrawer_running_balance.value,
    }

    # Everything runs inside the output context so that errors raised while
    # predicting are shown to the user instead of disappearing in the callback.
    with output:
        output.clear_output()
        # The model was fitted on a DataFrame with named columns. Passing a
        # one-row frame ordered by top_features keeps the columns aligned with
        # training and avoids scikit-learn's "X does not have valid feature
        # names" warning that a bare array triggers.
        input_data = pd.DataFrame([feature_values], columns=top_features)
        prediction = decision_tree_model.predict(input_data)

        if prediction[0] == 1:
            print("Prediction: This transaction is an anomaly.")
        else:
            print("Prediction: This transaction is normal.")

# Register the click handler before the form is rendered.
button.on_click(on_button_clicked)

# Inputs first, then the trigger button, then the area that shows the result.
prediction_form = (
    depositor_running_balance,
    sender_before_transaction_balance,
    amount,
    sender_debit_amount,
    withdrawer_running_balance,
    button,
    output,
)
display(*prediction_form)

FloatText(value=0.0, description='Depositor Running Balance:', layout=Layout(width='300px'))

FloatText(value=0.0, description='Sender Balance Before Transaction:', layout=Layout(width='300px'))

FloatText(value=0.0, description='Amount:', layout=Layout(width='300px'))

FloatText(value=0.0, description='Sender Debit Amount:', layout=Layout(width='300px'))

FloatText(value=0.0, description='Withdrawer Running Balance:', layout=Layout(width='300px'))

Button(button_style='success', description='Predict', style=ButtonStyle())

Output()

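## Logistic regression

**TODO:** the code cell for this section is missing. The model comparison at the end of the notebook reads `accuracy_lr`, `precision_lr` and `recall_lr`, so this cell must be restored before the notebook can be run from top to bottom.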

## Random forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a random forest on the training split. n_jobs=-1 builds the trees on
# all available CPU cores; with a fixed random_state the fitted forest is the
# same as a single-core fit, only faster.
random_forest_model = RandomForestClassifier(random_state=42, n_jobs=-1)
random_forest_model.fit(X_train, y_train)

# Score the held-out split.
y_pred_rf = random_forest_model.predict(X_test)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)

print(f'Random Forest Accuracy: {accuracy_rf}')
print(f'Random Forest Precision: {precision_rf}')
print(f'Random Forest Recall: {recall_rf}')
print(f'Random Forest Confusion Matrix:\n {conf_matrix_rf}')


Random Forest Accuracy: 0.9704539686057777
Random Forest Precision: 0.9920874152223059
Random Forest Recall: 0.49059064654369294
Random Forest Confusion Matrix:
 [[263569     63]
 [  8202   7899]]


## SGD classifier

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Stochastic gradient descent is sensitive to feature scale, and the features
# here are raw monetary amounts and balances. Standardizing inside a pipeline
# fits the scaler on the training split only and applies the same scaling
# automatically whenever sgd_model.predict() is called.
sgd_model = make_pipeline(StandardScaler(), SGDClassifier(random_state=42))
sgd_model.fit(X_train, y_train)

# Score the held-out split.
y_pred_sgd = sgd_model.predict(X_test)
accuracy_sgd = accuracy_score(y_test, y_pred_sgd)
precision_sgd = precision_score(y_test, y_pred_sgd)
recall_sgd = recall_score(y_test, y_pred_sgd)
conf_matrix_sgd = confusion_matrix(y_test, y_pred_sgd)

print(f'SGD Classifier Accuracy: {accuracy_sgd}')
print(f'SGD Classifier Precision: {precision_sgd}')
print(f'SGD Classifier Recall: {recall_sgd}')
print(f'SGD Classifier Confusion Matrix:\n {conf_matrix_sgd}')




SGD Classifier Accuracy: 0.9625071049894006
SGD Classifier Precision: 0.8381732738884203
SGD Classifier Recall: 0.432022855723247
SGD Classifier Confusion Matrix:
 [[262289   1343]
 [  9145   6956]]


## Support vector machine

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Kernel SVC training time grows at least quadratically with the number of
# rows, so fitting on the full training split is impractical. Train on a
# stratified subsample instead; raise or lower the cap to trade accuracy
# against runtime.
SVM_MAX_TRAIN_SAMPLES = 50_000

if len(X_train) > SVM_MAX_TRAIN_SAMPLES:
    # Stratify so the subsample keeps the anomaly rate of the full training split.
    X_train_svm, X_unused_svm, y_train_svm, y_unused_svm = train_test_split(
        X_train,
        y_train,
        train_size=SVM_MAX_TRAIN_SAMPLES,
        stratify=y_train,
        random_state=42,
    )
    del X_unused_svm, y_unused_svm  # only the subsample is needed
else:
    X_train_svm, y_train_svm = X_train, y_train

# Initialize the SVM model. SVMs are not scale invariant, so the features are
# standardized inside the pipeline (scaler fitted on the training rows only).
svm_model = make_pipeline(StandardScaler(), SVC(random_state=42))

# Train the model
svm_model.fit(X_train_svm, y_train_svm)

# Make predictions on the full held-out split
y_pred_svm = svm_model.predict(X_test)

# Calculate metrics
accuracy_svm = accuracy_score(y_test, y_pred_svm)
precision_svm = precision_score(y_test, y_pred_svm)
recall_svm = recall_score(y_test, y_pred_svm)
conf_matrix_svm = confusion_matrix(y_test, y_pred_svm)

# Print results
print(f'SVM Accuracy: {accuracy_svm}')
print(f'SVM Precision: {precision_svm}')
print(f'SVM Recall: {recall_svm}')
print(f'SVM Confusion Matrix:\n {conf_matrix_svm}')


In [None]:
def format_metrics_line(label, suffix, namespace):
    """Build one comparison line from the metric variables of a single model.

    Args:
        label: Model name shown at the start of the line.
        suffix: Variable-name suffix used by that model's cell, e.g. ``'dt'``
            for ``accuracy_dt`` / ``precision_dt`` / ``recall_dt``.
        namespace: Mapping of variable names to values, normally ``globals()``.

    Returns:
        The formatted metrics line, or a notice naming the undefined variables
        when the model's cell has not been run.
    """
    metric_names = [f"{metric}_{suffix}" for metric in ("accuracy", "precision", "recall")]
    missing = [name for name in metric_names if name not in namespace]
    if missing:
        # Report the gap instead of aborting the whole comparison with a NameError.
        return f"{label} - metrics unavailable (undefined: {', '.join(missing)})"
    accuracy, precision, recall = (namespace[name] for name in metric_names)
    return f"{label} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}"


print("Model Performance Comparison:\n")
for model_label, model_suffix in (
    ("Decision Tree", "dt"),
    ("Logistic Regression", "lr"),
    ("Random Forest", "rf"),
    ("SGD Classifier", "sgd"),
):
    print(format_metrics_line(model_label, model_suffix, globals()))



Model Performance Comparison:

Decision Tree - Accuracy: 0.9387773341007317, Precision: 0.4707126121492657, Recall: 0.5115831314825167
Logistic Regression - Accuracy: 0.965009491193388, Precision: 0.9840515258395952, Recall: 0.3985466741196199
Random Forest - Accuracy: 0.9704539686057777, Precision: 0.9920874152223059, Recall: 0.49059064654369294
SGD Classifier - Accuracy: 0.9625071049894006, Precision: 0.8381732738884203, Recall: 0.432022855723247
