<a href="https://colab.research.google.com/github/melihkurtaran/Fraud_Detection/blob/main/XAI_FraudDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Fraud Detection Project XAI on generated datasets**

In [32]:
#Load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.subplots as sp

In [1]:
#Connect to GitHub for faster access
!git clone https://github.com/melihkurtaran/Fraud_Detection.git

Cloning into 'Fraud_Detection'...
remote: Enumerating objects: 70, done.[K
remote: Total 70 (delta 0), reused 0 (delta 0), pack-reused 70[K
Unpacking objects: 100% (70/70), 111.58 MiB | 4.86 MiB/s, done.
Updating files: 100% (25/25), done.


In [13]:
# Upload the gendered datasets
female = pd.read_csv("Fraud_Detection/datasets/generated_datasets/adults_2550_female_urban.csv", delimiter='|')
male = pd.read_csv("Fraud_Detection/datasets/generated_datasets/adults_2550_male_urban.csv", delimiter='|')

In [17]:
female = female.rename(columns={'merch_long,,': 'merch_long'})

# Merge the dataframes
df = pd.concat([female, male])

In [21]:
df.head()

Unnamed: 0,ssn,cc_num,first,last,gender,street,city,state,zip,lat,...,trans_num,trans_date,trans_time,unix_time,category,amt,is_fraud,merchant,merch_lat,merch_long
0,115-04-4507,4218196001337,Kathy,Johnson,F,863 Lawrence Valleys,Homosassa,FL,34446,28.7508,...,59f060ee29e40979a362acdb04315f6e,2022-12-12,01:47:59,1670806079,shopping_net,10.13,1,fraud_Fisher-Schowalter,28.312405,"-82.745821,,"
1,115-04-4507,4218196001337,Kathy,Johnson,F,863 Lawrence Valleys,Homosassa,FL,34446,28.7508,...,faec1d6969fc4ed9b5182be0d4a0f15b,2022-12-12,03:39:36,1670812776,shopping_pos,814.04,1,fraud_Lynch Ltd,28.52252,"-81.591884,,"
2,115-04-4507,4218196001337,Kathy,Johnson,F,863 Lawrence Valleys,Homosassa,FL,34446,28.7508,...,53fa6924f0c85c2a9eeee8e5a1f604fd,2022-12-12,02:33:53,1670808833,gas_transport,969.16,1,fraud_Cummerata-Jones,27.874119,"-82.412580,,"
3,115-04-4507,4218196001337,Kathy,Johnson,F,863 Lawrence Valleys,Homosassa,FL,34446,28.7508,...,9869951e93f3dcf1ce8f8a1677741c3e,2022-12-13,03:36:20,1670898980,shopping_pos,743.97,1,fraud_Dooley Inc,29.525702,"-82.368289,,"
4,115-04-4507,4218196001337,Kathy,Johnson,F,863 Lawrence Valleys,Homosassa,FL,34446,28.7508,...,6e037c9b466baf60e97b7eb63bfe6e49,2022-12-13,01:04:54,1670889894,shopping_pos,791.05,1,fraud_Stoltenberg-Beatty,28.629166,"-83.486240,,"


No null values!

In [23]:
# Check for null values
null_df = df.isnull()

# Check if there are any null values in the original dataframe
if null_df.any().any():
    print("There are null values in the dataframe")
else:
    print("There are no null values in the dataframe")

There are no null values in the dataframe


In [62]:
df = df.drop(['ssn', 'cc_num','first','last','city','state','zip','street','dob','acct_num','profile','trans_num','trans_date','trans_time','unix_time','merch_long'], axis=1)

# **Data Exploration**

In [63]:
df.columns

Index(['gender', 'lat', 'long', 'city_pop', 'job', 'category', 'amt',
       'is_fraud', 'merchant', 'merch_lat'],
      dtype='object')

Checking the imbalance for frauds

In [41]:
# Define colors and labels for the pie chart
colors = ['#00BFFF', '#FFD700']  # blue and gold
labels = ['Normal Transactions', 'Fraudulent Transactions']

# Calculate the percentage of fraudulent transactions
fraud_percentage = df['is_fraud'].value_counts(normalize=True) * 100

# Create a Pie chart with Plotly
fig = go.Figure(data=[go.Pie(labels=labels,
                             values=fraud_percentage,
                             hole=.3)])

# Customize the chart colors, fonts, and layout
fig.update_traces(hoverinfo='label+percent', textinfo='percent', textfont_size=18,
                  marker=dict(colors=colors, line=dict(color='white', width=0.1)))
fig.update_layout(
    title_text="Credit Card Transactions",
    title_font=dict(size=24, color='white'),
    legend_title_font=dict(size=20, color='yellow'),
    paper_bgcolor="black",
    plot_bgcolor='black',
    font=dict(color='white', size=16)
)

# Show the chart
fig.show()

Checking the gender balance

In [54]:
# Define colors and labels for the pie chart
colors = ['#FF69B4', '#1E90FF']  # pink and blue
labels = ['Female', 'Male']

# Calculate the percentage of transactions by gender
gender_percentage = df['gender'].value_counts(normalize=True) * 100
gender_counts = df['gender'].value_counts()

# Create a Pie chart with Plotly
fig = go.Figure(data=[go.Pie(labels=labels,
                             values=gender_percentage,
                             hole=.3)])

# Customize the chart colors, fonts, and layout
fig.update_traces(hoverinfo='label+percent', textinfo='percent', textfont_size=18,
                  marker=dict(colors=colors, line=dict(color='white', width=0.1)))
fig.update_layout(
    title_text="Credit Card Transactions by Gender",
    title_font=dict(size=24, color='white'),
    legend_title_font=dict(size=20, color='yellow'),
    paper_bgcolor="black",
    plot_bgcolor='black',
    font=dict(color='white', size=16)
)

# Show the chart
fig.show()


In [53]:
gender_counts

F    60301
M    51663
Name: gender, dtype: int64

In [64]:
df

Unnamed: 0,gender,lat,long,city_pop,job,category,amt,is_fraud,merchant,merch_lat
0,F,28.7508,-82.5139,27123,Accounting technician,shopping_net,10.13,1,fraud_Fisher-Schowalter,28.312405
1,F,28.7508,-82.5139,27123,Accounting technician,shopping_pos,814.04,1,fraud_Lynch Ltd,28.522520
2,F,28.7508,-82.5139,27123,Accounting technician,gas_transport,969.16,1,fraud_Cummerata-Jones,27.874119
3,F,28.7508,-82.5139,27123,Accounting technician,shopping_pos,743.97,1,fraud_Dooley Inc,29.525702
4,F,28.7508,-82.5139,27123,Accounting technician,shopping_pos,791.05,1,fraud_Stoltenberg-Beatty,28.629166
...,...,...,...,...,...,...,...,...,...,...
51658,M,37.4888,-120.8535,77377,Theatre manager,kids_pets,3.30,0,fraud_Ullrich Ltd,38.211227
51659,M,37.4888,-120.8535,77377,Theatre manager,food_dining,9.89,0,fraud_Ernser-Lynch,37.670862
51660,M,37.4888,-120.8535,77377,Theatre manager,misc_pos,1203.06,0,fraud_Hermann-Gaylord,36.968387
51661,M,37.4888,-120.8535,77377,Theatre manager,personal_care,2.44,0,fraud_Yost-Rogahn,36.925122
