# Mobile Money Fraud Transaction Detection System

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [5]:
# Read the raw mobile-money transaction export into a DataFrame and
# preview its first five rows as the cell output.
csv_path = 'mobile_money_transactions.csv'
df = pd.read_csv(csv_path)
df.head(n=5)

Unnamed: 0,transaction_id,user_id,transaction_type,amount,device_id,location,timestamp,channel,agent_id,sim_swap_flag,balance_before,balance_after,hour_of_day,day_of_week,transaction_velocity
0,T000000,U0874,receive,5157.6,D082,Yaba,2024-03-25 16:38:00,USSD,A035,0,77090.73,71933.13,16,0,0
1,T000001,U0460,receive,2501.95,D196,Ajah,2024-04-11 23:47:00,USSD,A069,0,25595.31,23093.36,23,3,1
2,T000002,U0253,send,3727.93,D180,Ikeja,2024-04-01 14:52:00,USSD,A194,0,68411.2,64683.27,14,0,4
3,T000003,U0526,receive,4044.79,D096,Owerri,2024-03-13 00:14:00,USSD,A297,0,13859.25,9814.46,0,2,0
4,T000004,U0752,receive,2523.75,D175,Yaba,2024-06-28 23:38:00,Agent,A050,0,2497.58,481.97505,23,4,2


## Exploratory Data Analysis (EDA)

In [6]:
# Print the frequency table of every column in turn, which shows each
# field's cardinality and its most and least common values.
for column_name, column_values in df.items():
    print(column_values.value_counts())

transaction_id
T009983    1
T009982    1
T009981    1
T009980    1
T009979    1
          ..
T000004    1
T000003    1
T000002    1
T000001    1
T000000    1
Name: count, Length: 10000, dtype: int64
user_id
U0464    24
U0560    21
U0849    20
U0221    20
U0640    19
         ..
U0829     3
U0190     3
U0904     3
U0287     3
U0185     3
Name: count, Length: 1000, dtype: int64
transaction_type
send        3542
receive     3514
cash_out    1492
cash_in     1452
Name: count, dtype: int64
amount
1952.28    2
94.02      2
2952.60    2
5627.95    2
4095.85    2
          ..
1017.83    1
2930.10    1
2652.99    1
246.28     1
3117.12    1
Name: count, Length: 9946, dtype: int64
device_id
D186    74
D134    66
D045    66
D071    63
D004    63
        ..
D148    37
D137    36
D040    33
D049    32
D054    29
Name: count, Length: 200, dtype: int64
location
Ikeja          1267
Owerri         1266
Aba            1265
Makurdi        1261
Yaba           1246
Portharcout    1238
Ajah           1235
K

In [7]:
# Count the missing (NaN/None) entries in each column.
missing_per_column = df.isna().sum()
missing_per_column

transaction_id          0
user_id                 0
transaction_type        0
amount                  0
device_id               0
location                0
timestamp               0
channel                 0
agent_id                0
sim_swap_flag           0
balance_before          0
balance_after           0
hour_of_day             0
day_of_week             0
transaction_velocity    0
dtype: int64

In [10]:
# Count rows that exactly repeat an earlier row across all columns.
duplicate_mask = df.duplicated()
duplicate_mask.sum()

np.int64(0)

In [11]:
# Drop the ID columns: transaction_id, user_id and agent_id.
# The drop is non-inplace and uses errors='ignore' so the cell can be
# re-run safely: labels that were already removed are skipped instead of
# raising KeyError on the second execution.
id_columns = ['transaction_id', 'user_id', 'agent_id']
df = df.drop(columns=id_columns, errors='ignore')

In [12]:
# Print the frame summary: row count, column names, non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   transaction_type      10000 non-null  object 
 1   amount                10000 non-null  float64
 2   device_id             10000 non-null  object 
 3   location              10000 non-null  object 
 4   timestamp             10000 non-null  object 
 5   channel               10000 non-null  object 
 6   sim_swap_flag         10000 non-null  int64  
 7   balance_before        10000 non-null  float64
 8   balance_after         10000 non-null  float64
 9   hour_of_day           10000 non-null  int64  
 10  day_of_week           10000 non-null  int64  
 11  transaction_velocity  10000 non-null  int64  
dtypes: float64(3), int64(4), object(5)
memory usage: 937.6+ KB


In [13]:
# Convert the timestamp column to a datetime dtype.
# errors='coerce' replaces any value that cannot be parsed with NaT instead
# of raising. The missing-value check in this notebook runs before this
# conversion, so it cannot see NaT values created here; count them and
# report when there are any, rather than letting them pass silently.
timestamp_parsed = pd.to_datetime(df['timestamp'], errors='coerce')
n_unparseable = int((timestamp_parsed.isna() & df['timestamp'].notna()).sum())
if n_unparseable:
    print(f"WARNING: {n_unparseable} timestamp value(s) could not be parsed and were set to NaT")
df['timestamp'] = timestamp_parsed

In [14]:
# Display the frame as the cell output to inspect it after the column drop
# and timestamp conversion (pandas abbreviates the middle rows with "...").
df

Unnamed: 0,transaction_type,amount,device_id,location,timestamp,channel,sim_swap_flag,balance_before,balance_after,hour_of_day,day_of_week,transaction_velocity
0,receive,5157.60,D082,Yaba,2024-03-25 16:38:00,USSD,0,77090.73,71933.13000,16,0,0
1,receive,2501.95,D196,Ajah,2024-04-11 23:47:00,USSD,0,25595.31,23093.36000,23,3,1
2,send,3727.93,D180,Ikeja,2024-04-01 14:52:00,USSD,0,68411.20,64683.27000,14,0,4
3,receive,4044.79,D096,Owerri,2024-03-13 00:14:00,USSD,0,13859.25,9814.46000,0,2,0
4,receive,2523.75,D175,Yaba,2024-06-28 23:38:00,Agent,0,2497.58,481.97505,23,4,2
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,receive,3214.51,D052,Owerri,2024-06-26 22:11:00,App,0,81009.63,77795.12000,22,2,2
9996,cash_in,1375.08,D151,Makurdi,2024-04-07 11:59:00,USSD,1,17085.68,15710.60000,11,6,2
9997,cash_out,8774.94,D157,Ikeja,2024-05-20 02:15:00,USSD,0,55531.93,46756.99000,2,0,0
9998,cash_out,15885.43,D029,Ikeja,2024-01-17 04:51:00,USSD,0,18953.09,3067.66000,4,2,2
