In [4]:
# Importing dependencie
# ----------------------------------------------------------------

# * Analysis & Manipulation libraries
import pandas as pd 
import numpy as np

# * Dashboard creation libraries
import plotly.express as px 
from dash import Dash, html, dcc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template
import dash_bootstrap_components as dbc

In [5]:
# Loading the dataset
path = "/Users/galbeeir/Desktop/git/Project 4 - Fradulent Transactions/fraudulent_transactions/ML_and_dashboard/datagen/CSVs/fraudTest.csv"

fraud_df = pd.read_csv(path, parse_dates=["trans_date_trans_time", "dob"],infer_datetime_format=True)

# Showing the first 5 results
fraud_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,lat,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud
0,0,2020-06-21 12:14:25,2291163933867244,fraud_Kirlin and Sons,personal_care,2.86,Jeff,Elliott,M,351 Darlene Green,...,33.9659,-80.9355,333497,Mechanical engineer,1968-03-19,2da90c7d74bd46a0caf3777415b3ebd3,1371816865,33.986391,-81.200714,0
1,1,2020-06-21 12:14:33,3573030041201292,fraud_Sporer-Keebler,personal_care,29.84,Joanne,Williams,F,3638 Marsh Union,...,40.3207,-110.436,302,"Sales professional, IT",1990-01-17,324cc204407e99f51b0d6ca0055005e7,1371816873,39.450498,-109.960431,0
2,2,2020-06-21 12:14:53,3598215285024754,"fraud_Swaniawski, Nitzsche and Welch",health_fitness,41.28,Ashley,Lopez,F,9333 Valentine Point,...,40.6729,-73.5365,34496,"Librarian, public",1970-10-21,c81755dbbbea9d5c77f094348a7579be,1371816893,40.49581,-74.196111,0
3,3,2020-06-21 12:15:15,3591919803438423,fraud_Haley Group,misc_pos,60.05,Brian,Williams,M,32941 Krystal Mill Apt. 552,...,28.5697,-80.8191,54767,Set designer,1987-07-25,2159175b9efe66dc301f149d3d5abf8c,1371816915,28.812398,-80.883061,0
4,4,2020-06-21 12:15:17,3526826139003047,fraud_Johnston-Casper,travel,3.19,Nathan,Massey,M,5783 Evan Roads Apt. 465,...,44.2529,-85.017,1126,Furniture designer,1955-07-06,57ff021bd3f328f8738bb535c302a31b,1371816917,44.959148,-85.884734,0


In [6]:
# Dropping irrelevant columns
columns_to_drop = ["Unnamed: 0", "cc_num", "unix_time", "zip"]
fraud_df = fraud_df.drop(columns_to_drop, axis=1)
fraud_df.head()

Unnamed: 0,trans_date_trans_time,merchant,category,amt,first,last,gender,street,city,state,lat,long,city_pop,job,dob,trans_num,merch_lat,merch_long,is_fraud
0,2020-06-21 12:14:25,fraud_Kirlin and Sons,personal_care,2.86,Jeff,Elliott,M,351 Darlene Green,Columbia,SC,33.9659,-80.9355,333497,Mechanical engineer,1968-03-19,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,0
1,2020-06-21 12:14:33,fraud_Sporer-Keebler,personal_care,29.84,Joanne,Williams,F,3638 Marsh Union,Altonah,UT,40.3207,-110.436,302,"Sales professional, IT",1990-01-17,324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,0
2,2020-06-21 12:14:53,"fraud_Swaniawski, Nitzsche and Welch",health_fitness,41.28,Ashley,Lopez,F,9333 Valentine Point,Bellmore,NY,40.6729,-73.5365,34496,"Librarian, public",1970-10-21,c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,0
3,2020-06-21 12:15:15,fraud_Haley Group,misc_pos,60.05,Brian,Williams,M,32941 Krystal Mill Apt. 552,Titusville,FL,28.5697,-80.8191,54767,Set designer,1987-07-25,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,0
4,2020-06-21 12:15:17,fraud_Johnston-Casper,travel,3.19,Nathan,Massey,M,5783 Evan Roads Apt. 465,Falmouth,MI,44.2529,-85.017,1126,Furniture designer,1955-07-06,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,0


In [7]:
# Formatting category & merchant
fraud_df['merchant'] = fraud_df['merchant'].str.replace("fraud_", "")
fraud_df['merchant']


0                        Kirlin and Sons
1                         Sporer-Keebler
2         Swaniawski, Nitzsche and Welch
3                            Haley Group
4                        Johnston-Casper
                       ...              
555714                   Reilly and Sons
555715                    Hoppe-Parisian
555716                         Rau-Robel
555717                   Breitenberg LLC
555718                       Dare-Marvin
Name: merchant, Length: 555719, dtype: object

In [8]:
fraud_df['category'] = fraud_df['category'].str.replace("_", " ")
fraud_df['category']

0          personal care
1          personal care
2         health fitness
3               misc pos
4                 travel
               ...      
555714    health fitness
555715         kids pets
555716         kids pets
555717            travel
555718     entertainment
Name: category, Length: 555719, dtype: object

In [9]:
fraud_df['category'] = fraud_df['category'].str.title()

In [10]:
fraud_df.head()

Unnamed: 0,trans_date_trans_time,merchant,category,amt,first,last,gender,street,city,state,lat,long,city_pop,job,dob,trans_num,merch_lat,merch_long,is_fraud
0,2020-06-21 12:14:25,Kirlin and Sons,Personal Care,2.86,Jeff,Elliott,M,351 Darlene Green,Columbia,SC,33.9659,-80.9355,333497,Mechanical engineer,1968-03-19,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,0
1,2020-06-21 12:14:33,Sporer-Keebler,Personal Care,29.84,Joanne,Williams,F,3638 Marsh Union,Altonah,UT,40.3207,-110.436,302,"Sales professional, IT",1990-01-17,324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,0
2,2020-06-21 12:14:53,"Swaniawski, Nitzsche and Welch",Health Fitness,41.28,Ashley,Lopez,F,9333 Valentine Point,Bellmore,NY,40.6729,-73.5365,34496,"Librarian, public",1970-10-21,c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,0
3,2020-06-21 12:15:15,Haley Group,Misc Pos,60.05,Brian,Williams,M,32941 Krystal Mill Apt. 552,Titusville,FL,28.5697,-80.8191,54767,Set designer,1987-07-25,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,0
4,2020-06-21 12:15:17,Johnston-Casper,Travel,3.19,Nathan,Massey,M,5783 Evan Roads Apt. 465,Falmouth,MI,44.2529,-85.017,1126,Furniture designer,1955-07-06,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,0


In [11]:
# Calculating the age of the person at the time of the transaction
fraud_df['age'] = (fraud_df['trans_date_trans_time'] - fraud_df['dob']).apply(lambda x: x.days // 365)

fraud_df.head()

Unnamed: 0,trans_date_trans_time,merchant,category,amt,first,last,gender,street,city,state,lat,long,city_pop,job,dob,trans_num,merch_lat,merch_long,is_fraud,age
0,2020-06-21 12:14:25,Kirlin and Sons,Personal Care,2.86,Jeff,Elliott,M,351 Darlene Green,Columbia,SC,33.9659,-80.9355,333497,Mechanical engineer,1968-03-19,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,0,52
1,2020-06-21 12:14:33,Sporer-Keebler,Personal Care,29.84,Joanne,Williams,F,3638 Marsh Union,Altonah,UT,40.3207,-110.436,302,"Sales professional, IT",1990-01-17,324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,0,30
2,2020-06-21 12:14:53,"Swaniawski, Nitzsche and Welch",Health Fitness,41.28,Ashley,Lopez,F,9333 Valentine Point,Bellmore,NY,40.6729,-73.5365,34496,"Librarian, public",1970-10-21,c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,0,49
3,2020-06-21 12:15:15,Haley Group,Misc Pos,60.05,Brian,Williams,M,32941 Krystal Mill Apt. 552,Titusville,FL,28.5697,-80.8191,54767,Set designer,1987-07-25,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,0,32
4,2020-06-21 12:15:17,Johnston-Casper,Travel,3.19,Nathan,Massey,M,5783 Evan Roads Apt. 465,Falmouth,MI,44.2529,-85.017,1126,Furniture designer,1955-07-06,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,0,65


In [12]:
# Dropping the dob column
fraud_df = fraud_df.drop('dob', axis=1)

fraud_df.head()

Unnamed: 0,trans_date_trans_time,merchant,category,amt,first,last,gender,street,city,state,lat,long,city_pop,job,trans_num,merch_lat,merch_long,is_fraud,age
0,2020-06-21 12:14:25,Kirlin and Sons,Personal Care,2.86,Jeff,Elliott,M,351 Darlene Green,Columbia,SC,33.9659,-80.9355,333497,Mechanical engineer,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,0,52
1,2020-06-21 12:14:33,Sporer-Keebler,Personal Care,29.84,Joanne,Williams,F,3638 Marsh Union,Altonah,UT,40.3207,-110.436,302,"Sales professional, IT",324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,0,30
2,2020-06-21 12:14:53,"Swaniawski, Nitzsche and Welch",Health Fitness,41.28,Ashley,Lopez,F,9333 Valentine Point,Bellmore,NY,40.6729,-73.5365,34496,"Librarian, public",c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,0,49
3,2020-06-21 12:15:15,Haley Group,Misc Pos,60.05,Brian,Williams,M,32941 Krystal Mill Apt. 552,Titusville,FL,28.5697,-80.8191,54767,Set designer,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,0,32
4,2020-06-21 12:15:17,Johnston-Casper,Travel,3.19,Nathan,Massey,M,5783 Evan Roads Apt. 465,Falmouth,MI,44.2529,-85.017,1126,Furniture designer,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,0,65


In [13]:
# Modifying the gender column
fraud_df['gender'] = fraud_df['gender'].str.replace("M", "Male")
fraud_df['gender'] = fraud_df['gender'].str.replace("F", "Female")
fraud_df.head()

Unnamed: 0,trans_date_trans_time,merchant,category,amt,first,last,gender,street,city,state,lat,long,city_pop,job,trans_num,merch_lat,merch_long,is_fraud,age
0,2020-06-21 12:14:25,Kirlin and Sons,Personal Care,2.86,Jeff,Elliott,Male,351 Darlene Green,Columbia,SC,33.9659,-80.9355,333497,Mechanical engineer,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,0,52
1,2020-06-21 12:14:33,Sporer-Keebler,Personal Care,29.84,Joanne,Williams,Female,3638 Marsh Union,Altonah,UT,40.3207,-110.436,302,"Sales professional, IT",324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,0,30
2,2020-06-21 12:14:53,"Swaniawski, Nitzsche and Welch",Health Fitness,41.28,Ashley,Lopez,Female,9333 Valentine Point,Bellmore,NY,40.6729,-73.5365,34496,"Librarian, public",c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,0,49
3,2020-06-21 12:15:15,Haley Group,Misc Pos,60.05,Brian,Williams,Male,32941 Krystal Mill Apt. 552,Titusville,FL,28.5697,-80.8191,54767,Set designer,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,0,32
4,2020-06-21 12:15:17,Johnston-Casper,Travel,3.19,Nathan,Massey,Male,5783 Evan Roads Apt. 465,Falmouth,MI,44.2529,-85.017,1126,Furniture designer,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,0,65


### Card Visuals: Calculations
We have two cards in our dasboard: 
1. Amount of transactions in the dataset
2. The percentage of faudulent transactions in the dataset

In [14]:
# Amount of transactions in the dataset
total_transactios = fraud_df['trans_num'].count()
total_transactios_formatted = str(total_transactios)[:3] +"," +str(total_transactios)[3:]
total_transactios_formatted

'555,719'

In [15]:
# The percentage of fraudulent transactions relative to non-fraudulent transactions
percentage_fraudulent = round((fraud_df.query("is_fraud == 1")['is_fraud'].count()) / (fraud_df.query("is_fraud == 0")['is_fraud'].count()), 3)
percentage_fraudulent_formatted = f"%{percentage_fraudulent}"
percentage_fraudulent_formatted

'%0.004'

### Bar Visuals: Preparation & Formatting

In [16]:
# To create the H-Barchart we just need to group by a certain "categorical" column and count the transactions to show the summary
    # * To include descending/ascending order we just have to apply the simple logic based on RadioItems
    #NOTE: Descending order in H-Barchart:  ascending = True
    #NOTE: We will have to make the y axis dynamic by the category
px.bar(
    fraud_df.groupby("category", as_index=False)["trans_num"].count().sort_values(by="trans_num", ascending=True),
    x="trans_num",
    y="category",
    color="trans_num",
    color_continuous_scale="Tealgrn",
    text_auto='.2s',
    title="Total Transactions by Category"
).update_xaxes(
    showline=False, tickfont_color='rgba(0, 0, 0, 0)',
    title ="Total Transactions"
).update_layout(
    title = {
        'x': 0.12,
        'y': .85
        }
)

In [17]:
# NOTE: The destribution by age bar chart should include a filter of is_fraud and a condition that if you select a fruad the color will be red, if you select a non-fraud color is green
# NOTE: The dataframe to plot should have a query before the groupby, to specify weather the tansaction is/isn't fraudulent
# NOTE: All the headers in the dashboard should add fraudulent/non-fraudulent/All to the chart title name
(
    px
    .histogram(
    fraud_df.groupby("age", as_index=False)['trans_num'].count(),
    x="age",
    y="trans_num",
    title="Destribution of Transactions by Age (All)"
    )
    .update_traces(marker_color='rgba(49, 252, 3, 0.6)', marker_line_color='#2ad104',
                  marker_line_width=1.5,
                    opacity=0.6, 
                    )
    .update_layout(
        title = {
            "x": 0.075,
            "y": .85
        }
    )
)


### Creating and Formatting the Pie Chart

In [18]:
fig = px.pie(
    fraud_df.groupby("gender", as_index=False)["trans_num"].count(),
    values="trans_num",
    names="gender",
    hole=0.46,
    color_discrete_sequence=['rgba(252, 3, 3, 0.7)', 'rgba(49, 252, 3, 0.6)']  # Assign specific colors for genders (blue and orange)
)

fig.update_layout(
    title_text="Transactions Breakdown",
    annotations=[dict(text='Gender %',
                     x=0.5,
                     y=0.5,
                     font_size=20,
                     showarrow=False)],
    title = {
        "x": 0.48
    }
)

fig.show()

### Formatting and preparing the density map

In [19]:
px.scatter_mapbox(
    fraud_df.groupby(["city", "lat", "long"])["trans_num"].count().reset_index(),
    lat="lat",
    lon="long",
    size="trans_num",
    color="trans_num",
    color_continuous_scale=px.colors.sequential.Jet,
    zoom=3,
    mapbox_style="carto-darkmatter",
    title="Destribution of Transactions (All)",
    hover_data=["city"]
    ).update_layout(
        title={
            "x":0.076,
            "y":.85
        },
    coloraxis_colorbar = dict(
        thicknessmode="pixels",
        thickness=15,
        dtick=300,
        title="Transactions #"
    )
    )


In [20]:
fraud_df.groupby(["city", "lat", "long"])["trans_num"].count().reset_index()

Unnamed: 0,city,lat,long,trans_num
0,Achille,33.8396,-96.3648,208
1,Acworth,43.1960,-72.3001,828
2,Adams,43.8967,-89.8219,223
3,Afton,44.8696,-92.8234,461
4,Afton,45.3637,-84.4695,436
...,...,...,...,...
907,Woods Cross,40.8874,-111.9027,216
908,Woodville,34.6689,-86.2296,813
909,Yellowstone National Park,44.7957,-110.6137,217
910,Zaleski,39.2830,-82.3977,638


# Building the dashboard

In [23]:
# Importing external stylesheets
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# Nameing the app and using the SLATE style theme
app = Dash(__name__, external_stylesheets=[dbc.themes.SLATE, dbc_css])

# Configuring the SLATE style theme on the figures
load_figure_template("SLATE")

# Define filter labels
FILTER_LABELS = {
    1: 'Fraudulent',
    0: 'Non-Fraudulent',
    -1: 'All'
}

# Define chart colors
color_list = ["#FC0303", "#31FC03"]

# Determining the app_layout
app.layout = html.Div(
    style={"width": "80%", "height": "80%", "margin-left": "10%", "margin-right": "10%", "margin-top": "20%"},
    children=[
    dbc.Row(html.H1(id="header"), style={"color":"white", "margin-top":"5px", "margin-left":"5px", "fontSize": "35px"}),
    dbc.Row(dbc.Card(dbc.RadioItems(
        id="dataFilter",
        options= [
            {'label': 'Fraudulent', 'value': 1},
            {'label': 'Non-Fraudulent', 'value': 0},
            {'label': 'All', 'value': -1}],
            value=0,
            inline=True
    ), style={"textAlign":"center"})),
    html.Br(),
    dbc.Row([
        dbc.Col(dbc.Card(f"Total Transactions: {total_transactios_formatted}"), style={"textAlign":"center",
                                                                                      "fontSize": "20px"},),
        dbc.Col(dbc.Card(f"Fraudulent: {percentage_fraudulent_formatted}"), style={"textAlign":"center",
                                                                                    "fontSize": "20px"}),
        ]),
    html.Br(),
    dbc.Row([
        dbc.Col(dbc.Card([
            dcc.Dropdown(
                id="features",
                options=fraud_df.select_dtypes(include='object').columns[:-1],
                value= "category",
                className='dbc'
            ),
            dbc.RadioItems(
                id="asc-desc",
                options= [
                    {'label': 'Ascending', 'value': True},
                    {'label': 'Descending', 'value': False}],
                value=False,
                inline=True),
            dcc.Graph(id="hBarChart"),
            ]), width=4),
        dbc.Col(dbc.Card(dcc.Graph(id="histogram")), width=4),
        dbc.Col(dbc.Card(dcc.Graph(id="pieChart")), width=4)
        ]),
    html.Br(),
    dbc.Row(dbc.Card(dcc.Graph(id="scatterMapBox", style={"width": "100%"})))
])
@app.callback(
    Output("header", "children"),
    Output("hBarChart", "figure"),
    Output("histogram", "figure"),
    Output("pieChart", "figure"),
    Output("scatterMapBox", "figure"),
    Input("dataFilter", "value"),
    Input("features", "value"),
    Input("asc-desc", "value")
)
def dashboard(filter_item, feature, sort_order):
    # * Prevent None values
    if filter_item is None:
        raise PreventUpdate()
    
    # * Match the filted label to the selected filter item
    filter_label = FILTER_LABELS.get(filter_item, 'Unknown Filter')
    
    # * Create a dynamic header
    header = f"{filter_label} Dashboard"

    # * Filter the datset based on the selected filter item
    if filter_item == 1:
        df = fraud_df.query("is_fraud == 1")
    elif filter_item == 0:
        df = fraud_df.query("is_fraud == 0")
    else:
        df = fraud_df

    # * Plot the bar chart 
    bar = (
        px.bar(
        df.groupby(feature, as_index=False)["trans_num"].count().sort_values(by="trans_num", ascending=sort_order),
        x="trans_num",
        y=feature,
        color="trans_num",
        color_continuous_scale="Tealgrn",
        text_auto='.2s',
        title=f"Total Transactions by {feature} ({filter_label})"
        )
        .update_xaxes(
        title =f"Total Transactions")
        .update_layout(
        title = {
            'x': 0.12,
            'y': .85
        },
        coloraxis_showscale=False,
        plot_bgcolor='rgba(15, 15, 15, 0)',
        paper_bgcolor='rgba(15, 15, 15, 0.5)'
        )
    )

    # * Plot the histogram
    histogram = (
    px.histogram(
        df.groupby("age", as_index=False)['trans_num'].count(),
        x="age",
        y="trans_num",
        title=f"Destribution of Transactions by Age ({filter_label})"
        )
        .update_traces(marker_color='rgba(49, 252, 3, 0.6)', marker_line_color='#2ad104',
                       marker_line_width=1.5,
                       opacity=0.6)
        .update_layout(
        title = {
            "x": 0.075,
            "y": .85
        },
        plot_bgcolor='rgba(15, 15, 15, 0)',
        paper_bgcolor='rgba(15, 15, 15, 0.5)')
    )

    # * Plot the pie chart
    pie = (
        px.pie(
        df.groupby("gender", as_index=False)["trans_num"].count(),
        values="trans_num",
        names="gender",
        hole=0.46,
        color_discrete_sequence=['rgba(252, 3, 3, 0.7)', 'rgba(49, 252, 3, 0.6)'])
        .update_layout(
        title_text=f"Transactions Breakdown ({filter_label})",
        annotations=[dict(text='Gender %',
                     x=0.5,
                     y=0.5,
                     font_size=20,
                     showarrow=False)],
        title = {
            "x": 0.48
        },
        plot_bgcolor='rgba(15, 15, 15, 0)',
        paper_bgcolor='rgba(15, 15, 15, 0.5)')
    )

    # * Plot the scatter_mapbox
    map_scatter = (
        px.scatter_mapbox(
        df.groupby(["city", "lat", "long"])["trans_num"].count().reset_index(),
        lat="lat",
        lon="long",
        size="trans_num",
        color="trans_num",
        color_continuous_scale=px.colors.sequential.Jet,
        zoom=4.5,
        center=dict(
        lat=37.9931,
        lon=-100.9893
        ),
        mapbox_style="carto-darkmatter",
        title=f"Destribution of Transactions ({filter_label})",
        hover_data=["city"],
        hover_name="city",
        )
        .update_layout(
        title={
            "x":0.038,
            "y":.85
        },
        coloraxis_colorbar = dict(
        thicknessmode="pixels",
        thickness=15,
        title="Count"
        ),
        plot_bgcolor='rgba(15, 15, 15, 0)',
        paper_bgcolor='rgba(15, 15, 15, 0.5)')
    )


    return header, bar, histogram, pie, map_scatter

if __name__ == "__main__":
    app.run_server(debug=True, port=1991, mode="inline")

In [None]:
# fraud_df.groupby("category", as_index=False)["trans_num"].sum().sort_values(by="trans_num", ascending=True)