In [1]:
# Import packages:

# For data management and cleaning
import pandas as pd
import numpy as np
import os
import kagglehub
import re
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime

# For visualisation
from scipy.stats import gaussian_kde
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import dash
from dash import dcc, html
app = dash.Dash(__name__)

# For predictive modelling
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from category_encoders import TargetEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.utils import resample
import time

# Import the credit car fraud detection dataset
path = kagglehub.dataset_download("kartik2112/fraud-detection")

print("Path to dataset files:", path)

Path to dataset files: C:\Users\Princess.Domingo\.cache\kagglehub\datasets\kartik2112\fraud-detection\versions\1


In [2]:
# Loop through any .csv files in the Kaggle folder
for file_name in os.listdir(path):
    
    if file_name.endswith('.csv'):
        # Define the full file path
        file_path = os.path.join(path, file_name)
        
        # Load CSV file into a DataFrame
        df = pd.read_csv(file_path)
        
        # Convert the DataFrmes 
        df_name = os.path.splitext(file_name)[0]
        df_name = re.sub(r'(?<!^)(?=[A-Z])', '_', df_name).lower()  # Convert to snake_case
        
        # Create the DataFrame with the modified name
        globals()[df_name] = df
        print(f"DataFrame '{df_name}' has been created.")

DataFrame 'fraud_test' has been created.
DataFrame 'fraud_train' has been created.


In [3]:
def explore_metadata(df):
    print(df.info())
    print(df.head(2))
    print(df.describe())

In [10]:
def clean_data(df, df2):
    df3 = pd.concat([df, df2], axis=0)

    df3['date'] = pd.to_datetime(df3['trans_date_trans_time'].str[:10])


    df3['name'] = df3['first'] + ' ' + df3['last']

    df3 = df3.drop(['Unnamed: 0', 'first', 'last', 'unix_time'], axis=1, errors='ignore')

    df3['month'] = df3['date'].dt.to_period('M').astype(str)

    df3['is_fraud'] = df3['is_fraud'].replace({1: 'true', 0: 'false'})

    df3['dob'] = pd.to_datetime(df3['dob'], errors='coerce')
    current_date = datetime.now()
    df3['age'] = ((current_date - df3['dob']).dt.days / 365.25).astype('Int64', errors='ignore')

    age_bins = [18, 25, 35, 45, 55, 65, np.inf]
    age_labels = ['18-24', '25-34', '35-44', '45-54', '55-64', '65+']
    df3['age_bracket'] = pd.cut(df3['age'], bins=age_bins, labels=age_labels, right=False)
    df3 = df3.drop('age', axis=1)
    return df3


In [5]:
explore_metadata(fraud_test)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 555719 entries, 0 to 555718
Data columns (total 23 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Unnamed: 0             555719 non-null  int64  
 1   trans_date_trans_time  555719 non-null  object 
 2   cc_num                 555719 non-null  int64  
 3   merchant               555719 non-null  object 
 4   category               555719 non-null  object 
 5   amt                    555719 non-null  float64
 6   first                  555719 non-null  object 
 7   last                   555719 non-null  object 
 8   gender                 555719 non-null  object 
 9   street                 555719 non-null  object 
 10  city                   555719 non-null  object 
 11  state                  555719 non-null  object 
 12  zip                    555719 non-null  int64  
 13  lat                    555719 non-null  float64
 14  long                   555719 non-nu

In [6]:
explore_metadata(fraud_train)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296675 entries, 0 to 1296674
Data columns (total 23 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   Unnamed: 0             1296675 non-null  int64  
 1   trans_date_trans_time  1296675 non-null  object 
 2   cc_num                 1296675 non-null  int64  
 3   merchant               1296675 non-null  object 
 4   category               1296675 non-null  object 
 5   amt                    1296675 non-null  float64
 6   first                  1296675 non-null  object 
 7   last                   1296675 non-null  object 
 8   gender                 1296675 non-null  object 
 9   street                 1296675 non-null  object 
 10  city                   1296675 non-null  object 
 11  state                  1296675 non-null  object 
 12  zip                    1296675 non-null  int64  
 13  lat                    1296675 non-null  float64
 14  long              

In [11]:
fraud_data = clean_data(fraud_test, fraud_train)


In [12]:
fraud_data.head()

Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,gender,street,city,state,zip,...,job,dob,trans_num,merch_lat,merch_long,is_fraud,date,name,month,age_bracket
0,2020-06-21 12:14:25,2291163933867244,fraud_Kirlin and Sons,personal_care,2.86,M,351 Darlene Green,Columbia,SC,29209,...,Mechanical engineer,1968-03-19,2da90c7d74bd46a0caf3777415b3ebd3,33.986391,-81.200714,False,2020-06-21,Jeff Elliott,2020-06,55-64
1,2020-06-21 12:14:33,3573030041201292,fraud_Sporer-Keebler,personal_care,29.84,F,3638 Marsh Union,Altonah,UT,84002,...,"Sales professional, IT",1990-01-17,324cc204407e99f51b0d6ca0055005e7,39.450498,-109.960431,False,2020-06-21,Joanne Williams,2020-06,25-34
2,2020-06-21 12:14:53,3598215285024754,"fraud_Swaniawski, Nitzsche and Welch",health_fitness,41.28,F,9333 Valentine Point,Bellmore,NY,11710,...,"Librarian, public",1970-10-21,c81755dbbbea9d5c77f094348a7579be,40.49581,-74.196111,False,2020-06-21,Ashley Lopez,2020-06,45-54
3,2020-06-21 12:15:15,3591919803438423,fraud_Haley Group,misc_pos,60.05,M,32941 Krystal Mill Apt. 552,Titusville,FL,32780,...,Set designer,1987-07-25,2159175b9efe66dc301f149d3d5abf8c,28.812398,-80.883061,False,2020-06-21,Brian Williams,2020-06,35-44
4,2020-06-21 12:15:17,3526826139003047,fraud_Johnston-Casper,travel,3.19,M,5783 Evan Roads Apt. 465,Falmouth,MI,49632,...,Furniture designer,1955-07-06,57ff021bd3f328f8738bb535c302a31b,44.959148,-85.884734,False,2020-06-21,Nathan Massey,2020-06,65+


In [13]:
fraud_data.columns

Index(['trans_date_trans_time', 'cc_num', 'merchant', 'category', 'amt',
       'gender', 'street', 'city', 'state', 'zip', 'lat', 'long', 'city_pop',
       'job', 'dob', 'trans_num', 'merch_lat', 'merch_long', 'is_fraud',
       'date', 'name', 'month', 'age_bracket'],
      dtype='object')

In [429]:
# I will be creating multiple datasets to identify trends between customer demographics 
# and fraud payments

def fraud_payments_by_demos(df):
    # Question 1: Which product category has the highest percentage of fraud payments?
    df['age_bracket'] = df['age_bracket'].astype(str)
    df2 = df[df['is_fraud'] == 'true']

    # Q1a. Calculate the number of transactions segmented by product category
    total_trans_by_category = df.groupby('category')['trans_num'].count()

    # Q1b. Calculate the number of fraudulent transactions by product category
    df3 = df2.groupby('category')['trans_num'].count().reset_index(name='fraud_payments')
    
    # Q1c. Map the total values to the relevant category
    df3['total_trans_by_category'] = df3['category'].map(total_trans_by_category)

    # Q1d. Find the percentage of payments that were fraud per category
    df3['percentage'] = ((df3['fraud_payments'] / df3['total_trans_by_category'])*100).round(2)

    df3 = df3.sort_values('percentage', ascending=False)


    # Question 2: Which gender has more fraudulent payments?

    # Q2a. Calculate the number of fraud payments segmented by gender
    total_fraud_payments = df2['trans_num'].count()

    # Q2b. Calculate the number of fraudulent transaction
    df4 = df2.groupby(['gender'])['trans_num'].count().reset_index(name='fraud_payments')

    # Q2d. Find the percentage of payments that were fraud by gneder
    df4['percentage'] = ((df4['fraud_payments'] / total_fraud_payments)*100)

    df4 = df4.sort_values('percentage', ascending=False)
    
    # Question 3: Is there a relationship between seasonality and the percentage of fraud payments?

    # Q3a. Calculate the number of transactions over each month
    total_monthly_trans = df.groupby('month')['trans_num'].count()

    # Q3b. Calculate the number of fraudulent transactions over each months
    df5 = df2.groupby('month')['trans_num'].count().reset_index(name='fraud_payments')

    # Q3c. Map the totals to the correct month
    df5['total_trans_by_month'] = df5['month'].map(total_monthly_trans)

    # Q3d. Calculate the average over each month.
    df5['percentage'] = ((df5['fraud_payments'] / df5['total_trans_by_month'])*100).round(2)

    # Question 4: Is there a relationship between age and % of fraudulent payments 

    # Q4b. Calculate the total number of fraudulent transactions
    df6 = df2.groupby('age_bracket')['trans_num'].count().reset_index(name='fraud_payments')

    # Q4d. Calculate the percentage
    df6['percentage'] = ((df6['fraud_payments'] / total_fraud_payments)*100).round(2)

    # Question 5: How are fraduluent transaction spread across the state and
    # is there a relationship between the commitment of fraud and state population sizes?

    
    # Q5a. Calculate state populations based on the city population
    states = df[['city', 'state', 'city_pop']].drop_duplicates()
    state_pop = states.groupby('state')['city_pop'].sum()

    # Q5b. Calculate the total number of transactions by state
    state_trans = df.groupby('state')['trans_num'].count()

    # Q5c. Calculate the total fraud payments associated to states
    df7 = df2.groupby('state')['trans_num'].count().reset_index(name='fraud_payments')

    # Q5d. Map back the total values and the state populations to each state
    df7['state_pop'] = df7['state'].map(state_pop)
    df7['formatted_pop'] = df7['state_pop'].apply(
    lambda x: f"{x/1_000_000:.1f}M" if x >= 1_000_000 else f"{x/1_000:.0f}K"
)
    df7['percentage'] = ((df7['fraud_payments'] / total_fraud_payments)*100).round(2)

    df7 = df7.sort_values('percentage', ascending=False)

    # Q6. What is the percentage of fraud payments in the datatset?
    
    # Q6a. Identify the number of fraudulent and non-fraudulent payments
    df8 = df.groupby('is_fraud')['trans_num'].count().reset_index(name='payments')

    # Q6b. Calculate the total number of payaments in the dataset
    total_payments = df8['payments'].sum()

    # Q6c. Calculate the percentage of fraudulent payments
    df8['percentage'] = ((df8['payments'] / total_payments)*100).round(2)

    
    # Q7. What is the distribution of fraudulent payment amounts?
    kde = gaussian_kde(df2['amt'])
    amounts = np.linspace(df2['amt'].min(), df2['amt'].max(), 100)
    density = kde(amounts)

    return df3, df4, df5, df6, df7, df8, amounts, density

In [430]:
category, gender, month, age_bracket, states, totals, kde_amounts, kde_density = fraud_payments_by_demos(fraud_data)

In [293]:
category.head()

Unnamed: 0,category,fraud_payments,total_trans_by_category,percentage
11,shopping_net,2219,139322,1.59
8,misc_net,1182,90654,1.3
4,grocery_pos,2228,176191,1.26
12,shopping_pos,1056,166463,0.63
2,gas_transport,772,188029,0.41


In [294]:
gender.head()

Unnamed: 0,gender,fraud_payments,percentage
0,F,4899,50.761579
1,M,4752,49.238421


In [295]:
month.head()

Unnamed: 0,month,fraud_payments,total_trans_by_month,percentage
0,2019-01,506,52525,0.96
1,2019-02,517,49866,1.04
2,2019-03,494,70939,0.7
3,2019-04,376,68078,0.55
4,2019-05,408,72532,0.56


In [296]:
age_bracket

Unnamed: 0,age_bracket,fraud_payments,percentage
0,18-24,239,2.48
1,25-34,1568,16.25
2,35-44,1706,17.68
3,45-54,1771,18.35
4,55-64,1813,18.79
5,65+,2554,26.46


In [431]:
states.head()

Unnamed: 0,state,fraud_payments,state_pop,formatted_pop,percentage
34,NY,730,6178074,6.2M,7.56
43,TX,592,8221850,8.2M,6.13
38,PA,572,2541361,2.5M,5.93
4,CA,402,7475144,7.5M,4.17
35,OH,360,1468869,1.5M,3.73


In [298]:
totals

Unnamed: 0,is_fraud,payments,percentage
0,False,1842743,99.48
1,True,9651,0.52


In [309]:
states.head(2)

Unnamed: 0,state,fraud_payments,state_pop,percentage
34,NY,730,6178074,7.56
43,TX,592,8221850,6.13


In [None]:
def create_graphs(category, gender, month, age_bracket, states, totals, kde_amounts, kde_density):

    # Create a bar graph that shows the percentage of fraud payments by product category
    fig1 = px.bar(category, x='category', y='percentage',
                   title='% of Fraudulent Payments by Product Category',
                   text='percentage')
    
    fig1 = fig1.update_traces(marker_color='#65c2f7', texttemplate='%{y:.1f}%', 
                              textposition='outside', hovertemplate='%{x}: %{y:.1f}%')
    fig1 = fig1.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Product Category'
    )

    fig2 = px.bar(gender, x='gender', y='percentage',
                  title='% of Fraudulent Transactions by Gender',
                  text='percentage', color='gender', 
                  color_discrete_sequence=['#eb4034','#34eb6b'])
    
    fig2 = fig2.update_traces(texttemplate='%{y:.1f}%', 
                              textposition='outside', hovertemplate='%{x}: %{y:.1f}%') 
    
    fig2 = fig2.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False, range=[0, 60]),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Gender', 
        showlegend=False
    )           
    
    fig3 = px.bar(age_bracket, x='age_bracket', y='percentage',
                  title='% of Total Fraud Payments Split by Age', 
                  color_discrete_sequence=['#65c2f7'], text='percentage')
    
    fig3 = fig3.update_traces(texttemplate='%{y:.1f}%', 
                              textposition='outside', hovertemplate='Age Bracket (%{x}): %{y:.1f}%')

    fig3 = fig3.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False, range=[0, 30]),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Age Bracket'
    )   

    fig4 = px.line(month, x='month', y='percentage', 
                   title='Monthly % of Fraud Payments', 
                   color_discrete_sequence=['#65c2f7'])

    average_percentage_month_on_month = month['percentage'].mean().round(1)

    fig4 = fig4.update_traces(hovertemplate='%{x}: %{y:.1f}%')

    fig4 = fig4.add_trace(
        go.Scatter(
            x=month['month'], 
            y=[average_percentage_month_on_month] * len(month['month']),
            mode='lines',
            line=dict(dash='dash', color='red'),
            name='Monthly Average',
            hovertemplate='%{y:.f}%'
        )
    )
    
    fig4 = fig4.update_layout(
        xaxis=dict(showgrid=False),
         yaxis=dict(showgrid=False),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Month', 
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
    ))

    fig5 = go.Figure()

    fig5.add_trace(go.Choropleth(
    locations=states['state'],
    locationmode='USA-states',
    z=states['percentage'],
    hoverinfo='location+z',  # Show state and percentage on hover
    color=states['percentage'],
    colorscale='Viridis',
    colorbar_title="Percentage of Fraud Payments",
    visible=True  # Initially visible
    ))

    fig5.add_trace(go.Choropleth(
        locations=states['state'], 
        locationmode='USA-states', 
        z=states['state_pop'], 
        hoverinfo=None,
        hovertemplate="<b>%{location}</b><br>Population: %{customdata}<extra></extra>",
        custom_data=states['formatted_pop'],
        colorscale='Blues', 
        colorbar_title='State Population', 
        visible=False
    ))

    fig5.update_layout(
      updatemenus=[{
        "buttons": [
            {"label": "Fraud Payments (%)",
             "method": "update",
             "args": [{"visible": [True, False]},  # Show only fraud percentage trace
                      {"title": "Geographical Representation of Fraud Payments (%)"}]},
            {"label": "State Population",
             "method": "update",
             "args": [{"visible": [False, True]},  # Show only population trace
                      {"title": "Geographical Representation of State Population"}]}
        ],
        "direction": "down",
        "showactive": True,
        "x": 0.12,
        "y": 1.15,
        "xanchor": "left",
        "yanchor": "top"
    }],
    title="Geographical Representation of Fraud Payments and State Population"
)


    return fig1, fig2, fig3, fig4, fig5

In [527]:
totals

Unnamed: 0,is_fraud,payments,percentage
0,False,1842743,99.48
1,True,9651,0.52


In [557]:
import plotly.graph_objects as go
import plotly.express as px

def create_graphs(category, gender, month, age_bracket, states, kde_amounts, kde_density, df):

    # Create a bar graph that shows the percentage of fraud payments by product category
    fig1 = px.bar(category, x='category', y='percentage',
                   title='% of Fraudulent Payments by Product Category',
                   text='percentage')
    
    fig1.update_traces(marker_color='#65c2f7', texttemplate='%{y:.1f}%', 
                      textposition='outside', hovertemplate='%{x}: %{y:.1f}%')
    fig1.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Product Category'
    )

    fig2 = px.bar(gender, x='gender', y='percentage',
                  title='% of Fraudulent Transactions by Gender',
                  text='percentage', color='gender', 
                  color_discrete_sequence=['#eb4034','#34eb6b'])
    
    fig2.update_traces(texttemplate='%{y:.1f}%', 
                       textposition='outside', hovertemplate='%{x}: %{y:.1f}%') 
    fig2.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False, range=[0, 60]),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Gender', 
        showlegend=False
    )           
    
    fig3 = px.bar(age_bracket, x='age_bracket', y='percentage',
                  title='% of Total Fraud Payments Split by Age', 
                  color_discrete_sequence=['#65c2f7'], text='percentage')
    
    fig3.update_traces(texttemplate='%{y:.1f}%', 
                       textposition='outside', hovertemplate='Age Bracket (%{x}): %{y:.1f}%')
    fig3.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False, range=[0, 30]),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Age Bracket'
    )   

    fig4 = px.line(month, x='month', y='percentage', 
                   title='Monthly % of Fraud Payments', 
                   color_discrete_sequence=['#65c2f7'])

    average_percentage_month_on_month = month['percentage'].mean().round(1)

    fig4.update_traces(hovertemplate='%{x}: %{y:.1f}%')

    fig4.add_trace(
        go.Scatter(
            x=month['month'], 
            y=[average_percentage_month_on_month] * len(month['month']),
            mode='lines',
            line=dict(dash='dash', color='red'),
            name='Monthly Average',
            hovertemplate='%{y:.f}%'
        )
    )
    
    fig4.update_layout(
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        yaxis_title="Fraud Percentage (%)", 
        xaxis_title='Month', 
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
    ))

    fig5 = go.Figure()

# Fraud payments choropleth trace
    fig5.add_trace(go.Choropleth(
        locations=states['state'],
        locationmode='USA-states',
        z=states['percentage'],
        colorscale=px.colors.sequential.Viridis[::-1],
        colorbar=dict(title="% of Fraud Payments"),  # Corrected this line
        visible=True  # Initially visible
        ))
    
    fig5.update_traces(
    hovertemplate="<b>%{location}</b>: %{z:.1f}%<extra></extra>"
    )

# State population choropleth trace
    fig5.add_trace(go.Choropleth(
        locations=states['state'], 
        locationmode='USA-states', 
        z=states['state_pop'], 
        hoverinfo=None,
        hovertemplate="<b>%{location}</b><br>Population: %{customdata}<extra></extra>",
        customdata=states['formatted_pop'],
        colorscale=px.colors.sequential.Viridis[::-1],
        colorbar=dict(title='State Population',
                      tickformat='~s'),
        visible=False  # Initially hidden
))
    

    fig5.update_layout(
        geo=dict(
            scope='usa', 
            projection_type='albers usa', 
            showland=True, 
            landcolor='lightgray', 
            subunitwidth=1, 
            countrywidth=2
,       ),
        updatemenus=[{
        "buttons": [
            {"label": "Fraud Payments (%)",
             "method": "update",
             "args": [{"visible": [True, False]},  # Show only fraud percentage trace
                      {"title": "Geographical Representation of Fraud Payments (%)"}]},
            {"label": "State Population",
             "method": "update",
             "args": [{"visible": [False, True]},  # Show only population trace
                      {"title": "Geographical Representation of State Population"}]}
        ],
        "direction": "down",
        "showactive": True,
        "x": 0,
        "y": 1.15,
        "xanchor": "left",
        "yanchor": "top"
    }],
    title="Geographical Representation of Fraud Payments and State Population"
)
    
    total_fraud_trans = totals.loc[totals['is_fraud'] == 'true', 'payments'].iloc[0]
    
    fig6 = go.Figure(go.Indicator(
        mode='gauge+number', 
        value = total_fraud_trans, 
        title= {'text': 'Total Fraud Payments'}, 
        domain = {'x': [0, 1], 'y': [0, 1]}, 
        gauge={
            'axis' : {'range': [0, 20000]}, 
            'bar': {'color' : 'red'}
        }
    ))

    fig7 = go.Figure(data=go.Scatter(x=kde_amounts, y=kde_density, mode='lines', line_color='red'))
    fig7 = fig7.update_layout(
            title='Distribution of Fraudulent Payment Amounts', 
            xaxis_title='Payment Amount', 
            yaxis_title='Density')
    
    df2 = df[df['is_fraud'] == 'true']
    avg_fraud_amt = df2['amt'].mean()
    
    fig8 = go.Figure(go.Indicator(
        mode='gauge+number',
        value=avg_fraud_amt, 
        number={'prefix': "$", 'font': {'size': 20}},
        domain={'x': [0, 1], 'y': [0.2, 0.9]},
        gauge={
        'shape': 'bullet',
        'bar': {'color': 'red'}
    }
    ))

    # Adjust layout to move the title above the bar 
    fig8.update_layout(
        annotations=[
            dict(
                x=0.5,  # Position at the center horizontally
                y=1.1,  # Move the title up
                text="Average Fraud Payment Amount",
                font=dict(size=14),
                showarrow=False
        )
        ]   
    )

    recent_month_value = month.loc[month['month'] == '2020-12', 'percentage'].iloc[0]
    previous_month_value = month.loc[month['month'] == '2020-11', 'percentage'].iloc[0]
    recent_month = month.loc[month['month'] == '2020-12', 'month'].iloc[0]
    previous_month = month.loc[month['month'] == '2020-11', 'month'].iloc[0]

    fig9 = go.Figure(go.Indicator(
    mode='number+delta',
    value=recent_month_value, 
    number={'suffix': '%'},
    title={
        'text': f'MoM Change in % in Fraud Transactions<br><span style="font-size:12px;">{recent_month} vs {previous_month}</span>',
        'font': {'size': 14}
    },
    delta={
        'position': 'bottom',
        'reference': previous_month_value,
        'increasing': {'color': 'red'},  
        'decreasing': {'color': 'green'},
        'suffix' : '%'
    },
    domain={'x': [0, 1], 'y': [0, 1]}
))

    return fig1, fig2, fig3, fig4, fig5, fig6, fig7, fig8, fig9

In [559]:
category_bar_graph, gender_bar_graph, age_bracket_graph, month_graph, state_map, fraud_kpi, kde_graph, avg_fraud_kpi, monthly_kpi = create_graphs(category, gender, month, age_bracket, states, kde_amounts, kde_density, fraud_data)

In [554]:
fraud_kpi

In [560]:
avg_fraud_kpi

In [556]:
monthly_kpi

In [563]:
from dash import Dash, html, dcc

# Initialize the Dash app
app = Dash(__name__)

# Layout
app.layout = html.Div([
    html.H1("Fraud Activity Dashboard", style={'text-align': 'center'}),

    html.Div([
        dcc.Graph(figure=avg_fraud_kpi, style={"height": "300px", "width": "500px"}), 
        dcc.Graph(figure=fraud_kpi, style={"height": "300px", "width": "500px"}),
        dcc.Graph(figure=monthly_kpi, style={"height": "300px", "width": "500px"}),
    ], style={"margin-top": "20px", "display": "flex", "gap": "20px", "justify-content": "center"}),

    html.Div([
        dcc.Graph(figure=kde_graph),
        dcc.Graph(figure=month_graph)
    ], style={"margin-top": "20px", "display": "flex", "gap": "20px", "justify-content": "center"}),

    html.Div([
        dcc.Graph(figure=state_map)
    ], style={"margin-top": "20px"}),

    html.Div([
        dcc.Graph(figure=gender_bar_graph),
        dcc.Graph(figure=age_bracket_graph)
    ], style={"margin-top": "20px", "display": "flex", "gap": "20px", "justify-content": "center"}),

    html.Div([
        dcc.Graph(figure=category_bar_graph)
    ], style={"margin-top": "20px"})
])

# Save as HTML file function
def save_as_html():
    # Define a simple HTML structure
    html_content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Fraud Activity Dashboard</title>
        <script src="https://cdn.jsdelivr.net/npm/dash@2.0.0-beta.0/dash.min.js"></script>
    </head>
    <body>
        <div id="dash-container"></div>
    </body>
    </html>
    """
    
    # Save the HTML file
    with open("fraud_dashboard.html", "w") as f:
        f.write(html_content)
    print("Dashboard saved as fraud_dashboard.html")

# Run the app and save it as an HTML file
if __name__ == '__main__':
    app.run_server(debug=False, use_reloader=False)
    save_as_html()


Dashboard saved as fraud_dashboard.html
