In [1]:
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

In [2]:
# Read the Anti-Fraud Centre export (expected next to this notebook) into `df`;
# the bare expression on the last line renders the frame as the cell output.
df = pd.read_csv(filepath_or_buffer="Anti_Fraud_Centre_Data.csv")
df

Unnamed: 0,Numro d'identification / Number ID,Date Received / Date reue,Complaint Received Type,Type de plainte reue,Country,Pays,Province/State,Province/tat,Fraud and Cybercrime Thematic Categories,Catgories thmatiques sur la fraude et la cybercriminalit,...,Mthode de sollicitation,Gender,Genre,Language of Correspondence,Langue de correspondance,Victim Age Range / Tranche d'ge des victimes,Complaint Type,Type de plainte,Number of Victims / Nombre de victimes,Dollar Loss /pertes financires
0,1,2021-01-02,CAFC Website,CAFC site web,Canada,Canada,Nova Scotia,Nouvelle-cosse,Phishing,Hameonnage,...,Messages texte,Female,Femme,English,Anglais,'30 - 39,Attempt,Tentative,0,$0.00
1,2,2021-01-02,CAFC Website,CAFC site web,Canada,Canada,British Columbia,Colombie-Britanique,Identity Fraud,Fraude l'identit,...,Autre/inconnu,Female,Femme,English,Anglais,'70 - 79,Victim,Victime,1,$0.00
2,3,2021-01-02,CAFC Website,CAFC site web,Not Specified,Non spcifi,Not Specified,Non spcifi,Romance,Romance,...,Autre/inconnu,Not Available,non disponible,Not Available,non disponible,'Not Available / non disponible,Victim,Victime,1,$298.00
3,4,2021-01-02,CAFC Website,CAFC site web,United States,tats-Unis,California,Californie,Foreign Money Offer,Offre dargent de ltranger,...,Courrier,Male,Homme,English,Anglais,'60 - 69,Attempt,Tentative,0,$0.00
4,5,2021-01-02,CAFC Website,CAFC site web,Canada,Canada,Ontario,Ontario,Merchandise,Marchandise,...,Internet,Female,Femme,English,Anglais,'20 - 29,Victim,Victime,1,$50.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313971,313972,2024-12-31,Phone,Tlphone,Canada,Canada,Alberta,Alberta,Service,Service,...,Appel direct,Not Available,non disponible,English,Anglais,'70 - 79,Victim,Victime,1,$0.00
313972,313973,2024-12-31,Phone,Tlphone,Canada,Canada,Ontario,Ontario,Identity Fraud,Fraude l'identit,...,Autre/inconnu,Not Available,non disponible,English,Anglais,'30 - 39,Victim,Victime,1,$0.00
313973,313974,2024-12-31,Phone,Tlphone,Canada,Canada,Quebec,Qubec,Service,Service,...,Appel direct,Male,Homme,French,Franais,'60 - 69,Victim,Victime,1,$0.00
313974,313975,2024-12-31,Phone,Tlphone,Canada,Canada,Quebec,Qubec,Extortion,Extorsion,...,Courriel,Female,Femme,French,Franais,'70 - 79,Victim,Victime,1,$0.00


In [3]:
# Clean dollar loss data
# Turn the raw dollar-loss text (e.g. "$298.00") into a float column.
#
# The currency symbol and thousands separators are removed as *literal*
# characters. regex=False is essential for '$': on pandas versions where
# Series.str.replace defaults to regex=True, '$' is the end-of-string anchor,
# so nothing is stripped and every amount would be coerced to NaN below.
df['Dollar Loss Clean'] = pd.to_numeric(
    df['Dollar Loss /pertes financires']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False),
    errors='coerce',  # anything still non-numeric becomes NaN instead of raising
)

# 1. Fraud and cybercrime thematic categories (Bar Chart)
# Tally the reports per thematic category into a two-column frame
# (Category, Count), ordered by value_counts (most frequent first).
fraud_categories = (
    df['Fraud and Cybercrime Thematic Categories']
    .value_counts()
    .rename_axis('Category')
    .reset_index(name='Count')
)

# Horizontal bars: one per category, bar length and colour both encode the count.
fig1 = px.bar(
    data_frame=fraud_categories,
    y='Category',
    x='Count',
    orientation='h',
    color='Count',
    color_continuous_scale='Viridis',
    template='plotly_white',
    title='Distribution of Fraud and Cybercrime Categories',
)

# Centre the title, label the axes and give the chart extra height so the
# category labels stay readable.
fig1.update_layout(
    title=dict(
        text='Distribution of Fraud and Cybercrime Categories',
        x=0.5,
        xanchor='center',
    ),
    xaxis_title='Number of Cases',
    yaxis_title='',
    showlegend=False,
    plot_bgcolor='white',
    height=600,
)

# Print the case count (thousands-separated, no decimals) beside each bar.
fig1.update_traces(textposition='outside', texttemplate='%{x:,.0f}')

fig1.show()

In [4]:
# 2. Dollar loss per fraud type (Treemap)
# The column labels live in variables so that the f-string further down needs
# no backslash escape: a backslash inside an f-string replacement field is a
# SyntaxError on Python versions before 3.12.
category_col = 'Fraud and Cybercrime Thematic Categories'
case_id_col = "Numro d'identification / Number ID"

# Per fraud category: total of the numeric 'Dollar Loss Clean' column and the
# number of non-null report IDs.
fraud_loss = df.groupby(category_col).agg({
    'Dollar Loss Clean': 'sum',
    case_id_col: 'count'  # Count of cases
}).reset_index()

# Calculate percentage of total loss
total_loss = fraud_loss['Dollar Loss Clean'].sum()
fraud_loss['Percentage'] = (fraud_loss['Dollar Loss Clean'] / total_loss * 100).round(2)

# Create labels with category, loss amount and case count
# (<br> is the line break plotly uses inside hover text).
fraud_loss['Label'] = fraud_loss.apply(
    lambda x: f"{x[category_col]}<br>"
             f"${x['Dollar Loss Clean']:,.0f}<br>"
             f"({x['Percentage']}%)<br>"
             f"{x[case_id_col]:,} cases",
    axis=1
)

# One tile per category under a single 'All Types' root; tile area and colour
# both encode the summed dollar loss.
fig2 = px.treemap(
    fraud_loss,
    path=[px.Constant('All Types'), category_col],
    values='Dollar Loss Clean',
    custom_data=['Label'],
    title='Dollar Loss Distribution by Fraud Type',
    template='plotly_white',
    color='Dollar Loss Clean',
    color_continuous_scale='Viridis'
)

# Hover shows only the pre-built label (the empty <extra> tag hides the
# secondary hover box); the tiles themselves show the label and the value.
fig2.update_traces(
    hovertemplate='%{customdata[0]}<extra></extra>',
    textinfo='label+value'
)

# Centre the title and fix the figure height.
fig2.update_layout(
    title={
        'text': 'Dollar Loss Distribution by Fraud Type',
        'x': 0.5,
        'xanchor': 'center'
    },
    height=600
)

fig2.show()

In [5]:
# 3. Co-occurrence of fraud types with complaint types (Stacked Bar)
# Contingency table: rows are fraud categories, columns are complaint types,
# cells are report counts.
cooccurrence = pd.crosstab(
    index=df['Fraud and Cybercrime Thematic Categories'],
    columns=df['Complaint Type'],
)

# Rescale every row so it sums to 100, which makes categories of very
# different sizes comparable.
row_totals = cooccurrence.sum(axis=1)
cooccurrence_pct = cooccurrence.div(row_totals, axis=0) * 100

# Long format for plotly express: one row per (category, complaint type) pair.
cooccurrence_long = cooccurrence_pct.reset_index().melt(
    id_vars='Fraud and Cybercrime Thematic Categories',
    var_name='Complaint Type',
    value_name='Percentage',
)

fig3 = px.bar(
    data_frame=cooccurrence_long,
    x='Fraud and Cybercrime Thematic Categories',
    y='Percentage',
    color='Complaint Type',
    color_discrete_sequence=px.colors.qualitative.Set3,
    template='plotly_white',
    title='Co-occurrence of Fraud Types and Complaint Types',
)

# Stack the complaint-type segments, centre the title and tilt the category
# tick labels so they do not overlap.
fig3.update_layout(
    title_text='Co-occurrence of Fraud Types and Complaint Types',
    title_x=0.5,
    title_xanchor='center',
    xaxis_title='Fraud Category',
    xaxis_tickangle=45,
    yaxis_title='Percentage',
    barmode='stack',
    showlegend=True,
    legend_title='Complaint Type',
    plot_bgcolor='white',
    height=600,
)

fig3.show()