In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
from IPython.display import IFrame
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found,
# so the exact dataset location is visible in the cell output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/upi-transactions-2024-dataset/upi_transactions_2024.csv


In [5]:
# Read the UPI transactions CSV into `df`, the frame the plotting cells read from.
csv_path = "/kaggle/input/upi-transactions-2024-dataset/upi_transactions_2024.csv"
df = pd.read_csv(csv_path)

In [6]:
# Spending Distribution by Age and Merchant
#
# Two-level sunburst: the hierarchy runs from sender age group down to
# merchant category. 'amount (INR)' drives both the sector size and the colour.
fig_sunburst = px.sunburst(
    data_frame=df,
    title='Spending Distribution by Age and Merchant',
    path=['sender_age_group', 'merchant_category'],
    values='amount (INR)',
    color='amount (INR)',
    color_continuous_scale='Blues',
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_1.html"
fig_sunburst.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)

In [11]:
# Distribution of Transactions by State
#
# One violin per sender state (split via `color`), each with an embedded box
# plot summarising the transaction amounts.
# NOTE(review): points='all' draws a marker for every row of `df`; if the
# dataset is large the saved HTML will be heavy — confirm this is intended.
fig_violin = px.violin(
    data_frame=df,
    title='Distribution of Transactions by State',
    y='amount (INR)',
    color='sender_state',
    points='all',
    box=True,
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_2.html"
fig_violin.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)

In [9]:
# Failed Transactions by Network Type
#
# Produces `network_counts` with one row per network type and the columns
# network_type, failed_count, total (non-null statuses) and failure_ratio.

# Match the status case- and whitespace-insensitively so that 'failed',
# 'FAILED' and 'Failed ' are all counted, instead of an exact-case comparison
# silently reporting zero failures.
is_failed = df['transaction_status'].astype(str).str.strip().str.lower().eq('failed')
network_key = df['network_type']

# Both inputs are Series indexed by network type, so building the frame from
# them aligns on that label rather than on row position.
network_counts = (
    pd.DataFrame({
        'failed_count': is_failed.groupby(network_key).sum(),
        'total': df['transaction_status'].groupby(network_key).count(),
    })
    .rename_axis('network_type')
    .reset_index()
)
network_counts['failure_ratio'] = network_counts['failed_count'] / network_counts['total']

# Bubble chart: y is the failure ratio, bubble size is the total number of
# transactions seen on that network type.
fig_bubble = px.scatter(
    network_counts,
    x='network_type',
    y='failure_ratio',
    size='total',
    color='network_type',
    hover_name='network_type',
    title='Failed Transactions by Network Type'
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_3.html"
fig_bubble.write_html(fig_file)
IFrame(fig_file, width='100%', height=600)

In [10]:
# Fraud Rate by Merchant
#
# Mean of `fraud_flag` per merchant category; that mean is used for both the
# tile area and the tile colour of the treemap.
fraud_rates = df.groupby('merchant_category', as_index=False)['fraud_flag'].mean()

fig_treemap = px.treemap(
    data_frame=fraud_rates,
    title='Fraud Rate by Merchant',
    path=['merchant_category'],
    values='fraud_flag',
    color='fraud_flag',
    color_continuous_scale='Reds',
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_4.html"
fig_treemap.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)

In [12]:
# Spending by Age and Weekend
#
# Box plot of transaction amounts, grouped on the x axis by the `is_weekend`
# flag and split into one coloured box per sender age group.
fig_box = px.box(
    data_frame=df,
    title='Weekday vs Weekend Spending by Age',
    x='is_weekend',
    y='amount (INR)',
    color='sender_age_group',
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_5.html"
fig_box.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)

In [13]:
# Transactions Flow (All, Failed, Fraud)
#
# Per transaction type, count all transactions, failed transactions and
# fraud-flagged transactions, then draw them as a stacked funnel.

# Match the status case- and whitespace-insensitively so that 'failed',
# 'FAILED' and 'Failed ' are all counted as failures.
is_failed = df['transaction_status'].astype(str).str.strip().str.lower().eq('failed')

transaction_counts = df.groupby('transaction type')['transaction id'].count()
fraud_count = df.groupby('transaction type')['fraud_flag'].sum()
fail_count = is_failed.groupby(df['transaction type']).sum()
funnel_df = pd.DataFrame({"transaction":transaction_counts.index,"total":transaction_counts,"failed":fail_count,"fraud":fraud_count})

# go.Funnel expects x and y as parallel arrays with one entry per funnel
# stage. Emit one trace per transaction type whose stages are
# All -> Failed -> Fraud; several named traces sharing the same y stages render
# as a stacked funnel.
funnel_stages = ['All', 'Failed', 'Fraud']
fig_funnel = go.Figure([
    go.Funnel(
        name=str(txn_type),
        y=funnel_stages,
        x=[total, failed, fraud],
    )
    for txn_type, total, failed, fraud in funnel_df[
        ['transaction', 'total', 'failed', 'fraud']
    ].itertuples(index=False, name=None)
])

fig_funnel.update_layout(title='Transaction Flow from All to Failed to Fraud')

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_6.html"
fig_funnel.write_html(fig_file)
IFrame(fig_file, width='100%', height=600)

In [14]:
# Parallel Categories: Age, Day, Merchant
#
# Parallel-categories diagram linking sender age group, day of week and
# merchant category, with the ribbons coloured by 'amount (INR)'.
fig_parallel = px.parallel_categories(
    data_frame=df,
    title='Patterns Between Age, Day, and Merchant',
    dimensions=['sender_age_group', 'day_of_week', 'merchant_category'],
    color='amount (INR)',
    color_continuous_scale='Blues',
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_7.html"
fig_parallel.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)

In [15]:
# Daily Spending
#
# Sum 'amount (INR)' per calendar date, then plot day-of-month against month
# so each marker is one date; marker size and colour both encode that date's total.

# Work on a copy of `df` that carries an extra `date` column (timestamp
# truncated to the calendar date), leaving `df` itself untouched.
daily = df.assign(date=pd.to_datetime(df['timestamp']).dt.date)
daily_sums = daily.groupby('date', as_index=False)['amount (INR)'].sum()

# Split each date into the two axes of the calendar grid.
calendar_dates = pd.to_datetime(daily_sums['date'])
daily_sums['day'] = calendar_dates.dt.day
daily_sums['month'] = calendar_dates.dt.month

fig_calendar = px.scatter(
    data_frame=daily_sums,
    title='Calendar View of Daily Spending',
    x='day',
    y='month',
    size='amount (INR)',
    color='amount (INR)',
    color_continuous_scale='Viridis',
)

# Save the chart as a standalone HTML file and embed that file in the output.
fig_file = "figure_8.html"
fig_calendar.write_html(fig_file)
IFrame(src=fig_file, width='100%', height=600)