# Part 2: Fund Movement Prediction

In [1]:
import pandas as pd 
import plotly.graph_objects as go

In [2]:
# Load the reconciled card statements and the business KPI table from CSV.
statements_df = pd.read_csv('data/reconciled_card_data.csv')
business_df = pd.read_csv('data/business_kpi.csv')

# Some rows carry the literal string 'outstanding' in the payment-date column
# (presumably statements that have not been paid yet -- confirm with the data owner).
# Replace that marker with NaT so those rows are treated as missing payment dates.
statements_df.loc[
    statements_df['payment_date_reconcile'].eq('outstanding'),
    'payment_date_reconcile',
] = pd.NaT

## 1. EDA: credit card customers and fund movements

* Having business metadata would help create customer segmentation and explore common patterns among similar businesses.
* Reliably forecasting a time series, even only 5 to 10 steps ahead, requires a sufficiently long history of data points.
* In this case, the prediction is going to be done for the next month, or at most, the next quarter.
* With this in mind, let's explore the data for businesses that have at least 30 published credit card statements.

In [3]:
# Number of statements per business, largest first.
statement_count = (
    statements_df
    .groupby('business_id')
    .size()
    .reset_index(name='statements_count')
    .sort_values(by=['statements_count'], ascending=False)
)

# Keep only the businesses with at least 30 statements (the threshold chosen in
# the EDA notes above), then inner-join back to the statements so the result
# holds every statement row of those businesses plus their statements_count.
target_business = statement_count.loc[statement_count['statements_count'].ge(30)]
statements_df_target_business = target_business.merge(statements_df, on='business_id')

statement_count.head(20)

Unnamed: 0,business_id,statements_count
2,cl81xm5ze01001kx8kg53hob3,190
7,cl8af5bmp19891kx3wlkp2825,107
18,cl9rk0p090000ye1kuocy1ylx,49
5,cl83libcm71791nx2kmuehklu,32
3,cl823ggqb18681nww46j6v2kf,30
4,cl83l1qre64701lwuykh8gi0y,30
8,cl8jb9bog03311lx6vzqbhsrd,30
9,cl8khb2qs11621mx5wkbus530,29
6,cl8978e5209211lxatoukqp7b,29
0,cl81p742620381lycqw0xl7sf,28


In [4]:
# Plot one business's outstanding balance and collected payments over time.
business = 'cl8af5bmp19891kx3wlkp2825'

# Filter the statements for this business once and reuse the result,
# instead of re-evaluating the same boolean mask for every series.
business_statements = statements_df_target_business[
    statements_df_target_business['business_id'] == business
]

# Series 1: outstanding balance at each statement end date.
x1 = business_statements['statement_end_date']
y1 = business_statements['outstanding_balance_recon']

# Series 2: collected payments by payment date. Rows missing either the
# payment date or the collected amount are dropped so both axes stay aligned.
payments = business_statements[['payment_date_reconcile', 'total_payment_collected']].dropna()
x2 = payments['payment_date_reconcile']
y2 = payments['total_payment_collected']

fig2 = go.Figure()
# Give each trace its own legend label; identical names make the two lines
# indistinguishable in the legend and hover text.
fig2.add_trace(go.Scatter(x=x1, y=y1, mode='lines', name='Outstanding balance'))
fig2.add_trace(go.Scatter(x=x2, y=y2, mode='lines', name='Payment collected'))

# Descriptive title and axis labels so the figure stands on its own.
fig2.update_layout(
    title=f'Outstanding balance vs. payments collected: {business}',
    xaxis_title='Date',
    yaxis_title='Amount',
)

# Show the plot
fig2.show()

In [5]:
# Count, per business, the statements that have a payment date recorded.
# count() skips missing values, so rows whose payment date is NaT/NaN are not counted.
payment_count = (
    statements_df
    .groupby("business_id")["payment_date_reconcile"]
    .count()
    .rename("payment_count")
    .reset_index()
    .sort_values("payment_count", ascending=False)
)
payment_count

Unnamed: 0,business_id,payment_count
7,cl8af5bmp19891kx3wlkp2825,103
2,cl81xm5ze01001kx8kg53hob3,79
18,cl9rk0p090000ye1kuocy1ylx,41
3,cl823ggqb18681nww46j6v2kf,23
6,cl8978e5209211lxatoukqp7b,19
1,cl81puo9y23001kwys64o0gfq,15
0,cl81p742620381lycqw0xl7sf,14
15,cl9g2apl8001fwx1k5uh8q8k3,13
13,cl97amnir000fwx1k932gtt0m,11
4,cl83l1qre64701lwuykh8gi0y,9


In [6]:
 # Template example: ten days of made-up values (replace with real time series data).
data = dict(
    Date=pd.date_range(start='2022-01-01', end='2022-01-10'),
    Value=[10, 15, 20, 18, 25, 30, 28, 35, 40, 38],
)
df = pd.DataFrame(data)

# Build the figure with a single line trace of Value against Date.
sample_trace = go.Scatter(x=df['Date'], y=df['Value'], mode='lines', name='Time Series')
fig = go.Figure(data=[sample_trace])

# Title and axis labels for readability.
fig.update_layout(title='Time Series Plot', xaxis_title='Date', yaxis_title='Value')

# Render the figure.
fig.show()