In [120]:
import pandas as pd
import math

In [121]:
# Load the raw transaction export from the project's GCS bucket.
# NOTE(review): reading a gs:// URL with pandas presumably relies on gcsfs being
# installed and on ambient GCP credentials - confirm for the target environment.
df = pd.read_csv('gs://shakingshamrocks_eu/test_data_3_sec.csv')

In [122]:
# Discard the first column of the loaded frame.
# NOTE: this cell is not idempotent - every re-run removes one more column from `df`.
df = df.drop(columns=df.columns[0])

In [51]:
def find_incommings(df_user):
    """Summarise incoming (positive-amount) payments per counterparty.

    Parameters
    ----------
    df_user : pandas.DataFrame
        Transactions with at least the columns ``dates``, ``amount`` and
        ``other_account_name``. ``dates`` must be parseable by
        ``pd.to_datetime``. The frame is only read, never modified.

    Returns
    -------
    dict
        One entry per ``other_account_name`` (in order of first appearance)::

            {name: {'freq': int, 'amount': float, 'start_date': <min of dates>}}

        ``freq`` is the mean gap in whole days between consecutive payments,
        rounded up. ``amount`` is the mean payment amount. ``start_date`` is
        the smallest value of the ``dates`` column as stored (not re-parsed).

    Notes
    -----
    The first payment of each counterparty has no predecessor and is counted
    as a gap of 0 days, so with ``n`` payments the mean is the total span
    divided by ``n`` (a single payment yields ``freq == 0``).
    NOTE(review): this pulls the estimate below the true interval; it is kept
    as-is because downstream thresholds may have been tuned against it.
    """
    df_in = df_user[df_user['amount'] > 0]

    incomings = {}
    # sort=False keeps counterparties in order of first appearance.
    for payment, df_tmp in df_in.groupby('other_account_name', sort=False):
        # Sort the parsed timestamps (not the raw values) so gaps are chronological.
        gap_days = (
            pd.to_datetime(df_tmp['dates'])
            .sort_values()
            .diff()
            .dt.days
            .fillna(0)  # first payment: no previous payment -> gap of 0
        )

        incomings[payment] = {
            'freq': math.ceil(gap_days.mean()),
            'amount': df_tmp['amount'].mean(),
            'start_date': df_tmp['dates'].min(),
        }

    return incomings


def find_outgoings(df_user):
    """Summarise and classify outgoing (negative-amount) payments per counterparty.

    Parameters
    ----------
    df_user : pandas.DataFrame
        Transactions with at least the columns ``dates``, ``amount`` and
        ``other_account_name``. ``dates`` must be parseable by
        ``pd.to_datetime``. The frame is only read, never modified.

    Returns
    -------
    dict
        One entry per ``other_account_name`` (in order of first appearance)::

            {name: {'freq': int, 'amount': float,
                    'start_date': <min of dates>, 'Type': str}}

        ``freq`` is the mean gap in whole days between consecutive payments,
        rounded up; ``amount`` is the mean (negative) payment amount.
        ``Type`` is derived from ``freq``:

        * ``'Regular'``        - ``freq > 25``
        * ``'Irregular'``      - ``7 <= freq <= 25``
        * ``'Discretionary'``  - ``freq < 7``

    Notes
    -----
    Every entry always carries a ``'Type'``. A frequency of exactly 25 days
    previously matched none of the branches and left the key unset; it is now
    classified as ``'Irregular'``.

    The first payment of each counterparty is counted as a gap of 0 days, so
    with ``n`` payments the mean is the total span divided by ``n`` (a single
    payment yields ``freq == 0`` and therefore ``'Discretionary'``).
    """
    df_out = df_user[df_user['amount'] < 0]

    outgoings = {}
    # sort=False keeps counterparties in order of first appearance.
    for payment, df_tmp in df_out.groupby('other_account_name', sort=False):
        # Sort the parsed timestamps (not the raw values) so gaps are chronological.
        gap_days = (
            pd.to_datetime(df_tmp['dates'])
            .sort_values()
            .diff()
            .dt.days
            .fillna(0)  # first payment: no previous payment -> gap of 0
        )
        freq = math.ceil(gap_days.mean())

        # Exhaustive chain: exactly one branch applies for any frequency.
        if freq > 25:
            payment_type = 'Regular'
        elif freq >= 7:
            payment_type = 'Irregular'
        else:
            payment_type = 'Discretionary'

        outgoings[payment] = {
            'freq': freq,
            'amount': df_tmp['amount'].mean(),
            'start_date': df_tmp['dates'].min(),
            'Type': payment_type,
        }

    return outgoings


def build_predicted_in_out(user, incommings, outgoings, end_date):
    """Project recurring incomings and regular outgoings up to ``end_date``.

    Parameters
    ----------
    user
        Not read by this function; kept so existing callers keep working.
    incommings : dict
        ``{name: {'start_date': ..., 'amount': ...}}``. Every entry is
        projected on month starts (``freq='MS'``) from its ``start_date``.
    outgoings : dict
        ``{name: {'start_date': ..., 'amount': ..., 'freq': int, 'Type': str}}``.
        Only entries whose ``'Type'`` is ``'Regular'`` are projected, every
        ``freq`` days; entries without a ``'Type'`` are skipped.
    end_date
        Last date (inclusive) of the projection, anything ``pd.date_range``
        accepts as ``end``.

    Returns
    -------
    pandas.DataFrame
        One row per projected payment, sorted by ``dates``, with columns
        ``dates``, ``amount_in``, ``amount_out_reg``, ``year``, ``month``.
        A row carries the amount in its own column and 0 in the other one.
        Both amount columns are always present, and an empty frame with the
        same columns is returned when there is nothing to project.
    """
    amount_columns = ['amount_in', 'amount_out_reg']
    frames = []

    for details in incommings.values():
        projected = pd.DataFrame(
            {'dates': pd.date_range(details['start_date'], end_date, freq='MS')}
        )
        projected['amount_in'] = details['amount']
        frames.append(projected)

    for details in outgoings.values():
        # .get(): an entry without a 'Type' is skipped instead of raising KeyError.
        if details.get('Type') != 'Regular':
            continue
        projected = pd.DataFrame(
            {'dates': pd.date_range(details['start_date'], end_date,
                                    freq=str(details['freq']) + 'D')}
        )
        projected['amount_out_reg'] = details['amount']
        frames.append(projected)

    if frames:
        schedule = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat([]) raises, so start from an empty datetime column instead.
        schedule = pd.DataFrame({'dates': pd.to_datetime([])})

    # Guarantee a stable schema even when only one side produced rows.
    for column in amount_columns:
        if column not in schedule.columns:
            schedule[column] = 0.0

    schedule = (
        schedule
        .reindex(columns=['dates'] + amount_columns)
        .fillna({column: 0 for column in amount_columns})
        .sort_values(by='dates')
        .reset_index(drop=True)
    )

    # bucket into months
    schedule['year'] = pd.DatetimeIndex(schedule['dates']).year
    schedule['month'] = pd.DatetimeIndex(schedule['dates']).month

    return schedule


In [38]:
#df_train = df.iloc[:700,:] 
#df_test = df.iloc[701:,:] 

In [52]:

# Build the testing logic from here down to create the detector.

# Distinct account holders present in the data, in order of first appearance.
unique_name = df['account_name'].unique()

In [53]:
unique_name

array(['Katherine Valencia', 'Patrick Hamilton'], dtype=object)

In [41]:
#df_test = df_train[df['account_name']==unique_name[0]].copy()

  """Entry point for launching an IPython kernel.


In [54]:
# Per-counterparty summary of incoming payments.
# NOTE(review): `df` holds transactions for every account holder in `unique_name`,
# so these averages mix users, while the later join uses a single user's rows -
# confirm whether only that user's transactions should be passed here.
incommings_payments = find_incommings(df.copy())

In [56]:
# Per-counterparty summary and classification of outgoing payments.
# NOTE(review): `df` holds transactions for every account holder in `unique_name`,
# so these averages mix users, while the later join uses a single user's rows -
# confirm whether only that user's transactions should be passed here.
outgoing_payments = find_outgoings(df.copy())

In [55]:
incommings_payments

{'Wages MyOrg': {'freq': 15,
  'amount': 2450.0,
  'start_date': '2018-02-16 02:17:13'}}

In [105]:
# Project the incomings and the 'Regular' outgoings up to 2024-01-01.
# NOTE(review): the first argument (the user name) is not read inside
# build_predicted_in_out, so it does not restrict the projection to that user.
df_tmp = build_predicted_in_out(unique_name[1], incommings_payments, outgoing_payments, '2024-01-01 00:00:00')

In [106]:
df_tmp.head()

Unnamed: 0,dates,amount_in,amount_out_reg,year,month
0,2018-02-28 07:10:24,0.0,-238.924523,2018,2
1,2018-02-28 07:38:44,0.0,-200.0,2018,2
2,2018-03-01 02:17:13,2450.0,0.0,2018,3
3,2018-03-30 07:10:24,0.0,-238.924523,2018,3
4,2018-03-30 07:38:44,0.0,-200.0,2018,3


In [107]:
# Monthly totals of projected incomings / regular outgoings; `buffer` is their sum,
# i.e. what is left of the month's projected income after regular outgoings
# (outgoing amounts are negative).
gf_payment_pattern_reg = (
    df_tmp
    .groupby(['year', 'month'])[['amount_in', 'amount_out_reg']]
    .sum()
    .assign(buffer=lambda totals: totals['amount_in'] + totals['amount_out_reg'])
)
gf_payment_pattern_reg.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,amount_in,amount_out_reg,buffer
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,2,0.0,-438.924523,-438.924523
2018,3,2450.0,-438.924523,2011.075477
2018,4,2450.0,-438.924523,2011.075477
2018,5,2450.0,-438.924523,2011.075477
2018,6,2450.0,-438.924523,2011.075477
2018,7,2450.0,-438.924523,2011.075477
2018,8,2450.0,-438.924523,2011.075477
2018,9,2450.0,-438.924523,2011.075477
2018,10,2450.0,-438.924523,2011.075477
2018,11,2450.0,-438.924523,2011.075477


In [108]:
gf_payment_pattern_reg

Unnamed: 0_level_0,Unnamed: 1_level_0,amount_in,amount_out_reg,buffer
year,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,2,0.0,-438.924523,-438.924523
2018,3,2450.0,-438.924523,2011.075477
2018,4,2450.0,-438.924523,2011.075477
2018,5,2450.0,-438.924523,2011.075477
2018,6,2450.0,-438.924523,2011.075477
...,...,...,...,...
2023,8,2450.0,-877.849046,1572.150954
2023,9,2450.0,-438.924523,2011.075477
2023,10,2450.0,-438.924523,2011.075477
2023,11,2450.0,-438.924523,2011.075477


In [61]:
df.tail() # 2018-02-16 - 2026-01-01

Unnamed: 0,dates,account_name,account_IBAN,other_account_name,Type,amount,balance
987,2025-10-31 17:55:15,Patrick Hamilton,CJVO60129798466874,Utilities,Regular,-224.106675,-13653.319034
988,2025-11-01 03:01:52,Katherine Valencia,DOJO46309747392475,Medical Session,Regular,-200.0,-13853.319034
989,2025-12-31 15:06:52,Katherine Valencia,DOJO46309747392475,Utilities,Regular,-206.107448,-14059.426481
990,2025-12-31 22:29:06,Patrick Hamilton,CJVO60129798466874,Utilities,Regular,-265.745032,-14325.171513
991,2026-01-01 01:04:31,Katherine Valencia,DOJO46309747392475,Medical Session,Regular,-200.0,-14525.171513


In [109]:
# Keep only the transactions of the second account holder (unique_name[1]);
# the copy lets later cells add columns without touching `df`.
is_selected_user = df['account_name'] == unique_name[1]
df_user = df[is_selected_user].copy()


In [110]:
df_user['account_name'].value_counts()

Patrick Hamilton    489
Name: account_name, dtype: int64

In [111]:
# Derive the merge keys by slicing the 'YYYY-MM-...' date strings:
# characters 0-3 are the year, characters 5-6 the month.
df_user['year'] = df_user['dates'].str[:4].astype('int64')
df_user['month'] = df_user['dates'].str[5:7].astype('int64')

In [112]:
df_user.head()

Unnamed: 0,dates,account_name,account_IBAN,other_account_name,Type,amount,balance,year,month
1,2018-02-20 14:29:42,Patrick Hamilton,CJVO60129798466874,Wages MyOrg,Regular,2600.0,4900.0,2018,2
5,2018-02-28 04:58:09,Patrick Hamilton,CJVO60129798466874,Rent - apt 12,Regular,-1500.0,3106.422271,2018,2
6,2018-02-28 07:10:24,Patrick Hamilton,CJVO60129798466874,Utilities,Regular,-204.099259,2902.323012,2018,2
8,2018-02-28 07:38:44,Patrick Hamilton,CJVO60129798466874,Medical Session,Regular,-200.0,2372.323012,2018,2
9,2018-02-28 07:41:58,Patrick Hamilton,CJVO60129798466874,Medical Subscription,Regular,-150.0,2222.323012,2018,2


In [113]:
# Turn the (year, month) MultiIndex produced by the groupby into ordinary columns
# so they can be used as merge keys.
# NOTE: not idempotent - re-running this cell resets the index again and
# presumably adds a stray 'index' column.
gf_payment_pattern_reg = gf_payment_pattern_reg.reset_index()

In [114]:
# Attach the monthly projection (amount_in, amount_out_reg, buffer) to every
# transaction of the selected user; the left join keeps transactions whose month
# has no projection (their projection columns are NaN).
df_join = df_user.merge(gf_payment_pattern_reg, on=['year', 'month'], how='left')

In [115]:
df_join.shape

(489, 12)

In [116]:
df_join.head()

Unnamed: 0,dates,account_name,account_IBAN,other_account_name,Type,amount,balance,year,month,amount_in,amount_out_reg,buffer
0,2018-02-20 14:29:42,Patrick Hamilton,CJVO60129798466874,Wages MyOrg,Regular,2600.0,4900.0,2018,2,0.0,-438.924523,-438.924523
1,2018-02-28 04:58:09,Patrick Hamilton,CJVO60129798466874,Rent - apt 12,Regular,-1500.0,3106.422271,2018,2,0.0,-438.924523,-438.924523
2,2018-02-28 07:10:24,Patrick Hamilton,CJVO60129798466874,Utilities,Regular,-204.099259,2902.323012,2018,2,0.0,-438.924523,-438.924523
3,2018-02-28 07:38:44,Patrick Hamilton,CJVO60129798466874,Medical Session,Regular,-200.0,2372.323012,2018,2,0.0,-438.924523,-438.924523
4,2018-02-28 07:41:58,Patrick Hamilton,CJVO60129798466874,Medical Subscription,Regular,-150.0,2222.323012,2018,2,0.0,-438.924523,-438.924523


In [119]:
df_join.loc[df_join['Type'] == 'Discretionary']

Unnamed: 0,dates,account_name,account_IBAN,other_account_name,Type,amount,balance,year,month,amount_in,amount_out_reg,buffer
17,2018-03-28 00:13:24,Patrick Hamilton,CJVO60129798466874,Group,Discretionary,-52.937404,2890.992954,2018,3,2450.0,-438.924523,2011.075477
26,2018-04-04 00:09:47,Patrick Hamilton,CJVO60129798466874,Group,Discretionary,-53.677123,-1619.806819,2018,4,2450.0,-438.924523,2011.075477
36,2018-04-28 00:05:25,Patrick Hamilton,CJVO60129798466874,PLC,Discretionary,-300.248394,464.276739,2018,4,2450.0,-438.924523,2011.075477
101,2018-08-28 00:16:01,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-178.118546,-9904.445118,2018,8,2450.0,-438.924523,2011.075477
102,2018-08-28 01:07:51,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-165.192943,-10069.63806,2018,8,2450.0,-438.924523,2011.075477
103,2018-08-28 02:02:24,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-150.89198,-10495.170945,2018,8,2450.0,-438.924523,2011.075477
106,2018-08-28 03:14:57,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-188.345988,-12580.964803,2018,8,2450.0,-438.924523,2011.075477
107,2018-08-28 04:22:36,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-157.212235,-12944.330104,2018,8,2450.0,-438.924523,2011.075477
108,2018-08-28 05:03:31,Patrick Hamilton,CJVO60129798466874,Ltd,Discretionary,-188.88182,-13267.276715,2018,8,2450.0,-438.924523,2011.075477


In [102]:
# Flag non-regular payments whose size exceeds the month's projected buffer.
def gt_buffer(row):
    """Return alert details when a non-regular payment exceeds the monthly buffer.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row or a dict)
        Must provide ``'amount'``, ``'buffer'``, ``'Type'`` and
        ``'other_account_name'``.

    Returns
    -------
    dict
        ``{}`` when no alert applies; otherwise
        ``{'other_account_name': ..., 'amount': ..., 'buffer': ...}``.
        A warning message is also printed when an alert is raised.

    Notes
    -----
    Payments of type ``'Regular'`` never raise an alert. The comparison uses
    absolute values on both sides.
    NOTE(review): because of ``abs(buffer)``, a negative buffer (a projected
    deficit) is treated like an allowance of the same size - confirm this is
    intended. A missing (NaN) buffer never raises an alert, since the
    comparison is then False.
    """
    amount = row['amount']
    buffer = row['buffer']
    txn_type = row['Type']
    other_account_name = row['other_account_name']

    alert_dict = {}
    if txn_type != 'Regular' and abs(amount) > abs(buffer):
        print(
            'Payment of {} to {} exceeds the projected buffer between regular '
            'incoming and outgoing payments this month - are you sure?'
            .format(amount, other_account_name)
        )
        # Keyed entries: a bare set is not a valid argument to dict.update().
        alert_dict.update({
            'other_account_name': other_account_name,
            'amount': amount,
            'buffer': buffer,
        })

    return alert_dict
            
    
    

In [117]:
# Run the buffer check on every joined transaction (row-wise); each row yields a
# dict that is empty when no alert applies.
df_join.apply(gt_buffer, axis = 1)

0      {}
1      {}
2      {}
3      {}
4      {}
       ..
484    {}
485    {}
486    {}
487    {}
488    {}
Length: 489, dtype: object

In [130]:
# Try hitting the API endpoint with the full transaction frame.
import requests

# NOTE(review): this sends every row of `df` (account names and IBANs included)
# to the demo service - confirm that only test data is ever posted.
json_data = df.to_json()
r = requests.post(
    'https://demo-app-lquvhriy2a-ew.a.run.app/service/forecast_v2/',
    json={"data": json_data},
    timeout=60,  # without a timeout the call can block the kernel indefinitely
)
# Fail loudly on HTTP errors instead of trying to decode an error page as JSON.
r.raise_for_status()
r.json()

'{"dates":{"0":"2018-02-16 02:17:13","1":"2018-02-20 14:29:42","2":"2018-02-25 21:12:59","3":"2018-02-28 01:18:39","4":"2018-02-28 03:02:54","5":"2018-02-28 04:58:09","6":"2018-02-28 07:10:24","7":"2018-02-28 07:15:31","8":"2018-02-28 07:38:44","9":"2018-02-28 07:41:58","10":"2018-02-28 07:52:35","11":"2018-02-28 08:09:57","12":"2018-02-28 08:19:09","13":"2018-02-28 08:43:13","14":"2018-02-28 08:49:09","15":"2018-02-28 09:05:45","16":"2018-02-28 18:43:49","17":"2018-03-01 01:32:13","18":"2018-03-01 02:43:29","19":"2018-03-01 03:21:51","20":"2018-03-05 02:51:29","21":"2018-03-07 10:58:58","22":"2018-03-07 13:23:32","23":"2018-03-07 15:47:40","24":"2018-03-11 03:45:17","25":"2018-03-14 00:41:47","26":"2018-03-14 02:22:12","27":"2018-03-14 14:35:21","28":"2018-03-15 17:13:08","29":"2018-03-19 05:03:24","30":"2018-03-20 06:07:48","31":"2018-03-21 00:26:48","32":"2018-03-21 16:05:40","33":"2018-03-22 04:58:51","34":"2018-03-25 11:01:36","35":"2018-03-28 00:13:24","36":"2018-03-28 00:27:57",

In [129]:
r

<Response [200]>