In [1]:
import numpy as np
import pandas as pd

In [230]:
path = 'E:\\dtuklaptop\\e\\Users\\Mat\\python\\14. property\\RSACapital\\'

def load_barclays(filename):
    input_file = path + filename
    df=pd.read_csv(input_file,names=['Number','Date','Account','Amount','Subcategory','Memo','Memo2'],skiprows=1, index_col=1, parse_dates=True, dayfirst=True)
    df=df.replace('\t','', regex=True)
    # Need to do this because some memo columns contain commas which confuses read_csv
    df['Memo2'] = df['Memo2'].fillna('')
    df['Memo'] = df['Memo'].astype(str) + df['Memo2'].astype(str)
    df.drop(['Memo2','Number'], axis=1, inplace=True)
    #df.set_index('Date')
    df = categorise(df)
    return df

def categorise_barclays(df):
    df["Cat"] = np.nan
    
    # Identify Mortgages
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('JASPER|TOPAZ|SIBERITE|MORTGAGE EXPRESS|NRAM|PLATFORM|AMBER|BHAM|CAPITAL|CHL|MORTGAGE TRUST')==True),"Cat"] = 'Mortgage'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*M TUCKER.*STO.*')==True) & (df.Amount > -200) & (df.Amount <-190),"Cat"] = 'Mortgage' # Kingston Rd mortgage
    
    # Identify Rents
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('BEALS.*BG.*')==True),"Cat"] = 'BealsRent'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('Funds Transfer|Counter Credit|Standing Order|Bill Payment')==True)&(df.Memo.str.match('.*DEPOSIT.*',case=False)==True),"Cat"] = 'Deposit'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('Funds Transfer|Counter Credit|Standing Order|Bill Payment')==True)&(df.Memo.str.match('.*RENT.*|.*KUMAR.*|.*LINDEMERE.*|.*SEQUENCE UK.*|.*SOPHIE.*|.*BETTS.*',case=False)==True),"Cat"] = 'OurRent'
    
    # Identify Property Expenses
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('Bill Payment|Funds Transfer|Standing Order')==True)&(df.Memo.str.match('.*PORTSEA.*|.*BECK.*|.*COURT FEE.*|.*ROGERS.*|.*ICE PROFESSIONAL.*|.*SOUTHERN ELEC.*')==True),"Cat"] = 'PropertyExpense'

    # Identify Service Charges and Transfers between accounts
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('23 HAMPSHIRE.*STO|4-6 ALHAMBRA RD CS|12-14 ALHAMBRA RD|16-18 ALHAMBRA RD|ALHAMBRA ROAD MANA')==True),"Cat"] = 'ServiceCharge'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*30728691.*')==True),'Cat'] = 'Funds3072'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*40406538 .*')==True),'Cat'] = 'Funds4040'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*60458872.*')==True),'Cat'] = 'Funds6045'
    df.loc[(df.Cat.isnull())&(df.Subcategory=='Bill Payment')&(df.Amount<0)&(df.Memo.str.match('RSA CAPITAL')==True),"Cat"] = 'TransferToRSACapital'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('Funds Transfer')==True)&(df.Memo.str.match('.*FRATTON SC.*|.*FRATTON ROAD.*|.*FRATTON RD.*|.*CREST.*')==True),"Cat"] = 'FrattonRoad'
   
    # Identify Regular Payments
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*HARPUR TRUST.*|.*BEDFORD SCHOOL.*')==True),"Cat"] = 'SchoolFee'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*F VALENTINO.*')==True),"Cat"] = 'Hilltop'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('Bill Payment|Direct Debit')==True)&(df.Memo.str.match('.*HMRC*.')==True),"Cat"] = 'HMRC'
    df.loc[(df.Cat.isnull())&(df.Subcategory=='Direct Debit'),'Cat'] = 'RegularPayment'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*LAND ROVER.*')==True),"Cat"] = 'Car'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('NATIONWIDE|KINGSTON UNITY')==True),'Cat'] = 'RegularPayment'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('Spotify')==True),'Cat'] = 'RegularPayment'
    df.loc[(df.Cat.isnull())&(df.Subcategory=='Standing Order')&(df.Memo.str.match('.*M TUCKER.*STO.*')==True)&(df.Amount>-100),'Cat'] = 'RegularPayment' # £50 to 1585, £5 to Natwest
    df.loc[(df.Cat.isnull())&(df.Subcategory=='Card Purchase')&(df.Memo.str.match('.*Amazon Prime*',case=False)==True)&(df.Amount==-7.99),'Cat'] = 'RegularPayment'
    
    # Mark rest of card puchases as Personal Expense
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('.*Card Purchase.*|.*Card Refund.*')==True),"Cat"] = 'PersonalExpense'

    return df

def load_starling(filename):
    input_file = path + filename
    df=pd.read_csv(input_file, index_col=0, parse_dates=True, dayfirst=True)
    df['Account'] = '60-83-71 00558156'
    df['Notes'] = df['Notes'].fillna('')
    df['Memo'] = df['Counter Party'] + ' ' + df['Reference']  + ' ' + df['Notes']
    df.rename(columns = {'Amount (GBP)':'Amount','Spending Category':'Subcategory'}, inplace = True)
    df.drop(['Counter Party','Reference','Notes','Balance (GBP)'], axis=1, inplace=True)
    df = df[['Account','Amount','Subcategory','Memo']]
    return df
    
def categorise_starling(df):
    df["Cat"] = np.nan

    # Identify Rents
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('BEALS')==True),"Cat"] = 'BealsRent'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('REVENUE')==True)&(df.Memo.str.match('.*DEPOSIT.*',case=False)==True),"Cat"] = 'Deposit'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('REVENUE')==True),"Cat"] = 'OurRent'

    # Identify Expenses
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('WORKPLACE|.*REPAIRS.*|.*MAINTENANCE.*|.*PROFESSIONAL.*')==True),"Cat"] = 'PropertyExpense'
    df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*NRLA.*')==True),"Cat"] = 'PropertyExpense'

    # Identify Withdrawls
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('INTEREST_PAYMENTS')==True),"Cat"] = 'Mortgages'
    df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('DIRECTORS_WAGES|OTHER|OTHER_INCOME')==True),"Cat"] = 'PersonalExpenses'
    return df

In [231]:
dfMt=categorise_barclays(load_barclays('BC_6045_AUG22.csv'))
dfIv=categorise_barclays(load_barclays('BC_6045_AUG22.csv'))
dfRsa=categorise_starling(load_starling('StarlingStatement_2022-09.csv'))

In [216]:
df3=load_barclays('BC_3072_AUG22.csv')
df3

Unnamed: 0_level_0,Account,Amount,Subcategory,Memo,Cat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-08-31,20-53-97 30728691,820.0,Counter Credit,M Williams RENT BGC,OurRent
2022-08-30,20-53-97 30728691,-1200.0,Standing Order,MR MATTHEW STUART 207409 40406538 STO,Funds4040
2022-08-30,20-53-97 30728691,-33.0,Direct Debit,BOOTS REWARDSCHEME A45028400000736 DDR,RegularPayment
2022-08-26,20-53-97 30728691,-24.04,Direct Debit,H3G 980404269602240822 DD,RegularPayment
2022-08-25,20-53-97 30728691,-600.0,Bill Payment,RSA CAPITAL LIMITE M BETTSRENT 111214 BB,OurRent
2022-08-25,20-53-97 30728691,600.0,Standing Order,MR MICHAEL BETTS FT11 12-14ALHAMBRA ST,OurRent
2022-08-25,20-53-97 30728691,-400.0,Funds Transfer,207409 40406538 TRF FT,Funds4040
2022-08-25,20-53-97 30728691,-78.0,Bill Payment,A BECK PLUMBING 13751 101214LEAK8 BBP,PropertyExpense
2022-08-23,20-53-97 30728691,-10.0,Direct Debit,ID MOBILE LIMITED 11888885/001 DDR,RegularPayment
2022-08-22,20-53-97 30728691,-10.0,Direct Debit,ID MOBILE LIMITED 11245718/001 DDR,RegularPayment


In [167]:
df3[df3.Cat.isnull()]

Unnamed: 0,Number,Date,Account,Amount,Subcategory,Memo,Cat


In [218]:
input_file = path + 'StarlingStatement_2022-09.csv'
df=pd.read_csv(input_file, index_col=0, parse_dates=True, dayfirst=True)
df['Account'] = '60-83-71 00558156'
df['Notes'] = df['Notes'].fillna('')
df['Memo'] = df['Counter Party'] + ' ' + df['Reference']  + ' ' + df['Notes']
df.rename(columns = {'Amount (GBP)':'Amount','Spending Category':'Subcategory'}, inplace = True)
df.drop(['Counter Party','Reference','Notes','Balance (GBP)'], axis=1, inplace=True)
df = df[['Account','Amount','Subcategory','Memo']]


df["Cat"] = np.nan

# Identify Rents
df.loc[(df.Cat.isnull())&(df.Memo.str.match('BEALS')==True),"Cat"] = 'BealsRent'
df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('REVENUE')==True)&(df.Memo.str.match('.*DEPOSIT.*',case=False)==True),"Cat"] = 'Deposit'
df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('REVENUE')==True),"Cat"] = 'OurRent'

# Identify Expenses
df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('WORKPLACE|.*REPAIRS.*|.*MAINTENANCE.*|.*PROFESSIONAL.*')==True),"Cat"] = 'PropertyExpense'
df.loc[(df.Cat.isnull())&(df.Memo.str.match('.*NRLA.*')==True),"Cat"] = 'PropertyExpense'

# Identify Withdrawls
df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('INTEREST_PAYMENTS')==True),"Cat"] = 'Mortgages'
df.loc[(df.Cat.isnull())&(df.Subcategory.str.match('DIRECTORS_WAGES|OTHER|OTHER_INCOME')==True),"Cat"] = 'PersonalExpenses'

df

Unnamed: 0_level_0,Account,Amount,Subcategory,Memo,Cat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-01,60-83-71 00558156,-125.0,OTHER,National Residential Landlords NRLA03100427,PropertyExpense
2022-09-01,60-83-71 00558156,675.0,REVENUE,FURCZYK Z K SEPTEMBER 8 8ALHAM,OurRent
2022-09-01,60-83-71 00558156,600.0,REVENUE,NATSO P AUGUST RENT,OurRent
2022-09-05,60-83-71 00558156,-4133.62,INTEREST_PAYMENTS,Matthew Tucker From Rsa Capital..,Mortgages
2022-09-05,60-83-71 00558156,-2691.42,INTEREST_PAYMENTS,Ivana Valentino Mortgages,Mortgages
2022-09-05,60-83-71 00558156,-5689.02,DIRECTORS_WAGES,Ivana Valentino School Fees,PersonalExpenses
2022-09-05,60-83-71 00558156,820.0,REVENUE,Ivana Valentino MWILLIAMS 196BRENT,OurRent
2022-09-05,60-83-71 00558156,23.83,REVENUE,Matthew Tucker 3321 IBIRENT,OurRent
2022-09-05,60-83-71 00558156,-355.0,PROFESSIONAL_SERVICES,Tucker and Valentino Natwest Court Fee F5 1214,PropertyExpense
2022-09-05,60-83-71 00558156,850.0,REVENUE,DANIELLE FULKER 171 flat 3,OurRent


In [223]:
def sumOf(df,cat):
    g=pd.Grouper(freq="M")
    return df.loc[df.Cat == cat,'Amount'].groupby(g).sum()

def get_pty_summary(dfMt,dfIv,dfRsa):
    dfPty = pd.DataFrame()
    dfPty['Mortgage']=pd.concat([sumOf(dfMt,'Mortgage'),sumOf(dfIv,'Mortgage'),sumOf(dfRsa,'Mortgage')],axis=1).sum(axis=1)
    dfPty['PropertyExpense']=pd.concat([sumOf(dfMt,'PropertyExpense'),sumOf(dfIv,'PropertyExpense'),sumOf(dfRsa,'PropertyExpense')],axis=1).sum(axis=1)
    dfPty['ServiceCharge']=pd.concat([sumOf(dfMt,'ServiceCharge'),sumOf(dfIv,'ServiceCharge'),sumOf(dfRsa,'ServiceCharge')],axis=1).sum(axis=1)
    dfPty['OurRent']=pd.concat([sumOf(dfMt,'OurRent'),sumOf(dfIv,'OurRent'),sumOf(dfRsa,'OurRent')],axis=1).sum(axis=1)
    dfPty['BealsRent']=pd.concat([sumOf(dfMt,'BealsRent'),sumOf(dfIv,'BealsRent'),sumOf(dfRsa,'BealsRent')],axis=1).sum(axis=1)
    
    dfPty = dfPty.fillna(0)
    
    dfPty['TotalRent']=dfPty['OurRent'] + dfPty['BealsRent']
    dfPty['NetProfit']=dfPty['OurRent'] + dfPty['BealsRent'] + dfPty['Mortgage'] + dfPty['PropertyExpense'] + dfPty['ServiceCharge']
    return dfPty

In [232]:
df=get_pty_summary(dfMt,dfIv,dfRsa)
df

Unnamed: 0_level_0,Mortgage,PropertyExpense,ServiceCharge,OurRent,BealsRent,TotalRent,NetProfit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-31,-12790.68,-4786.0,-1200.0,4425.06,1304.36,5729.42,-13047.26


In [204]:
def plot_rent_statement(dfSum, start, end):
    dfBar=pd.DataFrame()
    #dfBar['RentalIncome'] = dfSum.loc[start:end]['OurRent']
    #dfBar['Mortgage'] = dfSum.loc[start:end]['Mortgage Payment']
    #dfBar['ManagementFees'] = dfSum.loc[start:end]['Mgmt Fees']+dfSum.loc[start:end]['VAT']
    #dfBar['BealsBills'] = dfSum.loc[start:end]['Bills Paid']
    #dfBar['OurBills'] = dfSum.loc[start:end]['Other Bills']
    #dfBar.index = dfBar.index.strftime('%b %y')
    
    #ax = dfBar[['Mortgage','ManagementFees','BealsBills','OurBills']].plot.bar(stacked=True, position=1, width=.3, color=['red','yellow','blue','orange'])
    #ax.axhline(5000, color="gray",linestyle='--')
    #ax.axhline(10000, color="gray",linestyle='--')
    dfBar[['OurRent']].plot.bar(stacked=True,ax=ax, position=0, width=.3, color=['green'],figsize=(30,15),fontsize=20).legend(loc=2, prop={'size': 20})
    
plot_rent_statement(df6,'2012-01-01','2022-08-31')

KeyError: "None of [Index(['OurRent'], dtype='object')] are in the [columns]"

In [215]:
df6[df6.Cat.isnull()&(df6.Subcategory=='Standing Order')]
df6[df6.Cat.isnull()]

Unnamed: 0_level_0,Account,Amount,Subcategory,Memo,Cat
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
