In [31]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including pandas and plotly deprecation or chained-assignment
# warnings. Consider narrowing it to a specific category once the notebook
# is stable.
warnings.filterwarnings('ignore')

In [3]:
# Load the transaction ledger from the working directory.
df = pd.read_csv('data.csv')
# Dates are parsed day-first, so an ambiguous value such as 01/06/2024 is
# read as 1 June rather than 6 January.
df['date'] = pd.to_datetime(df.date, dayfirst=True)
# Random preview; no random_state is set, so the rows shown differ per run.
df.sample(10)

Unnamed: 0,date,year,month,account,category,description,currency,in,out
238,2024-06-01,2024,6,revolut_EUR,transfer,from sella - transfer,EUR,500.0,0.0
92,2024-04-29,2024,4,sella,bills,gas bill,EUR,0.0,200.0
305,2024-07-05,2024,7,revolut_EUR,needs,iliad bill,EUR,0.0,20.0
321,2024-07-13,2024,7,sella,wants,gift,EUR,0.0,14.0
375,2024-08-09,2024,8,revolut_EUR,wants,tennis,EUR,0.0,16.5
247,2024-06-01,2024,6,revolut_EUR,wants,blender and chromecast,EUR,0.0,121.97
135,2021-12-31,2021,12,sella,wants,balance,EUR,0.0,1462.9
104,2024-05-22,2024,5,revolut_EUR,needs,groceries,EUR,0.0,7.35
340,2024-07-24,2024,7,sella,wants,coffee,EUR,0.0,4.2
359,2024-08-03,2024,8,sella,needs,dentist,EUR,0.0,100.0


In [4]:
# Check column dtypes and non-null counts after loading.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 404 entries, 0 to 403
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         404 non-null    datetime64[ns]
 1   year         404 non-null    int64         
 2   month        404 non-null    int64         
 3   account      404 non-null    object        
 4   category     404 non-null    object        
 5   description  404 non-null    object        
 6   currency     404 non-null    object        
 7   in           404 non-null    float64       
 8   out          404 non-null    float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(4)
memory usage: 28.5+ KB


In [5]:
def get_total_wealth(df, gbp_to_eur=1.19):
    """Return the combined closing balance of all accounts, expressed in EUR.

    For every account the transactions are ordered by date, the running
    balance ``cumsum(in - out)`` is rounded to 2 decimals, and the last value
    is taken as the closing balance. EUR balances are added as they are, GBP
    balances are multiplied by ``gbp_to_eur``.

    Parameters
    ----------
    df : pandas.DataFrame
        Ledger with at least the columns ``date``, ``account``, ``currency``,
        ``in`` and ``out``.
    gbp_to_eur : float, default 1.19
        Fixed GBP -> EUR conversion rate applied to GBP accounts.

    Returns
    -------
    float
        Total wealth in EUR. An empty ledger yields ``0.0``.

    Notes
    -----
    Accounts whose currency is neither ``'EUR'`` nor ``'GBP'`` are left out of
    the total, as no rate is available for them.
    The currency of an account is read from its most recent row.
    """
    eur_total = 0
    gbp_total = 0

    # sort=False keeps accounts in order of first appearance.
    for _, account_df in df.groupby('account', sort=False):
        account_df = account_df.sort_values('date')
        # Both operands come from the same per-account frame, so the
        # subtraction never relies on index alignment with the full ledger
        # (which fails once the index contains duplicate labels).
        net_flow = account_df['in'] - account_df['out']
        closing_balance = net_flow.cumsum().round(2).iloc[-1]

        currency = account_df['currency'].iloc[-1]
        if currency == 'EUR':
            eur_total += closing_balance
        elif currency == 'GBP':
            gbp_total += closing_balance

    return eur_total + gbp_total * gbp_to_eur

# Total across all accounts, expressed in EUR.
get_total_wealth(df)

81128.2831

In [6]:
def get_account_balance(df):
    """Return the closing balance of every account in the ledger.

    Each account's rows are ordered by date and the running total of
    ``in - out`` is rounded to 2 decimals; the last value is the closing
    balance.

    Returns
    -------
    list of tuple
        One ``(currency, balance, account)`` tuple per account, in order of
        first appearance in ``df``. The currency is taken from the account's
        most recent row.
    """
    def _closing_entry(name):
        # Rows of one account, oldest first.
        rows = df[df['account'] == name].sort_values(by='date')
        running = (rows['in'] - rows['out']).cumsum().round(2)
        return (rows['currency'].iloc[-1], running.iloc[-1], name)

    return [_closing_entry(name) for name in df['account'].unique()]

# Closing balance per account as (currency, balance, account) tuples.
get_account_balance(df)

[('EUR', 13565.94, 'sella'),
 ('EUR', 1936.87, 'generali'),
 ('EUR', 65125.7, 'generali_SAV'),
 ('GBP', 418.49, 'revolut_GBP'),
 ('EUR', 1.77, 'revolut_EUR')]

In [7]:
def get_account_monthly_balance(df, account_name='sella'):
    """Summarise one account month by month.

    The account's transactions are ordered by date and a running balance
    (cumulative ``in - out``, rounded to 2 decimals) is attached to each row.
    Rows are then grouped by year, month, account and currency.

    Parameters
    ----------
    df : pandas.DataFrame
        Ledger with ``date``, ``year``, ``month``, ``account``, ``currency``,
        ``in`` and ``out`` columns.
    account_name : str, default 'sella'
        Account to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed ``in``, ``out`` and ``in_out`` and
        the last running ``balance`` of the month.
    """
    group_keys = ['year', 'month', 'account', 'currency']
    aggregations = {'in': 'sum', 'out': 'sum', 'in_out': 'sum', 'balance': 'last'}

    ledger = (
        df[df['account'] == account_name]
        .sort_values(by='date')
        .assign(in_out=lambda rows: rows['in'] - rows['out'])
        # The running balance spans the whole history, not just one month.
        .assign(balance=lambda rows: rows['in_out'].cumsum().round(2))
    )
    return ledger.groupby(group_keys).agg(aggregations).reset_index()

# Monthly summary for the default account ('sella').
get_account_monthly_balance(df)

Unnamed: 0,year,month,account,currency,in,out,in_out,balance
0,2019,11,sella,EUR,17606.36,0.0,17606.36,17606.36
1,2019,12,sella,EUR,921.0,12142.33,-11221.33,6385.03
2,2020,1,sella,EUR,1918.0,131.3,1786.7,8171.73
3,2020,2,sella,EUR,1595.0,0.0,1595.0,9766.73
4,2020,3,sella,EUR,1584.0,0.0,1584.0,11350.73
5,2020,4,sella,EUR,1583.0,0.98,1582.02,12932.75
6,2020,5,sella,EUR,1578.0,0.0,1578.0,14510.75
7,2020,6,sella,EUR,1569.0,1056.0,513.0,15023.75
8,2020,7,sella,EUR,1563.0,439.5,1123.5,16147.25
9,2020,8,sella,EUR,4306.48,8094.8,-3788.32,12358.93


In [8]:
# Short month names, indexed by (month number - 1).
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

def monthly_balance_plot(df, account_name='sella', year=2024):
    """Build a bar chart of the end-of-month balance of one account.

    Parameters
    ----------
    df : pandas.DataFrame
        Full transaction ledger, as accepted by
        ``get_account_monthly_balance``.
    account_name : str, default 'sella'
        Account to plot.
    year : int or str, default 2024
        Year to plot; it is converted with ``int()`` before filtering.

    Returns
    -------
    plotly.graph_objects.Figure or None
        The figure, or ``None`` when ``year`` is falsy or the account has no
        transactions at all.
    """
    if not year:
        return None

    data = get_account_monthly_balance(df, account_name)

    # Guard on the per-account data rather than on the raw ledger: a
    # non-empty ledger says nothing about whether this account has rows.
    if data.empty:
        return None

    balance_df = data[data.year == int(year)]

    title = f'Monthly balance | {account_name.upper()} | {year}'

    fig = px.bar(
        data_frame=balance_df,
        x='month',
        y='balance',
        title=title
    )

    # Show month names instead of the numeric month on the x axis.
    fig.update_xaxes(title=None, labelalias=dict(enumerate(MONTHS, start=1)))
    fig.update_yaxes(title='Balance')

    return fig
    
# Render the monthly balance chart for 'sella' using the default year (2024).
monthly_balance_plot(df,'sella').show()

In [12]:
def exchange_in_out(df, gbp_to_eur=1.19):
    """Return a copy of ``df`` with ``in``/``out`` converted to EUR.

    GBP amounts are multiplied by ``gbp_to_eur``; amounts in any other
    currency are left unchanged. The input is never modified.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        A whole ledger, or a single ledger row (so the function still works
        with ``DataFrame.apply(exchange_in_out, axis=1)``). Must provide
        ``currency``, ``in`` and ``out``.
    gbp_to_eur : float, default 1.19
        Fixed GBP -> EUR conversion rate.

    Returns
    -------
    Same type as ``df``
        Converted copy.
    """
    converted = df.copy()

    if isinstance(df, pd.DataFrame):
        # Vectorised: one rate per row.
        rate = np.where(df['currency'] == 'GBP', gbp_to_eur, 1.0)
    else:
        rate = gbp_to_eur if df['currency'] == 'GBP' else 1

    converted['in'] = df['in'] * rate
    converted['out'] = df['out'] * rate

    return converted

def get_monthly_category(df, year=2024):
    """Aggregate in/out per month and category for one year, in EUR.

    All amounts are converted to EUR first. Rows of the ``generali_SAV``
    account are excluded. Each row's net flow is then expressed as a share of
    the total income of its month.

    Parameters
    ----------
    df : pandas.DataFrame
        Ledger with ``year``, ``month``, ``account``, ``category``,
        ``currency``, ``in`` and ``out`` columns.
    year : int, default 2024
        Year to keep.

    Returns
    -------
    pandas.DataFrame
        Columns ``year``, ``month``, ``category``, ``in``, ``out``,
        ``total_income`` (sum of ``in`` over the month) and ``pcg_in_out``
        (``(in - out) / total_income`` as a fraction). ``pcg_in_out`` is NaN
        for months without any income.
    """
    # Convert once and select rows from the converted frame, so the
    # aggregation actually sees EUR amounts.
    converted = exchange_in_out(df)
    selected = converted[(converted['year'] == year) & (converted['account'] != 'generali_SAV')]

    cat_df = (
        selected
        .groupby(['year', 'month', 'category'])
        .agg({'in': 'sum', 'out': 'sum'})
        .reset_index()
    )

    monthly_income = (
        cat_df
        .groupby(['year', 'month'])['in']
        .sum()
        .reset_index()
        .rename(columns={'in': 'total_income'})
    )

    merged = pd.merge(left=cat_df, right=monthly_income, on=['year', 'month'])

    # A month with zero income has no meaningful share; use NaN instead of
    # dividing by zero and producing +/-inf.
    income = merged['total_income'].replace(0, np.nan)
    merged['pcg_in_out'] = (merged['in'] - merged['out']) / income

    return merged

# Per-category in/out breakdown for the default year (2024).
get_monthly_category(df)

Unnamed: 0,year,month,category,in,out,total_income,pcg_in_out
0,2024,1,salary,5155.47,0.0,5155.47,1.0
1,2024,1,wants,0.0,2572.58,5155.47,-0.499
2,2024,2,needs,0.0,4848.0,1620.47,-2.991725
3,2024,2,salary,1141.14,0.0,1620.47,0.704203
4,2024,2,transfer,479.33,0.0,1620.47,0.295797
5,2024,2,wants,0.0,1885.09,1620.47,-1.163298
6,2024,3,bills,0.0,150.0,7573.44,-0.019806
7,2024,3,needs,2424.0,3007.98,7573.44,-0.077109
8,2024,3,salary,4148.11,0.0,7573.44,0.547718
9,2024,3,subscription,0.0,59.4,7573.44,-0.007843


In [51]:
def pcg_category_plot(df, year=2024, month=8):
    """Build a horizontal bar chart of each category's in/out share for a month.

    The shares come from ``get_monthly_category`` (column ``pcg_in_out``).
    Categories whose share is exactly zero are not drawn. Negative shares are
    coloured red, all others green.

    Parameters
    ----------
    df : pandas.DataFrame
        Full transaction ledger.
    year : int, default 2024
        Year to plot.
    month : int, default 8
        Month number; used to index ``MONTHS`` for the title.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    yearly = get_monthly_category(df, year)
    # Explicit copy so adding the colour column does not write into a slice.
    monthly = yearly[yearly['month'] == month].copy()

    title = f'Percentage of in/out by category | {MONTHS[month-1].capitalize()}, {year}'

    monthly['color'] = np.where(monthly['pcg_in_out'] < 0, 'red', 'green')
    # Filter once and reuse for both the bars and their colours.
    shown = monthly[monthly['pcg_in_out'] != 0]

    fig = px.bar(
        data_frame=shown,
        x='pcg_in_out',
        y='category',
        barmode='stack',
        text_auto='.2%',
    )

    fig.update_layout(
        xaxis_tickformat='.0%',
        title=title,
        xaxis_title='',
        yaxis_title='',
    )
    fig.update_traces(marker_color=shown['color'])

    return fig

# Render the category breakdown for the defaults (August 2024).
pcg_category_plot(df).show()
