# Comparing Pandas and Tablite's readability.

First Pandas:

In [1]:
import pandas as pd
import datetime as dt

df = pd.read_csv('csv/data.csv')

df['date'] = df['Month']+' '+df['Year'].astype(str)

dates_df = pd.DataFrame([d.strftime('%b %Y') for d in pd.date_range('Jan 2022','Jan 2023',freq='M')],columns=['date'])

new_df = pd.pivot_table(df, values='Amount', index=['Variable'],
                    columns=['date'], aggfunc=sum, fill_value=0).T\
                        .merge(dates_df,on='date',how='right').T\
                           .fillna(0).rename(index={'date':'Variable'}).T.set_index('Variable')\
                            .T.assign(YearTotal = lambda x: x.sum(axis=1).astype(int))\
                                .reindex(['Salary','Bonus', 'Taxes']).astype('int32')

new_df.loc['TotalBrutto'] = new_df.sum()      
new_df

Variable,Jan 2022,Feb 2022,Mar 2022,Apr 2022,May 2022,Jun 2022,Jul 2022,Aug 2022,Sep 2022,Oct 2022,Nov 2022,Dec 2022,YearTotal
Salary,1000,1000,0,0,0,0,0,0,0,1000,1000,0,4000
Bonus,100,0,0,0,0,0,0,0,0,300,10,0,410
Taxes,-300,-300,0,0,0,0,0,0,0,-300,-300,0,-1200
TotalBrutto,800,700,0,0,0,0,0,0,0,1000,710,0,3210


Next to no chance in correctly explaining what goes on above.

Next tablite:

In [2]:
from tablite import Table, GroupBy
from datetime import datetime,date
Table.reset_storage()

# 1. Import the data
df = Table.import_file('csv/data.csv', import_as='csv')  

# 2. create the joined month-year field by adding datetime str.
df['date'] = [datetime.strptime(f"{m} {y}", '%b %Y').date() for m,y in zip(df['Month'], df['Year'])]  

# 3. add zeros for blanks.
var_order = ['Salary','Bonus', 'Taxes']
for month in range(1,12+1):  
    for variable in var_order:
        dt = date(2022,month,1)
        df.add_rows("John Henry", dt.strftime('%b'), 2022, variable, 0, dt)  

# 4. sort by date (not in reverse order), as the tablite pivot keeps the order.
df = df.sort(date=False)  

# 5. create the pivot.
pivot = df.pivot(rows=['Variable'], columns=['date'], functions=[('Amount', GroupBy.sum)])  

# 6. remove extra column
del pivot['function']  

# 7. rename the columns from (date=2022-01-01) to 2022-01
for column_name in pivot.columns[1:]:  
    new_name = column_name[6:13]
    pivot[new_name] = pivot[column_name][:]
    del pivot[column_name]

# 8. reindex so that taxes are at the bottom.
sorted_pivot = pivot.reindex(index=[var_order.index(v) for v in list(pivot['Variable'])])  

# 9. create the YearTotal column
sorted_pivot['YearTotal'] = [sum(r[1:]) for r in pivot.rows] 

# 10. Add TotalBrutto.
sorted_pivot.add_rows(["TotalBrutto"] + [sum(sorted_pivot[c]) for c in sorted_pivot.columns[1:]])  

sorted_pivot  # show it.

splitting data.csv for multiprocessing: 100%|██████████| 13/13 [00:00<00:00, 4333.30lines/s]
100%|██████████| 1/1 [00:00<00:00,  1.87tasks/s]
creating sort index: 100%|██████████| 1/1 [00:00<00:00, 118.50it/s]


#,Variable,2022-01,2022-02,2022-03,2022-04,2022-05,2022-06,2022-07,2022-08,2022-09,2022-10,2022-11,2022-12,YearTotal
row,str,int,int,int,int,int,int,int,int,int,int,int,int,int
0,Salary,1000,1000,0,0,0,0,0,0,0,1000,1000,0,4000
1,Bonus,100,0,0,0,0,0,0,0,0,300,10,0,-1200
2,Taxes,-300,-300,0,0,0,0,0,0,0,-300,-300,0,410
3,TotalBrutto,800,700,0,0,0,0,0,0,0,1000,710,0,3210


Pretty much self explanatory.