In [None]:
import pandas as pd
import numpy as np
import glob
import re


In [None]:
class Analysis():
    """Load, clean and summarise the records of one id from a set of CSV files.

    The CSV files must provide the columns 'Instrument', 'Quantity', 'Price',
    'Trigger Date' and 'Buy/Sell'. After construction ``self.df`` holds the
    cleaned rows plus the derived columns 'Credit/Debit', 'Date', 'Month',
    'Year' and 'Datetime'.
    """

    # Locates the "DD Mon YYYY" portion inside a 'Trigger Date' value.
    _DATE_REGEX = r"(\d{2}) (\w{3}) (\d{4})"

    def __init__(self, id, files):
        """Read ``files``, clean the rows and compute the derived columns.

        Args:
            id: Identifier of this analysis; used in the output file name.
            files: Iterable of CSV paths or file-like objects, each passed to
                ``pd.read_csv``.

        Raises:
            ValueError: If ``files`` is empty, or if a remaining row has a
                'Trigger Date' that does not contain a "DD Mon YYYY" date.
        """
        self.id = id

        # Materialise first so an empty input is reported with the analysis id
        # instead of pandas' generic "No objects to concatenate".
        files = list(files)
        if not files:
            raise ValueError(f'No CSV files supplied for analysis {id!r}')

        # Read csv files
        frame = pd.concat((pd.read_csv(file) for file in files))

        # Clean up headers: only rows that carry an Instrument value are kept
        frame = frame[frame['Instrument'].notna()].reset_index(drop=True)

        # Calculate some columns
        frame['Credit/Debit'] = frame['Quantity'] * frame['Price'] * -1

        # str.extract yields NaN for non-matching rows, so bad values can be
        # reported together instead of failing on the first None match.
        parts = frame['Trigger Date'].astype(str).str.extract(self._DATE_REGEX)
        unparsed = parts[0].isna()
        if unparsed.any():
            examples = frame.loc[unparsed, 'Trigger Date'].head(3).tolist()
            raise ValueError(
                f"Analysis {id!r}: {int(unparsed.sum())} row(s) have a "
                f"'Trigger Date' without a 'DD Mon YYYY' date, e.g. {examples!r}")

        frame['Date'] = parts[0]
        frame['Month'] = parts[1]
        frame['Year'] = parts[2]
        # The layout is known, so parse with an explicit format rather than
        # relying on per-call format inference.
        frame['Datetime'] = pd.to_datetime(
            frame['Year'] + '-' + frame['Month'] + '-' + frame['Date'],
            format='%Y-%b-%d')

        # 'Datetime' only has day resolution; a stable sort keeps the input
        # order within a day, so the row kept by drop_duplicates below is
        # always the first one read.
        frame = frame.sort_values(by=['Datetime'], kind='stable')
        self.df = frame.drop_duplicates(
            subset=['Trigger Date', 'Instrument', 'Buy/Sell'])

    def save(self):
        """Write ``self.df`` to ``output_<id>.xlsx`` (sheet 'Raw', no index)."""
        # Save output
        output_file = f'output_{self.id}.xlsx'
        self.df.to_excel(output_file, sheet_name='Raw', index=False)

    def aggregate(self):
        """Return the sum of 'Credit/Debit' per 'Month' (rows) and 'Year' (columns).

        ``margins=True`` adds 'All' totals; the trailing ``iloc`` drops the
        last column (the 'All' column), so only the 'All' row remains.
        ``sort=False`` leaves the groups unsorted.
        """
        return self.df.pivot_table(index=['Month'], columns=['Year'], values=[
            'Credit/Debit'], aggfunc='sum', margins=True, sort=False).iloc[:, :-1]


In [None]:
# Read all csv files
def build_file_index(paths):
    """Return a DataFrame of CSV paths (column 0) and their derived 'id'.

    The id is the text after the first underscore in the path, with one
    trailing '.csv' removed. Paths without an underscore get a missing id.
    Paths are sorted so the result does not depend on directory listing order.
    """
    index = pd.DataFrame({0: pd.Series(sorted(paths), dtype=str)})
    # One regex does both steps: it starts at the leftmost underscore and
    # strips only the final '.csv', and it still works for an empty list or
    # for names that contain no underscore.
    index['id'] = index[0].str.extract(r'_(.*?)(?:\.csv)?$', expand=False)
    return index


files = build_file_index(glob.glob('data/*.csv'))
# Files without an id cannot be grouped into an analysis, so leave them out.
ids = files['id'].dropna().unique()


In [None]:
# Run one analysis per id: save its workbook and print the aggregate table.
# The loop variable is not called `id`, which would shadow the builtin for
# every later cell of the notebook.
for analysis_id in ids:
    # A missing id matches no file, so there is nothing to analyse for it.
    if pd.isna(analysis_id):
        continue
    analysis = Analysis(analysis_id, files.loc[files['id'] == analysis_id, 0])
    analysis.save()
    print(f'Analysis of {analysis_id}')
    # Analysis
    print(analysis.aggregate())
    print()
