In [1]:
import pandas as pd
import plotly_express as px

In [2]:
# Load the HTML statement. read_html returns a list of DataFrames; the first
# one is used as the trades table.
trades = pd.read_html('./Statement.htm')
df = trades[0].copy()

# The row at this position holds the real column headers.
header_row_index = 2
df.columns = df.iloc[header_row_index]

# Keep only the rows below the header row.
df = df[header_row_index + 1:]

# Drop unwanted columns (returns a new frame; no inplace=True).
df = df.drop(['Ticket', 'Taxes'], axis=1)

# Rename the remaining columns positionally; the list must match their order.
df.columns = ['Open Date', 'Type', 'Size', 'Asset', 'Open Price', 'Stop Loss', 'Take Profit', 'Close Date', 'Close Price', 'Commissions', 'Swap', 'PnL']

# Keep only rows whose 'Type' is one of the recognised order/trade types.
valid_trade_types = ['buy', 'sell', 'buy limit', 'sell limit', 'buy stop', 'sell stop']
df = df[df['Type'].str.lower().isin(valid_trade_types)]

# Drop rows whose 'PnL' cell reads 'cancelled'. The explicit .copy() detaches
# the result from the frame it was filtered from, so the column assignments
# below write to an independent frame instead of raising SettingWithCopyWarning.
df = df[df['PnL'].str.lower() != 'cancelled'].copy()

# Convert the numeric columns to float. Spaces inside the text are removed
# first (literal replacement, not a regex) so the values parse as floats.
cols_to_float = ['Size', 'Open Price', 'Stop Loss', 'Take Profit', 'Close Price', 'Commissions', 'Swap', 'PnL']
for col in cols_to_float:
    df[col] = df[col].astype(str).str.replace(' ', '', regex=False).astype(float)

# Parse both timestamps, order trades by close time, and accumulate PnL in
# that order.
df['Open Date'] = pd.to_datetime(df['Open Date'], format='%Y.%m.%d %H:%M:%S')
df['Close Date'] = pd.to_datetime(df['Close Date'], format='%Y.%m.%d %H:%M:%S')
df = df.sort_values(by='Close Date', ascending=True)
df['Rolling PnL'] = df['PnL'].cumsum()



In [21]:
# Reduce every close timestamp to its calendar date. This overwrites the
# 'Close Date' column of df itself, so df no longer carries the time of day
# once this cell has run.
df['Close Date'] = df['Close Date'].pipe(pd.to_datetime).dt.date

# One row per close date holding that day's summed PnL, plus the running
# total of those daily sums.
daily_pnl_df = (
    df.groupby('Close Date')['PnL']
    .sum()
    .reset_index()
    .assign(**{'Rolling PnL': lambda daily: daily['PnL'].cumsum()})
)


In [19]:
# Locate the row where the running PnL is lowest, then report that minimum
# (rounded to 2 decimals) and the close date of that row.
index_of_lowest_pnl = df['Rolling PnL'].idxmin()
lowest_pnl = df['Rolling PnL'].min().round(2)
date_lowest_pnl = df.at[index_of_lowest_pnl, 'Close Date']

# Cell output: the date formatted as day, full month name, year.
date_lowest_pnl.strftime("%d %B %Y")

In [46]:
# Keep only the days on which the cumulative daily PnL is below zero.
negative_days_df = daily_pnl_df[daily_pnl_df['Rolling PnL'] < 0]

# Area chart of the running PnL over those days, drawn in red.
# Plotly Express treats `color=` as a column name (or array) used to group
# marks, so passing the string 'red' there fails with a "not the name of a
# column" error. A fixed colour is supplied through `color_discrete_sequence`.
px.area(negative_days_df, x='Close Date', y='Rolling PnL', color_discrete_sequence=['red'])


In [None]:
# Find the last row of trades and remove everything below it.
# NOTE(review): `df2` is not defined in any visible cell; it must come from a
# cell that is not shown here. Confirm before a Restart & Run All.

# True for rows in which every column selected by the first ten column labels
# is NaN.
mask = df2[df2.columns[0:10]].isna().all(axis=1)

# iloc slices by position, whereas Series.idxmax() returns an index label;
# the two only coincide on a default 0..n-1 index. argmax on the underlying
# array gives the position of the first True directly. If no row is all-NaN,
# the whole frame is kept.
first_nan_index = int(mask.to_numpy().argmax()) if mask.any() else len(df2)
df_final = df2.iloc[:first_nan_index]

In [None]:
# Separate the 'balance' rows (deposits and withdrawals) from df_final so that
# only PnL-bearing rows remain.
is_balance_row = df_final['Type'].str.lower().isin(['balance'])
df_deposit_withdrawal = df_final[is_balance_row]
df_final = df_final[~is_balance_row]

# Discard cancelled orders/trades, identified by 'cancelled' in 'Profit'.
not_cancelled = df_final['Profit'].str.lower().ne('cancelled')
df_final = df_final[not_cancelled]



In [None]:
# Give the columns numeric dtypes for later calculations.
df_final['Ticket'] = df_final['Ticket'].astype(int)

# NOTE(review): 'Price' is listed twice; presumably the table has two columns
# with that label -- confirm, otherwise the second entry is redundant.
cols_to_float = [
    'Size', 'Price', 'S / L', 'T / P', 'Price',
    'Commission', 'Taxes', 'Swap', 'Profit',
]

# Strip spaces out of each listed column (not only Profit) so the text can be
# cast from str to float.
for col in cols_to_float:
    df_final[col] = (
        df_final[col]
        .replace(' ', '', regex=True)
        .astype(float)
    )

# Cast once more via an explicit column -> float mapping; the result is bound
# to `df`, replacing whatever `df` referred to before this cell.
conversion_dict = dict.fromkeys(cols_to_float, float)
df = df_final.astype(conversion_dict)

In [None]:
# Bare last expression: render `df` as this cell's output for inspection.
df

In [None]:
# Show the last rows of df_final to check where the kept rows end.
df_final.tail()