In [1]:
import pandas as pd
import plotly_express as px
from sklearn.linear_model import LinearRegression
import numpy as np
from datetime import datetime, timedelta

In [2]:
# Load the HTML table into a DataFrame
trades = pd.read_html('./Statement.htm')
df = trades[0].copy()  

header_row_index = 2
df.columns = df.iloc[header_row_index]

# Remove the header row and rows above it
df = df[header_row_index + 1:]

# Drop unwanted columns without using inplace=True
df = df.drop(['Ticket', 'Taxes'], axis=1)

# Rename columns
df.columns = ['Open Date', 'Type', 'Size', 'Asset', 'Open Price', 'Stop Loss', 'Take Profit', 'Close Date', 'Close Price', 'Commissions', 'Swap', 'PnL']

# Filter rows based on 'Type'
valid_trade_types = ['buy', 'sell', 'buy limit', 'sell limit', 'buy stop', 'sell stop']
df = df[df['Type'].str.lower().isin(valid_trade_types)]

# Filter out rows where 'PnL' equals 'cancelled'
df = df[df['PnL'].str.lower() != 'cancelled']

# Convert specific columns to float
cols_to_float = ['Size', 'Open Price', 'Stop Loss', 'Take Profit', 'Close Price', 'Commissions', 'Swap', 'PnL']
for col in cols_to_float:
    df[col] = df[col].astype(str).str.replace(' ', '').astype(float)

df['Open Date'] = pd.to_datetime(df['Open Date'], format='%Y.%m.%d %H:%M:%S')
df['Close Date'] = pd.to_datetime(df['Close Date'], format='%Y.%m.%d %H:%M:%S')
df = df.sort_values(by='Close Date', ascending=True)
df['Rolling PnL'] = df['PnL'].cumsum()



In [3]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression

# Convert 'Close Date' to ordinal (numeric format)
df['DateNumeric'] = pd.to_datetime(df['Close Date']).map(datetime.toordinal)

# Prepare the dataset
X = df[['DateNumeric']]
y = df['Rolling PnL']

# Create a polynomial regression model
degree = 3  # You can adjust the degree
polyreg = make_pipeline(PolynomialFeatures(degree), LinearRegression())
polyreg.fit(X, y)

# Predict future 'Rolling PnL' values
num_days_to_predict = 30
last_date = pd.to_datetime(df['Close Date']).max()
future_dates = [last_date + timedelta(days=i) for i in range(1, num_days_to_predict + 1)]
future_dates_numeric = [datetime.toordinal(date) for date in future_dates]

# Make predictions
future_predictions = polyreg.predict(np.array(future_dates_numeric).reshape(-1, 1))

# Create a DataFrame for the predictions
predictions_df = pd.DataFrame({'Future Close Date': future_dates, 'Predicted Rolling PnL': future_predictions})

# Display the predictions
print(predictions_df)


     Future Close Date  Predicted Rolling PnL
0  2023-12-30 14:56:11           12153.880574
1  2023-12-31 14:56:11           12280.458758
2  2024-01-01 14:56:11           12407.037628
3  2024-01-02 14:56:11           12533.617182
4  2024-01-03 14:56:11           12660.197422
5  2024-01-04 14:56:11           12786.778347
6  2024-01-05 14:56:11           12913.359958
7  2024-01-06 14:56:11           13039.942253
8  2024-01-07 14:56:11           13166.525234
9  2024-01-08 14:56:11           13293.108900
10 2024-01-09 14:56:11           13419.693252
11 2024-01-10 14:56:11           13546.278288
12 2024-01-11 14:56:11           13672.864010
13 2024-01-12 14:56:11           13799.450417
14 2024-01-13 14:56:11           13926.037509
15 2024-01-14 14:56:11           14052.625287
16 2024-01-15 14:56:11           14179.213749
17 2024-01-16 14:56:11           14305.802897
18 2024-01-17 14:56:11           14432.392731
19 2024-01-18 14:56:11           14558.983249
20 2024-01-19 14:56:11           1



In [7]:
df.columns

Index(['Open Date', 'Type', 'Size', 'Asset', 'Open Price', 'Stop Loss',
       'Take Profit', 'Close Date', 'Close Price', 'Commissions', 'Swap',
       'PnL', 'Rolling PnL', 'DateNumeric'],
      dtype='object')

In [None]:
df