## Initialization

In [1]:

import yfinance as yf
import pandas as pd
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime, timedelta
import numpy as np
from plotly.subplots import make_subplots
import os
from jinja2 import Environment, FileSystemLoader
import json
import markdown2
from IPython.display import HTML
import plotly




In [2]:
## Choose the date you want to generate Analysis
## You should have csv including: 
## 1. The daily performance: data/Performance/Performance_{date}.csv,    
## 2. The daily trading data : data/TradingData/nasdaq_100_5min_data_{date}.csv, 
## 3. The previous day trading data : data/TradingData/nasdaq_100_5min_data_{date_previous_str}.csv
date = '2024-04-23'
label = 'MESM24'

In [3]:
## considering the previous date could be last week friday, but if there is holiday, you should adjust this part manually
# Convert the date string to a datetime object
date_obj = datetime.strptime(date, '%Y-%m-%d')

# Check if the given date is a Monday
if date_obj.weekday() == 0:  # Monday is represented by 0
    # If it's Monday, subtract 3 days to get the previous working day
    date_previous = date_obj - timedelta(days=3)
else:
    # If it's not Monday, subtract 1 day to get the previous working day
    date_previous = date_obj - timedelta(days=1)

# Convert the datetime object back to a string in the same format
date_previous_str = date_previous.strftime('%Y-%m-%d')
date_previous_str

'2024-04-22'

In [4]:
## load data
trade = pd.read_csv(f'../data/performance/Performance_{date}.csv')
df = pd.read_csv(f'../data/future/{label}/{label}_5min_data_{date}.csv')
df1 = pd.read_csv(f'../data/future/{label}/{label}_1min_data_{date}.csv')
try:
    df_previous = pd.read_csv(f'../data/future/{label}/{label}_5min_data_{date_previous_str}.csv')
except FileNotFoundError as e:
    df_previous = df.copy()
    print(f"Cannot find file '{e.filename}'")


In [5]:
## Preprocess trade to trade_rth

trade = trade.drop(columns=['_priceFormat', '_priceFormatType', '_tickSize', 'buyFillId','sellFillId'])
# trade = trade[trade['symbol'] == 'ABC']
# 1. Convert pnl to numeric format with proper handling of negative values
trade['pnl'] = trade['pnl'].str.replace('$', '')
trade['pnl'] = trade['pnl'].str.replace('(', '-').str.replace(')', '').astype(float)

# 2. Convert boughtTimestamp and soldTimestamp to datetime format and adjust to UTC
trade['boughtTimestamp'] = pd.to_datetime(trade['boughtTimestamp']) - pd.Timedelta(hours=7)
trade['soldTimestamp'] = pd.to_datetime(trade['soldTimestamp']) - pd.Timedelta(hours=7)

# 3. Convert duration to time format
trade['duration'] = pd.to_timedelta(trade['duration'])

#4. Filter out trades with boughtTimestamp or soldTimestamp between 9:30 and 16:10
trade_rth = trade[(trade['boughtTimestamp'].dt.time >= pd.Timestamp('09:30').time()) &
                       (trade['boughtTimestamp'].dt.time <= pd.Timestamp('16:10').time()) |
                       (trade['soldTimestamp'].dt.time >= pd.Timestamp('09:30').time()) &
                       (trade['soldTimestamp'].dt.time <= pd.Timestamp('16:10').time())].copy()  # Ensure a copy of the DataFrame is created

trade_rth['boughtTimestamp'] = trade_rth['boughtTimestamp'].dt.floor('min')
trade_rth['soldTimestamp'] = trade_rth['soldTimestamp'].dt.floor('min')

trade_rth['boughtTimestamp'] = trade_rth['boughtTimestamp'].dt.floor('5min')
trade_rth['soldTimestamp'] = trade_rth['soldTimestamp'].dt.floor('5min')


In [6]:
## Preprocess TradingData(df) to df_rth

# 0. Convert the 'Datetime' column to datetime dtype
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime', inplace=True)

# 1. Convert the 'Datetime' column to datetime if it's not already in datetime format
df.index = pd.to_datetime(df.index)

# 2. Filter rows within the specified time range directly using the index
df_rth = df[(df.index.time >= pd.Timestamp('09:30').time()) & (df.index.time <= pd.Timestamp('16:11').time())]

In [7]:
## Preprocess TradingData(df) to df_rth

# 0. Convert the 'Datetime' column to datetime dtype
df1['Datetime'] = pd.to_datetime(df1['Datetime'])
df1.set_index('Datetime', inplace=True)

# 1. Convert the 'Datetime' column to datetime if it's not already in datetime format
df1.index = pd.to_datetime(df1.index)

# 2. Filter rows within the specified time range directly using the index
df1_rth = df1[(df1.index.time >= pd.Timestamp('09:30').time()) & (df1.index.time <= pd.Timestamp('16:11').time())]
df1_rth

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-23 09:30:00-04:00,5069.50,5070.75,5065.50,5066.25,5066.25,6531
2024-04-23 09:31:00-04:00,5066.25,5069.75,5065.25,5068.25,5068.25,4170
2024-04-23 09:32:00-04:00,5068.25,5070.25,5068.25,5069.00,5069.00,2646
2024-04-23 09:33:00-04:00,5069.25,5069.75,5067.00,5067.25,5067.25,2452
2024-04-23 09:34:00-04:00,5067.25,5067.50,5063.75,5065.25,5065.25,4140
...,...,...,...,...,...,...
2024-04-23 16:07:00-04:00,5108.00,5108.50,5107.00,5107.75,5107.75,450
2024-04-23 16:08:00-04:00,5107.50,5108.25,5106.50,5108.25,5108.25,653
2024-04-23 16:09:00-04:00,5108.50,5109.25,5107.50,5109.25,5109.25,339
2024-04-23 16:10:00-04:00,5109.00,5109.00,5107.75,5108.25,5108.25,524


In [8]:
# Resample the data to 5-minute intervals
ohlc_data = df1_rth.resample('5min').agg({
    'Open': 'first',      # First of 'Open' in each 5-minute window
    'High': 'max',        # Maximum of 'High' in the window
    'Low': 'min',         # Minimum of 'Low' in the window
    'Close': 'last',      # Last of 'Close' in the window
    'Adj Close': 'last',  # Last of 'Adj Close' in the window
    'Volume': 'sum'       # Sum of 'Volume' in the window
})

# Set display options to show all rows
pd.set_option('display.max_rows', None)

# Display the DataFrame
ohlc_data
# Print or return the resulting DataFrame
ohlc_data


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-23 09:30:00-04:00,5069.5,5070.75,5063.75,5065.25,5065.25,19939
2024-04-23 09:35:00-04:00,5065.25,5071.0,5064.5,5069.25,5069.25,12761
2024-04-23 09:40:00-04:00,5069.5,5075.25,5067.75,5071.75,5071.75,14110
2024-04-23 09:45:00-04:00,5071.5,5084.5,5071.5,5080.5,5080.5,29066
2024-04-23 09:50:00-04:00,5080.5,5084.0,5074.0,5083.75,5083.75,16906
2024-04-23 09:55:00-04:00,5083.75,5088.75,5080.75,5082.25,5082.25,17644
2024-04-23 10:00:00-04:00,5082.5,5084.5,5078.5,5082.25,5082.25,16948
2024-04-23 10:05:00-04:00,5082.0,5088.75,5081.25,5085.25,5085.25,15643
2024-04-23 10:10:00-04:00,5085.0,5089.75,5083.25,5084.5,5084.5,14448
2024-04-23 10:15:00-04:00,5084.75,5089.5,5083.75,5088.25,5088.25,11524


In [9]:
df_rth

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-04-23 09:30:00-04:00,5069.5,5070.75,5063.75,5065.25,5065.25,19939
2024-04-23 09:35:00-04:00,5065.25,5071.0,5064.5,5069.25,5069.25,12761
2024-04-23 09:40:00-04:00,5069.5,5075.25,5067.75,5071.75,5071.75,14110
2024-04-23 09:45:00-04:00,5071.5,5084.5,5071.5,5080.5,5080.5,29066
2024-04-23 09:50:00-04:00,5080.5,5084.0,5074.0,5083.75,5083.75,16906
2024-04-23 09:55:00-04:00,5083.75,5088.75,5080.75,5082.25,5082.25,17644
2024-04-23 10:00:00-04:00,5082.5,5084.5,5078.5,5082.25,5082.25,16948
2024-04-23 10:05:00-04:00,5082.0,5088.75,5081.25,5085.25,5085.25,15643
2024-04-23 10:10:00-04:00,5085.0,5089.75,5083.25,5084.5,5084.5,14448
2024-04-23 10:15:00-04:00,5084.75,5089.5,5083.75,5088.25,5088.25,11524


In [10]:
## Preprocess previous TradingData(df_previous) to df_previous_rth

# 0. Convert the 'Datetime' column to datetime dtype
df_previous['Datetime'] = pd.to_datetime(df_previous['Datetime'])
df_previous.set_index('Datetime', inplace=True)

# 1. Convert the 'Datetime' column to datetime if it's not already in datetime format
df_previous.index = pd.to_datetime(df_previous.index)

# 2. Filter rows within the specified time range directly using the index
df_previous_rth = df_previous[(df_previous.index.time >= pd.Timestamp('09:30').time()) & (df_previous.index.time <= pd.Timestamp('16:10').time())]


# 3. get previous open, close, highest and lowest
# Find the overall highest and lowest values for High and Low
pre_high = df_previous_rth['High'].max()
pre_low = df_previous_rth['Low'].min()

pre_open = df_previous_rth['Open'].iloc[0]
pre_close = df_previous_rth['Close'].iloc[-1]

pre_market = {
    'pre_high': pre_high,
    'pre_low': pre_low,
    'pre_open': pre_open,
    'pre_close': pre_close
}

# 4. Get trhe last 20 bars
df_previous_rth = df_previous_rth[-20:]

In [11]:
## Calculate rth EMA including last 20 of df_previous_rth

# Combine previous and current trading session data
combined_df = pd.concat([df_previous_rth, df_rth])

# Calculate the 20-period EMA on the combined close prices
combined_df['EMA_20'] = combined_df['Close'].ewm(span=20, adjust=False).mean().round(2)

# # Now, slice back out the EMA values that correspond only to df_rth dates
df_rth['EMA_20'] = combined_df['EMA_20'].iloc[-len(df_rth):].values



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_rth['EMA_20'] = combined_df['EMA_20'].iloc[-len(df_rth):].values


In [12]:
# Calculate winning and losing trades
winning_trades = trade_rth[trade_rth['pnl'] > 0]
losing_trades = trade_rth[trade_rth['pnl'] <= 0]

# Data for pie chart
win_loss_counts = {'Winning Trades': len(winning_trades), 'Losing Trades': len(losing_trades)}




In [13]:
# Generate statistics for tables
def trade_stats(trades):
    if len(trades) == 0:
        return {'AveragePnL': 0, 'MaxPnL': 0, 'MinPnL': 0, 'TotalPnL': 0, 'Count': 0}
    return {
        'AveragePnL': round(trades['pnl'].mean(), 2),
        'MaxPnL': round(trades['pnl'].max(), 2),
        'MinPnL': round(trades['pnl'].min(), 2),
        'TotalPnL': round(trades['pnl'].sum(), 2),
        'Count': len(trades)
    }

all_trades_stats = trade_stats(trade_rth)
winning_trades_stats = trade_stats(winning_trades)
losing_trades_stats = trade_stats(losing_trades)

In [14]:
## Until now, you should have the following data for plotting

# all_trades_stats 
# winning_trades_stats 
# losing_trades_stats
# df_rth
# trade_rth
# pre_high
# pre_low 
# pre_open 
# pre_close 
# win_loss_counts

## Preparing Plots

In [15]:
def create_candlestick_traces(df):
    candle_data = []
    annotations = []
    low_points = df['Low']
    offset = low_points.min() * 0.001  # Small offset to place text below each candlestick
    
    for index, (idx, row) in enumerate(df.iterrows(), start=1):
        x_value = idx.strftime('%Y-%m-%d %H:%M')
        single_candle = go.Candlestick(
            x=[x_value],
            open=[row['Open']],
            high=[row['High']],
            low=[row['Low']],
            close=[row['Close']],
            name=f"Candle {index}",  # Enumerating candlesticks
            visible=True  # Initially visible
        )
        candle_data.append(single_candle)
        
        # Corresponding annotation for each candlestick
        annotation = {
            'x': x_value, 
            'y': row['Low'] - offset,
            'xref': 'x', 
            'yref': 'y',
            'text': str(index) if index % 2 != 0 else '',# Text is index if odd, empty if even
            'showarrow': False, 
            'font': {'family': 'Arial, sans-serif', 'size': 12, 'color': 'black'},
            'align': 'center',
            'visible': True
        }
        annotations.append(annotation)

    return candle_data, annotations

def create_pie_trace(win_loss_counts):
    labels = list(win_loss_counts.keys())
    values = list(win_loss_counts.values())
    return go.Pie(labels=labels, values=values, showlegend=False ,name="Win vs Loss")

def create_trade_lines(trade_df):
    trade_lines = []
    for index, row in trade_df.iterrows():
        entry_time, exit_time = sorted([row['boughtTimestamp'], row['soldTimestamp']])
        entry_price, exit_price = (row['buyPrice'], row['sellPrice']) if entry_time == row['boughtTimestamp'] else (row['sellPrice'], row['buyPrice'])
        color = 'green' if row['pnl'] > 0 else 'red'
        trade_line = go.Scatter(
            x=[entry_time, exit_time],
            y=[entry_price, exit_price],
            mode='lines+markers',
            line=dict(color=color, width=2),
            marker=dict(color=color, size=8),
            name=f"Trade {index}",
            hoverinfo='text',
            hovertext=f"Entry: {entry_time}, Price: {entry_price} | Exit: {exit_time}, Price: {exit_price}, PnL: {row['pnl']}"
        )
        trade_lines.append(trade_line)
    return trade_lines

def create_ema_trace(df):
    return go.Scatter(
        x=df.index.strftime('%Y-%m-%d %H:%M'),
        y=df['EMA_20'],
        mode='lines',
        line=dict(color='blue', width=2),
        name='EMA 20'
    )


In [16]:
def create_horizontal_lines(df, values, colors, labels):
    return [
        go.Scatter(
            x=[df.index.min(), df.index.max()],
            y=[value] * 2,
            mode='lines',
            line=dict(color=color, width=2, dash='dash'),
            name=f'pre_{label.lower()}'
        ) for value, color, label in zip(values, colors, labels)
    ]



In [17]:
def assemble_plot(df, trade_df, pre_market, pre_colors):
    fig = go.Figure()
    fig.add_trace(create_ema_trace(df))
    fig.add_traces(create_horizontal_lines(df, list(pre_market.values()), pre_colors, list(pre_market.keys())))
    candle_traces, candle_annotations = create_candlestick_traces(df)
    for trace in candle_traces:
        fig.add_trace(trace)
    fig.add_traces(create_trade_lines(trade_df))
    
    for ann in candle_annotations:
        fig.add_annotation(ann)
    
    fig.update_layout(
        title=f'{label} Candlestick Chart - {df.index.date[0]}',
        xaxis_title='Datetime', 
        yaxis_title='Price',
        xaxis_rangeslider_visible=False, 
        xaxis=dict(tickangle=-45),
        yaxis=dict(tickformat='none'),
        dragmode='pan'
    )
    return fig

def create_pie_chart(win_loss_counts, all_trades_stats, winning_trades_stats, losing_trades_stats):
    # Define the layout with 1 column for the pie and 1 column for the tables, split into 3 rows
    fig = make_subplots(
        rows=3, cols=2,
        specs=[[{"type": "pie", "rowspan": 3}, {"type": "table"}],
               [None, {"type": "table"}],
               [None, {"type": "table"}]],
        column_widths=[0.4, 0.6],
        subplot_titles=(None, "All Trades", "Winning Trades", "Losing Trades")
    )

    # Add the pie chart in the first column, spanning all three rows
    pie_trace = create_pie_trace(win_loss_counts)
    fig.add_trace(pie_trace, row=1, col=1)

    # Helper function to create table traces
    def create_table_trace(data, header_color='lightgrey'):
        headers = list(data.keys())
        values = [[v] for v in data.values()]
        return go.Table(
            header=dict(values=headers, fill_color=header_color, align='left'),
            cells=dict(values=values, align='left')
        )

    # Add table for all trades statistics
    all_trades_table = create_table_trace(all_trades_stats)
    fig.add_trace(all_trades_table, row=1, col=2)

    # Add table for winning trades statistics
    winning_trades_table = create_table_trace(winning_trades_stats, header_color='lightgreen')
    fig.add_trace(winning_trades_table, row=2, col=2)

    # Add table for losing trades statistics
    losing_trades_table = create_table_trace(losing_trades_stats, header_color='salmon')
    fig.add_trace(losing_trades_table, row=3, col=2)

    # Update layout to fit the table sizes and remove empty subplot titles
    fig.update_layout(
        title='Trade Outcome Distribution and Statistics',
        showlegend=True
    )

    return fig




def create_summary(summary_file, date, summary):
    
    
    if summary: 
        section_found = False
        content = []
        
        with open(summary_file, 'r', encoding='utf-8') as file:
            capture = False
            content = []
            
            for line in file:
                # Check if we're at the start of any '##' header
                if line.startswith('##'):
                    if 'Summary' in line:
                        # Start capturing if it's the '## Summary' section
                        capture = True
                    elif capture:
                        # Stop capturing if another '##' section starts
                        break
                elif capture:
                    # Add the current line to content if we are in the capture mode
                    content.append(line)
                    
            # Convert the captured markdown content to HTML
        html_content = markdown2.markdown(''.join(content))
    else:   
        # Convert the provided date string to a datetime object for easier comparison
        target_date = datetime.strptime(date, '%Y-%m-%d').date()
        section_found = False
        content = []
        
        with open(summary_file, 'r', encoding='utf-8') as file:
            for line in file:
                # Check for section headers
                if line.startswith('## ') and not line.startswith('## Summary'):
                    # Extract the date from the section header
                    section_date = datetime.strptime(line.strip()[3:], '%Y-%m-%d').date()
                    if section_date == target_date:
                        section_found = True
                        continue
                    elif section_found:
                        # If another section starts, stop reading
                        break
                
                if section_found:
                    # Collect all lines after the target date section until another section starts
                    content.append(line)
        html_content = markdown2.markdown(''.join(content))

    return html_content




In [18]:
folder_path = "./data/HTML/"
fig_assemble = assemble_plot(df_rth, trade_rth, pre_market, ['orange', 'purple', 'green', 'red'])
fig_statistic = create_pie_chart(win_loss_counts, all_trades_stats, winning_trades_stats, losing_trades_stats)
html_summary= create_summary('./Summary.md', date, False)



FileNotFoundError: [Errno 2] No such file or directory: './Summary.md'

## overall statistical data

In [None]:
# Path to the directory containing CSV files
directory = './data/Performance'

# Initialize an empty list to store DataFrames
dfs = []

# Iterate through all files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.csv'):
        # Construct the full file path
        filepath = os.path.join(directory, filename)
        
        # Read the CSV file into a DataFrame
        df = pd.read_csv(filepath)
        
        # Append the DataFrame to the list
        dfs.append(df)

# Concatenate all DataFrames vertically
df_overall = pd.concat(dfs, ignore_index=True)


In [None]:
## Preprocess trade to trade_rth

df_overall = df_overall.drop(columns=['symbol','_priceFormat', '_priceFormatType', '_tickSize', 'buyFillId','sellFillId'])
# 1. Convert pnl to numeric format with proper handling of negative values
df_overall['pnl'] = df_overall['pnl'].str.replace('$', '')
df_overall['pnl'] = df_overall['pnl'].str.replace('(', '-').str.replace(')', '').astype(float)

# 2. Convert boughtTimestamp and soldTimestamp to datetime format and adjust to UTC
df_overall['boughtTimestamp'] = pd.to_datetime(df_overall['boughtTimestamp']) - pd.Timedelta(hours=7)
df_overall['soldTimestamp'] = pd.to_datetime(df_overall['soldTimestamp']) - pd.Timedelta(hours=7)

# 3. Convert duration to time format
df_overall['duration'] = pd.to_timedelta(df_overall['duration'])


In [None]:
# Calculate winning and losing trades
overall_winning_trades = df_overall[df_overall['pnl'] > 0]
overall_losing_trades = df_overall[df_overall['pnl'] <= 0]

# Data for pie chart
overall_win_loss_counts = {'Winning Trades': len(overall_winning_trades), 'Losing Trades': len(overall_losing_trades)}

In [None]:
overall_trades_stats = trade_stats(df_overall)
overall_winning_trades_stats = trade_stats(overall_winning_trades)
overall_losing_trades_stats = trade_stats(overall_losing_trades)