# Functions

## Chart functions

In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import webbrowser
import os

def create_plotly_chart(data, chart_id="chart1",title="Environment - Stock prices"):
    """
    Create a Plotly line chart for stock prices and save it to an HTML file.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.
    - chart_id: Unique ID for the chart to allow adding more charts later.

    Returns:
    - Path to the saved HTML file.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Day'], y=data['Price'], mode='lines', name='Stock Price'))
    fig.update_layout(title=title, xaxis_title='Day', yaxis_title='Price')

    # Create HTML with placeholder for additional charts
    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    </head>
    <body>
        <div id="{chart_id}" style="width: 100%; height: 100%;"></div>
        <script>
            var data = {fig.to_json()};
            Plotly.newPlot("{chart_id}", data.data, data.layout);
        </script>
    </body>
    </html>
    """
    
    # Save to disk
    output_file = "stock_chart.html"
    with open(output_file, "w") as f:
        f.write(html_content)

    return output_file

def open_html_chart(file_path):
    """
    Open an HTML file in the default web browser.

    Parameters:
    - file_path: Path to the HTML file.
    """
    abs_path = os.path.abspath(file_path)
    webbrowser.open(f"file://{abs_path}")



### State charts

In [3]:
def append_state_chart_to_html(data, html_file, state_chart_id="chart2"):
    """
    Append a combined stock price and state chart to an existing HTML file with a secondary y-axis.

    Parameters:
    - data: DataFrame containing 'Day', 'Price', and 'State'.
    - html_file: Path to the existing HTML file.
    - state_chart_id: Unique ID for the chart.
    """
    # Create Combined Chart with Secondary Y-Axis
    combined_fig = go.Figure()

    # Add stock price line (primary y-axis)
    combined_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Price'],
            mode='lines',
            name='Stock Price',
            yaxis='y1',
        )
    )

    # Add state bar chart (secondary y-axis)
    combined_fig.add_trace(
        go.Bar(
            x=data['Day'],
            y=data['State'],
            name='State',
            yaxis='y2',
            opacity=0.6,  # Transparency for better overlay visibility
        )
    )

    # Update layout for dual y-axes
    combined_fig.update_layout(
        title='Stock Price and State Visualization',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Stock Price', side='left'),
        yaxis2=dict(
            title='State (1: Above 100, 0: Below 100)',
            overlaying='y',
            side='right',
            range=[-0.2, 1.2],  # Adjust to fit the binary state range
        ),
        barmode='group',  # Bars won't overlap lines
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    state_chart_script = f"""
    <div id="{state_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var stateData = {combined_fig.to_json()};
        Plotly.newPlot("{state_chart_id}", stateData.data, stateData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", state_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

### Action charts

In [4]:
def append_action_chart_with_markers_to_html(data, html_file, action_chart_id="chart3"):
    """
    Append a combined stock price and cumulative action chart with buy/sell markers to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Price', and 'Cumulative Units'.
    - html_file: Path to the existing HTML file.
    - action_chart_id: Unique ID for the chart.
    """
    # Create Combined Chart with Secondary Y-Axis
    action_fig = go.Figure()

    # Add stock price line (primary y-axis)
    action_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Price'],
            mode='lines',
            name='Stock Price',
            yaxis='y1',
        )
    )


    # Add buy markers
    buy_points = data[data['Price'] > 100]
    action_fig.add_trace(
        go.Scatter(
            x=buy_points['Day'],
            y=buy_points['Price'],
            mode='markers',
            marker=dict(color='green', size=10, symbol='triangle-up'),
            name='Buy',
            yaxis='y1',
        )
    )

    # Add sell markers
    sell_points = data[data['Price'] <= 100]
    action_fig.add_trace(
        go.Scatter(
            x=sell_points['Day'],
            y=sell_points['Price'],
            mode='markers',
            marker=dict(color='red', size=10, symbol='triangle-down'),
            name='Sell',
            yaxis='y1',
        )
    )

    # Update layout for dual y-axes
    action_fig.update_layout(
        title='Stock Price and Actions with Buy/Sell Markers',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Stock Price', side='left'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    action_chart_script = f"""
    <div id="{action_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var actionData = {action_fig.to_json()};
        Plotly.newPlot("{action_chart_id}", actionData.data, actionData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", action_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)


### Reward charts

In [5]:


def append_reward_chart_to_html(data, html_file, reward_chart_id="chart4"):
    """
    Append a reward visualization chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Reward', and 'Cumulative Reward'.
    - html_file: Path to the existing HTML file.
    - reward_chart_id: Unique ID for the chart.
    """
    # Create Combined Chart
    reward_fig = go.Figure()

    # Add cumulative reward line (primary y-axis)
    reward_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Cumulative Reward'],
            mode='lines',
            name='Cumulative Reward',
            yaxis='y1',
        )
    )

    # Add daily rewards as bar chart
    reward_fig.add_trace(
        go.Bar(
            x=data['Day'],
            y=data['Reward'],
            name='Daily Reward',
            yaxis='y1',
            marker=dict(color='blue'),
        )
    )

    # Update layout for clarity
    reward_fig.update_layout(
        title='Reward Visualization: Daily and Cumulative',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Reward Value'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    reward_chart_script = f"""
    <div id="{reward_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var rewardData = {reward_fig.to_json()};
        Plotly.newPlot("{reward_chart_id}", rewardData.data, rewardData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", reward_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

### Policy charts

In [6]:
def append_policy_chart_with_price_to_html(data, html_file, policy_chart_id="chart5"):
    """
    Append a policy visualization chart with stock price overlay to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Price', and the policy (Buy/Sell).
    - html_file: Path to the existing HTML file.
    - policy_chart_id: Unique ID for the chart.
    """
    # Define policy: 1 for Buy, -1 for Sell
    data['Policy'] = data['Price'].apply(lambda x: 1 if x > 100 else -1)

    # Create Policy Chart
    policy_fig = go.Figure()

    # Add stock price line
    policy_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Price'],
            mode='lines',
            name='Stock Price',
            line=dict(color='blue'),
            yaxis='y1',  # Use primary y-axis for stock price
        )
    )

    # Add policy decisions as a step chart
    policy_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Policy'],
            mode='lines+markers',
            name='Policy',
            line=dict(shape='hv', color='green'),
            marker=dict(size=8, symbol='circle'),
            yaxis='y2',  # Use secondary y-axis for policy decisions
        )
    )

    # Add reference line at policy=0
    policy_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=[0] * len(data),
            mode='lines',
            line=dict(dash='dash', color='gray'),
            name='Reference Line (Policy=0)',
            yaxis='y2',  # Reference line aligns with the policy axis
        )
    )

    # Update layout for dual y-axes
    policy_fig.update_layout(
        title='Policy Visualization with Stock Price',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Stock Price', side='left'),
        yaxis2=dict(
            title='Policy Decision',
            tickvals=[-1, 0, 1],
            ticktext=['Sell', 'Neutral', 'Buy'],
            overlaying='y',
            side='right',
        ),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    policy_chart_script = f"""
    <div id="{policy_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var policyData = {policy_fig.to_json()};
        Plotly.newPlot("{policy_chart_id}", policyData.data, policyData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", policy_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)


### Q Value charts

In [7]:

def append_q_value_chart_to_html(data, html_file, q_value_chart_id="chart6"):
    """
    Append a Q-value visualization chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Q_Buy', and 'Q_Sell'.
    - html_file: Path to the existing HTML file.
    - q_value_chart_id: Unique ID for the chart.
    """
    # Create Q-Value Chart
    q_value_fig = go.Figure()

    # Add Q(Buy) line
    q_value_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Q_Buy'],
            mode='lines',
            name='Q(Buy)',
            line=dict(color='green', dash='solid'),
        )
    )

    # Add Q(Sell) line
    q_value_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Q_Sell'],
            mode='lines',
            name='Q(Sell)',
            line=dict(color='red', dash='solid'),
        )
    )

    # Update layout
    q_value_fig.update_layout(
        title='Q-Value Visualization for Buy and Sell Actions',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Q-Value'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    q_value_chart_script = f"""
    <div id="{q_value_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var qValueData = {q_value_fig.to_json()};
        Plotly.newPlot("{q_value_chart_id}", qValueData.data, qValueData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", q_value_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

### Random Policy charts

In [8]:


def append_random_policy_marker_chart_to_html(data, html_file, random_policy_marker_chart_id="chart8"):
    """
    Append a chart that shows Buy/Sell markers for the random policy to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Price', and 'Random_Policy'.
    - html_file: Path to the existing HTML file.
    - random_policy_marker_chart_id: Unique ID for the chart.
    """
    # Create Marker Chart
    random_policy_fig = go.Figure()

    # Add stock price line
    random_policy_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Price'],
            mode='lines',
            name='Stock Price',
            line=dict(color='blue'),
        )
    )

    # Add Buy markers (Random Policy = 1)
    buy_points = data[data['Random_Policy'] == 1]
    random_policy_fig.add_trace(
        go.Scatter(
            x=buy_points['Day'],
            y=buy_points['Price'],
            mode='markers',
            marker=dict(color='green', size=10, symbol='triangle-up'),
            name='Buy',
        )
    )

    # Add Sell markers (Random Policy = -1)
    sell_points = data[data['Random_Policy'] == -1]
    random_policy_fig.add_trace(
        go.Scatter(
            x=sell_points['Day'],
            y=sell_points['Price'],
            mode='markers',
            marker=dict(color='red', size=10, symbol='triangle-down'),
            name='Sell',
        )
    )

    # Update layout
    random_policy_fig.update_layout(
        title='Buy and Sell Markers for Random Policy',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Price'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    random_policy_marker_chart_script = f"""
    <div id="{random_policy_marker_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var randomPolicyMarkerData = {random_policy_fig.to_json()};
        Plotly.newPlot("{random_policy_marker_chart_id}", randomPolicyMarkerData.data, randomPolicyMarkerData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", random_policy_marker_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)
        

def append_random_policy_q_value_chart_to_html(data, html_file, random_policy_chart_id="chart7"):
    """
    Append a Q-value visualization chart for random policy to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Q_Buy', and 'Q_Sell'.
    - html_file: Path to the existing HTML file.
    - random_policy_chart_id: Unique ID for the chart.
    """
    # Create Q-Value Chart
    q_value_fig = go.Figure()

    # Add Q(Buy) line
    q_value_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Q_Buy'],
            mode='lines',
            name='Q(Buy)',
            line=dict(color='green', dash='solid'),
        )
    )

    # Add Q(Sell) line
    q_value_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Q_Sell'],
            mode='lines',
            name='Q(Sell)',
            line=dict(color='red', dash='solid'),
        )
    )

    # Update layout
    q_value_fig.update_layout(
        title='Q-Value Visualization for Random Policy',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Q-Value'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    random_policy_chart_script = f"""
    <div id="{random_policy_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var randomPolicyData = {q_value_fig.to_json()};
        Plotly.newPlot("{random_policy_chart_id}", randomPolicyData.data, randomPolicyData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", random_policy_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)


### Random Policy Charts - multiple runs

In [9]:

def append_simulation_q_value_chart_to_html(data, simulation_results, html_file, num_simulations, simulation_chart_id="chart9"):
    """
    Append a Q-value simulation chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day'.
    - simulation_results: Dictionary containing Q-values from simulations and averages.
    - html_file: Path to the existing HTML file.
    - simulation_chart_id: Unique ID for the chart.
    """
    # Extract simulation results
    q_buy_all = simulation_results['q_buy_all']
    q_sell_all = simulation_results['q_sell_all']
    avg_q_buy = simulation_results['avg_q_buy']
    avg_q_sell = simulation_results['avg_q_sell']

    # Create Q-Value Simulation Chart
    simulation_fig = go.Figure()

    # Add individual simulation lines for Q(Buy)
    for q_buy in q_buy_all:
        simulation_fig.add_trace(
            go.Scatter(
                x=data['Day'],
                y=q_buy,
                mode='lines',
                line=dict(width=0.5, color='rgba(0, 128, 0, 0.2)'),  # Light green
                showlegend=False,
            )
        )

    # Add individual simulation lines for Q(Sell)
    for q_sell in q_sell_all:
        simulation_fig.add_trace(
            go.Scatter(
                x=data['Day'],
                y=q_sell,
                mode='lines',
                line=dict(width=0.5, color='rgba(255, 0, 0, 0.2)'),  # Light red
                showlegend=False,
            )
        )

    # Add average Q(Buy)
    simulation_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=avg_q_buy,
            mode='lines',
            name='Avg Q(Buy)',
            line=dict(color='green', width=2),
        )
    )

    # Add average Q(Sell)
    simulation_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=avg_q_sell,
            mode='lines',
            name='Avg Q(Sell)',
            line=dict(color='red', width=2),
        )
    )

    # Update layout
    simulation_fig.update_layout(
        title=f'Q-Value Simulation ({num_simulations} Runs) and Average',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Q-Value'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    simulation_chart_script = f"""
    <div id="{simulation_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var simulationData = {simulation_fig.to_json()};
        Plotly.newPlot("{simulation_chart_id}", simulationData.data, simulationData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", simulation_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

        
        
        
        
        
        
        
        
def append_q_value_histogram_chart_to_html(simulation_results, html_file, histogram_chart_id="chart10"):
    """
    Append a histogram chart for Q-values at the start of simulations to an existing HTML file.

    Parameters:
    - simulation_results: Dictionary containing Q-values from simulations.
    - html_file: Path to the existing HTML file.
    - histogram_chart_id: Unique ID for the chart.
    """
    # Extract Q-values at the start of simulations (t = 0)
    q_buy_start = simulation_results['q_buy_all'][:, 0]
    q_sell_start = simulation_results['q_sell_all'][:, 0]

    # Create Histogram Chart
    histogram_fig = go.Figure()

    # Add Q(Buy) histogram
    histogram_fig.add_trace(
        go.Histogram(
            x=q_buy_start,
            name='Q(Buy)',
            opacity=0.75,
            marker=dict(color='green'),
            nbinsx=200,  # Number of bins
        )
    )

    # Add Q(Sell) histogram
    histogram_fig.add_trace(
        go.Histogram(
            x=q_sell_start,
            name='Q(Sell)',
            opacity=0.75,
            marker=dict(color='red'),
            nbinsx=200,
        )
    )

    # Update layout
    histogram_fig.update_layout(
        title='Histogram of Q-Values at Start of Simulations',
        xaxis=dict(title='Q-Value'),
        yaxis=dict(title='Frequency'),
        barmode='overlay',  # Overlay histograms for comparison
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    histogram_chart_script = f"""
    <div id="{histogram_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var histogramData = {histogram_fig.to_json()};
        Plotly.newPlot("{histogram_chart_id}", histogramData.data, histogramData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", histogram_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

        
        

## Environment functions

In [10]:
def generate_stock_prices(start_price=100, days=300, volatility=0.02):
    """
    Generate a simulated stock price series.
    
    Parameters:
    - start_price: Initial stock price.
    - days: Number of days to simulate.
    - volatility: Daily volatility percentage.

    Returns:
    - DataFrame with simulated prices.
    """
    prices = [start_price]
    for _ in range(1, days):
        change_percent = np.random.normal(0, volatility)
        new_price = prices[-1] * (1 + change_percent)
        prices.append(new_price)
    return pd.DataFrame({"Day": range(days), "Price": prices})


## State functions

In [11]:
def generate_states(data):
    """
    Generate state data based on the stock price.

    Parameters:
    - data: DataFrame containing 'Price'.

    Returns:
    - DataFrame with an additional 'State' column.
    """
    data['State'] = (data['Price'] > 100).astype(int)
    return data

##  Reward functions

In [12]:
def generate_rewards(data):
    """
    Calculate rewards based on buy/sell actions.

    Parameters:
    - data: DataFrame containing 'Price', 'State', and 'Cumulative Units'.

    Returns:
    - DataFrame with 'Reward' and 'Cumulative Reward' columns added.
    """
    # Calculate price difference
    price_diff = data['Price'].diff().fillna(0)

    # Calculate reward based on actions
    rewards = price_diff * ((data['Price'] > 100).astype(int) - (data['Price'] <= 100).astype(int))
    data['Reward'] = rewards
    data['Cumulative Reward'] = rewards.cumsum()

    return data

## Q Value functions

In [13]:

def generate_q_values(data):
    """
    Generate Q-values for Buy and Sell actions.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.

    Returns:
    - DataFrame with 'Q_Buy' and 'Q_Sell' columns added.
    """
    # Example Q-values: Random initialization or derived from a model
    # (For simplicity, these are calculated based on the price trend)
    data['Q_Buy'] = data['Price'].rolling(window=3, min_periods=1).mean() + np.random.uniform(-5, 5, len(data))
    data['Q_Sell'] = -data['Price'].rolling(window=3, min_periods=1).mean() + np.random.uniform(-5, 5, len(data))
    return data

def compute_q_values(data):
    """
    Compute Q-values for Buy and Sell actions at each time step.

    Parameters:
    - data: DataFrame containing 'Price' and 'Policy'.

    Returns:
    - DataFrame with 'Q_Buy' and 'Q_Sell' columns added.
    """
    q_buy = []
    q_sell = []

    for t in range(len(data)):
        # Immediate reward for Buy and Sell
        immediate_buy_reward = (data['Price'].iloc[t + 1] - data['Price'].iloc[t]) if t < len(data) - 1 else 0
        immediate_sell_reward = (data['Price'].iloc[t] - data['Price'].iloc[t + 1]) if t < len(data) - 1 else 0

        # Future reward following the policy
        future_reward = 0
        for future_t in range(t + 1, len(data)):
            reward = (data['Price'].iloc[future_t] - data['Price'].iloc[future_t - 1]) if data['Policy'].iloc[future_t] == 1 else \
                     (data['Price'].iloc[future_t - 1] - data['Price'].iloc[future_t])
            future_reward += reward

        # Q-values for Buy and Sell
        q_buy.append(immediate_buy_reward + future_reward)
        q_sell.append(immediate_sell_reward + future_reward)

    data['Q_Buy'] = q_buy
    data['Q_Sell'] = q_sell
    return data


## Random Policy functions

In [14]:


def generate_random_policy(data):
    """
    Generate a random policy (50% Buy, 50% Sell) for each time step.

    Parameters:
    - data: DataFrame containing 'Day'.

    Returns:
    - DataFrame with 'Random_Policy' column added.
    """
    data['Random_Policy'] = np.random.choice([1, -1], size=len(data))  # 1: Buy, -1: Sell
    return data

def compute_q_values_random_policy(data):
    """
    Compute Q-values for Buy and Sell actions under a random policy.

    Parameters:
    - data: DataFrame containing 'Price' and 'Random_Policy'.

    Returns:
    - DataFrame with 'Q_Buy' and 'Q_Sell' columns added.
    """
    q_buy = []
    q_sell = []

    for t in range(len(data)):
        # Immediate reward for Buy and Sell
        immediate_buy_reward = (data['Price'].iloc[t + 1] - data['Price'].iloc[t]) if t < len(data) - 1 else 0
        immediate_sell_reward = (data['Price'].iloc[t] - data['Price'].iloc[t + 1]) if t < len(data) - 1 else 0

        # Future reward following the random policy
        future_reward = 0
        for future_t in range(t + 1, len(data)):
            reward = (data['Price'].iloc[future_t] - data['Price'].iloc[future_t - 1]) if data['Random_Policy'].iloc[future_t] == 1 else \
                     (data['Price'].iloc[future_t - 1] - data['Price'].iloc[future_t])
            future_reward += reward

        # Q-values for Buy and Sell
        q_buy.append(immediate_buy_reward + future_reward)
        q_sell.append(immediate_sell_reward + future_reward)

    data['Q_Buy'] = q_buy
    data['Q_Sell'] = q_sell
    return data


def run_random_policy_simulations(data, num_simulations=100):
    """
    Run the random policy multiple times and compute Q-values for each simulation.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.
    - num_simulations: Number of random policy simulations.

    Returns:
    - Dictionary with 'Q_Buy' and 'Q_Sell' for each simulation, and their averages.
    """
    q_buy_all = []
    q_sell_all = []

    for _ in range(num_simulations):
        # Generate a random policy
        sim_data = data.copy()
        sim_data = generate_random_policy(sim_data)
        sim_data = compute_q_values_random_policy(sim_data)

        # Collect Q-values
        q_buy_all.append(sim_data['Q_Buy'].values)
        q_sell_all.append(sim_data['Q_Sell'].values)

    # Convert to arrays for easy averaging
    q_buy_all = np.array(q_buy_all)
    q_sell_all = np.array(q_sell_all)

    # Compute average Q-values
    avg_q_buy = q_buy_all.mean(axis=0)
    avg_q_sell = q_sell_all.mean(axis=0)

    return {
        'q_buy_all': q_buy_all,
        'q_sell_all': q_sell_all,
        'avg_q_buy': avg_q_buy,
        'avg_q_sell': avg_q_sell,
    }


# **Chapter 1 - Introduction to RL**

# Environment - Stock price process 

In [15]:
# Example usage
stock_data = generate_stock_prices(days=252)
html_file = create_plotly_chart(stock_data, title="Environment - Stock prices")
open_html_chart(html_file)


# States

In [16]:
# Example usage
stock_data = generate_stock_prices()

# Generate states and append the second chart
state_data = generate_states(stock_data)

# charts
html_file = create_plotly_chart(stock_data)  # Create initial chart
append_state_chart_to_html(state_data, html_file)
open_html_chart(html_file)


# Actions

In [18]:
def generate_actions(data):
    """
    Generate buy/sell actions based on stock price.

    Parameters:
    - data: DataFrame containing 'Price'.

    Returns:
    - DataFrame with an additional 'Cumulative Units' column.
    """
    actions = (data['Price'] > 100).astype(int) - (data['Price'] <= 100).astype(int)
    data['Cumulative Units'] = actions.cumsum()
    return data


In [19]:
# Example usage
stock_data = generate_stock_prices()

# Generate states and append the second combined chart with dual y-axes
stock_data = generate_states(stock_data)

# Generate actions and append the third chart with markers
stock_data = generate_actions(stock_data)

# Open the updated HTML file
html_file = create_plotly_chart(stock_data)  # Create initial stock price chart
append_state_chart_to_html(stock_data, html_file)
append_action_chart_with_markers_to_html(stock_data, html_file)
open_html_chart(html_file)


# Rewards

In [20]:
# Example usage
stock_data = generate_stock_prices()

# Generate states and append the second combined chart with dual y-axes
stock_data = generate_states(stock_data)

# Generate actions and append the third chart with markers
stock_data = generate_actions(stock_data)

# Generate rewards and append the fourth chart
stock_data = generate_rewards(stock_data)

# Open the updated HTML file
html_file = create_plotly_chart(stock_data)  # Create initial stock price chart
append_state_chart_to_html(stock_data, html_file)
append_action_chart_with_markers_to_html(stock_data, html_file)
append_reward_chart_to_html(stock_data, html_file)
open_html_chart(html_file)


# Policy

In [21]:
# Example usage
stock_data = generate_stock_prices()

# Generate states and append the second combined chart with dual y-axes
stock_data = generate_states(stock_data)

# Generate actions and append the third chart with markers
stock_data = generate_actions(stock_data)

# Generate rewards and append the fourth chart
stock_data = generate_rewards(stock_data)

# Append policy chart with stock price overlay
html_file = create_plotly_chart(stock_data)  # Create initial stock price chart
append_state_chart_to_html(stock_data, html_file)
append_action_chart_with_markers_to_html(stock_data, html_file)
append_reward_chart_to_html(stock_data, html_file)
append_policy_chart_with_price_to_html(stock_data, html_file)

# Open the updated HTML file
open_html_chart(html_file)


# Q - Values

In [22]:



# Example usage
stock_data = generate_stock_prices()

# Generate states and append the second combined chart with dual y-axes
stock_data = generate_states(stock_data)

# Generate actions and append the third chart with markers
stock_data = generate_actions(stock_data)

# Generate rewards and append the fourth chart
stock_data = generate_rewards(stock_data)

# Append policy chart with stock price overlay

# Append policy chart with Q-values overlay
html_file = create_plotly_chart(stock_data)  # Create initial stock price chart
append_state_chart_to_html(stock_data, html_file)
append_action_chart_with_markers_to_html(stock_data, html_file)
append_reward_chart_to_html(stock_data, html_file)
append_policy_chart_with_price_to_html(stock_data, html_file)
stock_data = compute_q_values(stock_data)  # Compute Q-values

# Append Q-value chart to the HTML
append_q_value_chart_to_html(stock_data, html_file)

# Open the updated HTML file
open_html_chart(html_file)



# Random Policy

In [23]:
# Example usage
stock_data = generate_random_policy(stock_data)  # Generate random policy
stock_data = compute_q_values_random_policy(stock_data)  # Compute Q-values for random policy

# Append Buy/Sell marker chart for random policy to the HTML
append_random_policy_marker_chart_to_html(stock_data, html_file)

# Append Q-value chart for random policy to the HTML
append_random_policy_q_value_chart_to_html(stock_data, html_file)

# Open the updated HTML file
open_html_chart(html_file)


# Random Policy Multiple Times

In [38]:
# Example usage
stock_data = generate_stock_prices(days=100)

# Run a number of simulations of the random policy
num_simulations = 5000
simulation_results = run_random_policy_simulations(stock_data, num_simulations=num_simulations)

html_file = create_plotly_chart(stock_data)  # Create initial stock price chart

# Append Q-value simulation chart to the HTML
append_simulation_q_value_chart_to_html(stock_data, simulation_results, html_file, num_simulations=num_simulations)

# Append histogram chart for Q-values at the start of simulations
append_q_value_histogram_chart_to_html(simulation_results, html_file)

# Open the updated HTML file
open_html_chart(html_file)

# Improve Random Policy

In [25]:


def append_policy_comparison_chart_to_html(data, html_file, policy_comparison_chart_id="chart11"):
    """
    Append a policy comparison chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Day', 'Price', 'Random_Policy', and 'Improved_Policy'.
    - html_file: Path to the existing HTML file.
    - policy_comparison_chart_id: Unique ID for the chart.
    """
    # Create Policy Comparison Chart
    comparison_fig = go.Figure()

    # Add stock price line
    comparison_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Price'],
            mode='lines',
            name='Stock Price',
            line=dict(color='blue'),
        )
    )

    # Add random policy Buy/Sell markers
    random_buy = data[data['Random_Policy'] == 1]
    random_sell = data[data['Random_Policy'] == -1]
    comparison_fig.add_trace(
        go.Scatter(
            x=random_buy['Day'],
            y=random_buy['Price'],
            mode='markers',
            marker=dict(color='green', size=8, symbol='triangle-up'),
            name='Random Policy Buy',
        )
    )
    comparison_fig.add_trace(
        go.Scatter(
            x=random_sell['Day'],
            y=random_sell['Price'],
            mode='markers',
            marker=dict(color='red', size=8, symbol='triangle-down'),
            name='Random Policy Sell',
        )
    )

    # Add improved policy Buy/Sell markers
    improved_buy = data[data['Improved_Policy'] == 1]
    improved_sell = data[data['Improved_Policy'] == -1]
    comparison_fig.add_trace(
        go.Scatter(
            x=improved_buy['Day'],
            y=improved_buy['Price'],
            mode='markers',
            marker=dict(color='darkgreen', size=10, symbol='triangle-up'),
            name='Improved Policy Buy',
        )
    )
    comparison_fig.add_trace(
        go.Scatter(
            x=improved_sell['Day'],
            y=improved_sell['Price'],
            mode='markers',
            marker=dict(color='darkred', size=10, symbol='triangle-down'),
            name='Improved Policy Sell',
        )
    )

    # Update layout
    comparison_fig.update_layout(
        title='Policy Comparison: Random vs. Improved',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Price'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    policy_comparison_chart_script = f"""
    <div id="{policy_comparison_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var policyComparisonData = {comparison_fig.to_json()};
        Plotly.newPlot("{policy_comparison_chart_id}", policyComparisonData.data, policyComparisonData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", policy_comparison_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)



def append_policy_evaluation_chart_to_html(data, random_total_reward, improved_total_reward, html_file, evaluation_chart_id="chart12"):
    """
    Append a policy evaluation chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing rewards for both policies.
    - random_total_reward: Total reward for the random policy.
    - improved_total_reward: Total reward for the improved policy.
    - html_file: Path to the existing HTML file.
    - evaluation_chart_id: Unique ID for the chart.
    """
    # Create Reward Comparison Chart
    evaluation_fig = go.Figure()

    # Add random policy rewards
    evaluation_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Random_Reward'],
            mode='lines+markers',
            name='Random Policy Reward',
            line=dict(color='blue'),
        )
    )

    # Add improved policy rewards
    evaluation_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Improved_Reward'],
            mode='lines+markers',
            name='Improved Policy Reward',
            line=dict(color='green'),
        )
    )

    # Update layout
    evaluation_fig.update_layout(
        title='Reward Comparison: Random vs. Improved Policy',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Reward'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Create Total Reward Bar Chart
    total_reward_fig = go.Figure()

    # Add bars for total rewards
    total_reward_fig.add_trace(
        go.Bar(
            x=['Random Policy', 'Improved Policy'],
            y=[random_total_reward, improved_total_reward],
            name='Total Reward',
            marker=dict(color=['blue', 'green']),
        )
    )

    # Update layout
    total_reward_fig.update_layout(
        title='Total Reward Comparison',
        xaxis=dict(title='Policy'),
        yaxis=dict(title='Total Reward'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart scripts
    reward_chart_script = f"""
    <div id="{evaluation_chart_id}_rewards" style="width: 100%; height: 400px;"></div>
    <script>
        var rewardData = {evaluation_fig.to_json()};
        Plotly.newPlot("{evaluation_chart_id}_rewards", rewardData.data, rewardData.layout);
    </script>
    """

    total_reward_chart_script = f"""
    <div id="{evaluation_chart_id}_total" style="width: 100%; height: 400px;"></div>
    <script>
        var totalRewardData = {total_reward_fig.to_json()};
        Plotly.newPlot("{evaluation_chart_id}_total", totalRewardData.data, totalRewardData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", reward_chart_script + total_reward_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)

        
def append_cumulative_reward_chart_to_html(data, html_file, cumulative_chart_id="chart13"):
    """
    Append a cumulative reward comparison chart to an existing HTML file.

    Parameters:
    - data: DataFrame containing 'Random_Reward' and 'Improved_Reward'.
    - html_file: Path to the existing HTML file.
    - cumulative_chart_id: Unique ID for the chart.
    """
    # Compute cumulative rewards
    data['Random_Cumulative_Reward'] = data['Random_Reward'].cumsum()
    data['Improved_Cumulative_Reward'] = data['Improved_Reward'].cumsum()

    # Create Cumulative Reward Chart
    cumulative_fig = go.Figure()

    # Add cumulative rewards for random policy
    cumulative_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Random_Cumulative_Reward'],
            mode='lines',
            name='Random Policy Cumulative Reward',
            line=dict(color='blue'),
        )
    )

    # Add cumulative rewards for improved policy
    cumulative_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Improved_Cumulative_Reward'],
            mode='lines',
            name='Improved Policy Cumulative Reward',
            line=dict(color='green'),
        )
    )

    # Update layout
    cumulative_fig.update_layout(
        title='Cumulative Reward Comparison',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Cumulative Reward'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    cumulative_chart_script = f"""
    <div id="{cumulative_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var cumulativeRewardData = {cumulative_fig.to_json()};
        Plotly.newPlot("{cumulative_chart_id}", cumulativeRewardData.data, cumulativeRewardData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", cumulative_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)
        
        
def append_decision_comparison_chart(data, html_file, decision_chart_id="chart15"):
    """
    Append a chart comparing decisions of Random and Improved policies.

    Parameters:
    - data: DataFrame containing 'Random_Policy' and 'Improved_Policy'.
    - html_file: Path to the existing HTML file.
    - decision_chart_id: Unique ID for the chart.
    """
    # Create Decision Comparison Chart
    decision_fig = go.Figure()

    decision_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Random_Policy'],
            mode='lines+markers',
            name='Random Policy Decisions',
            line=dict(color='blue'),
        )
    )

    decision_fig.add_trace(
        go.Scatter(
            x=data['Day'],
            y=data['Improved_Policy'],
            mode='lines+markers',
            name='Improved Policy Decisions',
            line=dict(color='green'),
        )
    )

    # Update layout
    decision_fig.update_layout(
        title='Decision Comparison: Random vs. Improved Policy',
        xaxis=dict(title='Day'),
        yaxis=dict(title='Decision (-1: Sell, 1: Buy, 0: No Action)'),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    )

    # Generate chart script
    decision_chart_script = f"""
    <div id="{decision_chart_id}" style="width: 100%; height: 400px;"></div>
    <script>
        var decisionData = {decision_fig.to_json()};
        Plotly.newPlot("{decision_chart_id}", decisionData.data, decisionData.layout);
    </script>
    """

    # Append to existing HTML
    with open(html_file, "r") as f:
        content = f.read()

    # Insert before closing </body>
    updated_content = content.replace("</body>", decision_chart_script + "\n</body>")

    with open(html_file, "w") as f:
        f.write(updated_content)


       

In [26]:
def evaluate_policies(data):
    """
    Evaluate both the random and improved policies by computing rewards and total rewards.

    Parameters:
    - data: DataFrame containing 'Price', 'Random_Policy', and 'Improved_Policy'.

    Returns:
    - DataFrame with 'Random_Reward', 'Improved_Reward', 'Random_Total_Reward', and 'Improved_Total_Reward'.
    """
    random_rewards = []
    improved_rewards = []

    for t in range(len(data) - 1):
        # Reward for Random Policy
        if data['Random_Policy'].iloc[t] == 1:  # Buy
            random_rewards.append(data['Price'].iloc[t + 1] - data['Price'].iloc[t])
        elif data['Random_Policy'].iloc[t] == -1:  # Sell
            random_rewards.append(data['Price'].iloc[t] - data['Price'].iloc[t + 1])
        else:
            random_rewards.append(0)

        # Reward for Improved Policy
        if data['Improved_Policy'].iloc[t] == 1:  # Buy
            improved_rewards.append(data['Price'].iloc[t + 1] - data['Price'].iloc[t])
        elif data['Improved_Policy'].iloc[t] == -1:  # Sell
            improved_rewards.append(data['Price'].iloc[t] - data['Price'].iloc[t + 1])
        else:
            improved_rewards.append(0)

    # Append the last reward as 0 (no action possible at the last step)
    random_rewards.append(0)
    improved_rewards.append(0)

    data['Random_Reward'] = random_rewards
    data['Improved_Reward'] = improved_rewards

    # Compute total rewards
    random_total_reward = sum(random_rewards)
    improved_total_reward = sum(improved_rewards)

    return data, random_total_reward, improved_total_reward


 
def derive_optimal_average_q_value_policy(data, simulation_results):
    """
    Derive a random policy based on average Q-values from simulations.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.
    - simulation_results: Dictionary containing Q-values from simulations.

    Returns:
    - DataFrame with 'Random_Policy' column set to the policy derived from average Q-values.
    """
    avg_q_buy = simulation_results['avg_q_buy']
    avg_q_sell = simulation_results['avg_q_sell']

    # Derive policy based on average Q-values
    policy = []
    for q_buy, q_sell in zip(avg_q_buy, avg_q_sell):
        if q_buy > q_sell:
            policy.append(1)  # Buy
        else:
            policy.append(-1)  # Sell

    data['Random_Policy'] = policy
    return data        
# Example usage

def improved_policy(data):
    """
    Implement an improved policy based on Q-values.

    Parameters:
    - data: DataFrame containing 'Q_Buy' and 'Q_Sell'.

    Returns:
    - DataFrame with 'Improved_Policy' column added.
    """
    threshold = 0  # Define a decision threshold
    policy = []

    for i in range(len(data)):
        q_buy = data['Q_Buy'].iloc[i]
        q_sell = data['Q_Sell'].iloc[i]

        # Improved policy logic
        if q_buy - q_sell > threshold:
            policy.append(1)  # Buy
        elif q_sell - q_buy > threshold:
            policy.append(-1)  # Sell
        else:
            policy.append(0)  # No action

    data['Improved_Policy'] = policy
    return data

# **Chapter II Optimal Policies**

# Functions 

In [27]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import webbrowser
import os

# Set Plotly to use the 'plotly' template by default
pio.templates.default = "plotly"

## Charts

In [28]:

# Function to create and save Plotly charts
def create_and_save_plots(stock_data, simulation_results, html_file='policies_comparison.html'):
    """
    Create multiple Plotly charts, save them to an HTML file, and open the file.

    Parameters:
    - stock_data: DataFrame containing all policies and rewards.
    - simulation_results: Dictionary containing Q-values from simulations.
    - html_file: Name of the HTML file to save the plots.
    """
    # Initialize HTML content
    html_content = """
    <html>
    <head>
        <title>Policies Comparison</title>
        <meta charset="utf-8" />
    </head>
    <body>
        <h1>Policies Comparison</h1>
    """

    # 1. Stock Prices Over Days
    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(x=stock_data['Day'], y=stock_data['Price'],
                              mode='lines+markers', name='Stock Price'))
    fig1.update_layout(title='Stock Prices Over Days',
                       xaxis_title='Day',
                       yaxis_title='Price')
    html_content += pio.to_html(fig1, full_html=False, include_plotlyjs='cdn')

    # 2. Q_Buy and Q_Sell Across Simulations
    fig2 = go.Figure()
    for sim in range(simulation_results['q_buy_all'].shape[0]):
        fig2.add_trace(go.Scatter(x=stock_data['Day'],
                                  y=simulation_results['q_buy_all'][sim],
                                  mode='lines+markers',
                                  name=f'Simulation {sim + 1} Q_Buy'))
        fig2.add_trace(go.Scatter(x=stock_data['Day'],
                                  y=simulation_results['q_sell_all'][sim],
                                  mode='lines+markers',
                                  name=f'Simulation {sim + 1} Q_Sell'))
    fig2.update_layout(title='Q_Buy and Q_Sell Across Simulations',
                       xaxis_title='Day',
                       yaxis_title='Q-Value')
    html_content += pio.to_html(fig2, full_html=False, include_plotlyjs=False)

    # 3. Average Q_Buy and Q_Sell per Day
    fig3 = go.Figure()
    fig3.add_trace(go.Bar(x=stock_data['Day'], y=simulation_results['avg_q_buy'],
                          name='Average Q_Buy', marker_color='indianred'))
    fig3.add_trace(go.Bar(x=stock_data['Day'], y=simulation_results['avg_q_sell'],
                          name='Average Q_Sell', marker_color='lightsalmon'))
    fig3.update_layout(title='Average Q_Buy and Q_Sell per Day',
                       xaxis_title='Day',
                       yaxis_title='Average Q-Value',
                       barmode='group')
    html_content += pio.to_html(fig3, full_html=False, include_plotlyjs=False)

    # 4. Policy Decisions Over Days
    fig4 = go.Figure()
    # Avg Q Policy
    fig4.add_trace(go.Scatter(x=stock_data['Day'], y=stock_data['avg_q_policy'],
                              mode='markers+lines', name='Average Q-Value Policy',
                              marker=dict(symbol='triangle-up', size=10, color='green')))
    # True Random Policy
    fig4.add_trace(go.Scatter(x=stock_data['Day'], y=stock_data['random_policy'],
                              mode='markers+lines', name='True Random Policy',
                              marker=dict(symbol='triangle-down', size=10, color='red')))
    fig4.update_layout(title='Policy Decisions Over Days',
                       xaxis_title='Day',
                       yaxis_title='Policy Action',
                       yaxis=dict(tickvals=[-1, 0, 1],
                                  ticktext=['Sell', 'No Action', 'Buy'],
                                  range=[-1.5, 1.5]))
    html_content += pio.to_html(fig4, full_html=False, include_plotlyjs=False)

    # 5. Rewards Per Day
    fig5 = go.Figure()
    fig5.add_trace(go.Bar(x=stock_data['Day'], y=stock_data['Random_Reward'],
                          name='Random Policy Reward', marker_color='blue'))
    fig5.add_trace(go.Bar(x=stock_data['Day'], y=stock_data['Avg_Q_Reward'],
                          name='Average Q-Value Policy Reward', marker_color='orange'))
    fig5.update_layout(title='Rewards Per Day',
                       xaxis_title='Day',
                       yaxis_title='Reward',
                       barmode='group')
    html_content += pio.to_html(fig5, full_html=False, include_plotlyjs=False)

    # 6. Total Rewards Comparison
    fig6 = go.Figure(data=[
        go.Bar(name='Random Policy', x=['Total Reward'], y=[stock_data['Random_Reward'].sum()]),
        go.Bar(name='Average Q-Value Policy', x=['Total Reward'], y=[stock_data['Avg_Q_Reward'].sum()])
    ])
    fig6.update_layout(title='Total Rewards Comparison',
                       xaxis_title='Policy',
                       yaxis_title='Total Reward',
                       barmode='group')
    html_content += pio.to_html(fig6, full_html=False, include_plotlyjs=False)

    # 7. Q_Buy vs Q_Sell Average
    fig7 = go.Figure()
    fig7.add_trace(go.Scatter(x=stock_data['Day'], y=simulation_results['avg_q_buy'],
                              mode='lines+markers', name='Average Q_Buy',
                              line=dict(color='blue')))
    fig7.add_trace(go.Scatter(x=stock_data['Day'], y=simulation_results['avg_q_sell'],
                              mode='lines+markers', name='Average Q_Sell',
                              line=dict(color='red')))
    fig7.update_layout(title='Average Q_Buy vs Q_Sell Over Days',
                       xaxis_title='Day',
                       yaxis_title='Average Q-Value')
    html_content += pio.to_html(fig7, full_html=False, include_plotlyjs=False)

    # 8. Q_Buy and Q_Sell for Each Simulation (Heatmap)
    q_buy_df = pd.DataFrame(simulation_results['q_buy_all'],
                            columns=[f'Day{day}_Q_Buy' for day in stock_data['Day']])
    q_sell_df = pd.DataFrame(simulation_results['q_sell_all'],
                             columns=[f'Day{day}_Q_Sell' for day in stock_data['Day']])
    fig8 = go.Figure(data=[
        go.Heatmap(
            z=q_buy_df.values,
            x=q_buy_df.columns,
            y=[f'Sim {i+1}' for i in range(q_buy_df.shape[0])],
            colorscale='Viridis',
            name='Q_Buy'
        ),
        go.Heatmap(
            z=q_sell_df.values,
            x=q_sell_df.columns,
            y=[f'Sim {i+1}' for i in range(q_sell_df.shape[0])],
            colorscale='Cividis',
            name='Q_Sell'
        )
    ])
    fig8.update_layout(title='Q_Buy and Q_Sell Across Simulations',
                       xaxis_title='Day and Action',
                       yaxis_title='Simulation')
    html_content += pio.to_html(fig8, full_html=False, include_plotlyjs=False)

    # Close HTML content
    html_content += """
    </body>
    </html>
    """

    # Write HTML content to file
    with open(html_file, 'w') as f:
        f.write(html_content)

    # Open the HTML file in the default web browser
    file_path = os.path.abspath(html_file)
    webbrowser.open(f'file://{file_path}')

# Function to display detailed tables
def display_detailed_example(stock_data, simulation_results, random_total, avg_q_total):
    """
    Display detailed numerical example tables.

    Parameters:
    - stock_data: DataFrame containing all policies and rewards.
    - simulation_results: Dictionary containing Q-values from simulations.
    - random_total: Total reward for Random Policy.
    - avg_q_total: Total reward for Avg Q Policy.
    """
    print("### 1. Stock Prices Over 5 Days\n")
    print(stock_data[['Day', 'Price']].to_string(index=False))
    print("\n")

    print("### 2. Simulation Results: Q_Buy and Q_Sell for Each Simulation\n")
    q_buy_df = pd.DataFrame(simulation_results['q_buy_all'], 
                            columns=[f'Day{day}_Q_Buy' for day in stock_data['Day']])
    q_sell_df = pd.DataFrame(simulation_results['q_sell_all'], 
                             columns=[f'Day{day}_Q_Sell' for day in stock_data['Day']])
    print("Q_Buy All Simulations:")
    print(q_buy_df.to_string(index=False))
    print("\nQ_Sell All Simulations:")
    print(q_sell_df.to_string(index=False))
    print("\n")

    print("### 3. Average Q_Buy and Q_Sell per Day\n")
    avg_q_df = pd.DataFrame({
        'Day': stock_data['Day'],
        'Avg_Q_Buy': simulation_results['avg_q_buy'],
        'Avg_Q_Sell': simulation_results['avg_q_sell']
    })
    print(avg_q_df.to_string(index=False))
    print("\n")

    print("### 4. Derived Average Q-Value Policy (`avg_q_policy`)\n")
    print(stock_data[['Day', 'avg_q_policy']].to_string(index=False))
    print("\n")

    print("### 5. True Random Policy (`random_policy`)\n")
    print(stock_data[['Day', 'random_policy']].to_string(index=False))
    print("\n")

    print("### 6. Rewards for All Policies\n")
    print(stock_data[['Day', 'Random_Reward', 'Avg_Q_Reward']].to_string(index=False))
    print("\n")

    print("### 7. Total Rewards\n")
    rewards_df = pd.DataFrame({
        'Policy': ['Random Policy', 'Average Q-Value Policy'],
        'Total Reward': [random_total, avg_q_total]
    })
    print(rewards_df.to_string(index=False))
    print("\n")

    print("### 8. Policy Decision Comparison\n")
    comparison_df = pd.DataFrame({
        'Day': stock_data['Day'],
        'Random Policy': stock_data['random_policy'],
        'Average Q-Value Policy': stock_data['avg_q_policy']
    })
    print(comparison_df.to_string(index=False))
    print("\n")

## Computations

In [29]:
# Function to generate stock prices
def generate_stock_prices(days=5):
    """
    Generate synthetic stock prices for a given number of days.

    Parameters:
    - days: Number of days to generate prices for.

    Returns:
    - DataFrame with 'Day' and 'Price' columns.
    """
    np.random.seed(42)  # For reproducibility
    prices = [100]  # Starting price
    for _ in range(1, days):
        change = np.random.randint(-2, 3)  # Price changes between -2 and +2
        new_price = prices[-1] + change
        prices.append(new_price)
    data = pd.DataFrame({
        'Day': range(1, days + 1),
        'Price': prices
    })
    return data

# Function to run random policy simulations
def run_random_policy_simulations(data, num_simulations=3):
    """
    Run the random policy multiple times and compute Q-values for each simulation.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.
    - num_simulations: Number of random policy simulations.

    Returns:
    - Dictionary with 'q_buy_all', 'q_sell_all', 'avg_q_buy', 'avg_q_sell'.
    """
    q_buy_all = []
    q_sell_all = []

    for sim in range(num_simulations):
        # Generate a random policy for this simulation
        random_policy = np.random.choice([1, -1], size=len(data))  # 1: Buy, -1: Sell
        data_sim = data.copy()
        data_sim['Random_Policy_Sim'] = random_policy

        # Compute Q-values for Buy and Sell
        q_buy = []
        q_sell = []
        for t in range(len(data_sim)):
            # Immediate reward for Buy and Sell
            if t < len(data_sim) - 1:
                immediate_buy_reward = data_sim['Price'].iloc[t + 1] - data_sim['Price'].iloc[t]
                immediate_sell_reward = data_sim['Price'].iloc[t] - data_sim['Price'].iloc[t + 1]
            else:
                immediate_buy_reward = 0
                immediate_sell_reward = 0

            # Future reward following the random policy
            future_reward = 0
            for future_t in range(t + 1, len(data_sim)):
                action = data_sim['Random_Policy_Sim'].iloc[future_t]
                if action == 1:
                    reward = data_sim['Price'].iloc[future_t] - data_sim['Price'].iloc[future_t - 1]
                else:
                    reward = data_sim['Price'].iloc[future_t - 1] - data_sim['Price'].iloc[future_t]
                future_reward += reward

            # Q-values for Buy and Sell
            q_buy.append(immediate_buy_reward + future_reward)
            q_sell.append(immediate_sell_reward + future_reward)

        q_buy_all.append(q_buy)
        q_sell_all.append(q_sell)

    # Convert to arrays for averaging
    q_buy_all = np.array(q_buy_all)
    q_sell_all = np.array(q_sell_all)

    # Compute average Q-values
    avg_q_buy = q_buy_all.mean(axis=0)
    avg_q_sell = q_sell_all.mean(axis=0)

    return {
        'q_buy_all': q_buy_all,
        'q_sell_all': q_sell_all,
        'avg_q_buy': avg_q_buy,
        'avg_q_sell': avg_q_sell,
    }

# Function to derive average Q-value policy
def derive_avg_q_policy(data, simulation_results):
    """
    Derive a policy based on average Q-values from simulations.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.
    - simulation_results: Dictionary containing Q-values from simulations.

    Returns:
    - DataFrame with 'avg_q_policy' column added.
    """
    avg_q_buy = simulation_results['avg_q_buy']
    avg_q_sell = simulation_results['avg_q_sell']

    # Derive policy based on average Q-values
    policy = []
    for q_buy, q_sell in zip(avg_q_buy, avg_q_sell):
        if q_buy > q_sell:
            policy.append(1)  # Buy
        else:
            policy.append(-1)  # Sell

    data['avg_q_policy'] = policy
    return data

# Function to define true random policy
def define_random_policy(data):
    """
    Define a true random policy that chooses Buy or Sell with equal probability.

    Parameters:
    - data: DataFrame containing 'Day' and 'Price'.

    Returns:
    - DataFrame with 'random_policy' column added.
    """
    np.random.seed(100)  # Different seed for true random policy
    random_policy = np.random.choice([1, -1], size=len(data))  # 1: Buy, -1: Sell
    data['random_policy'] = random_policy
    return data

# Function to evaluate policies
def evaluate_policies(data):
    """
    Evaluate the random and avg_q policies by computing rewards and total rewards.

    Parameters:
    - data: DataFrame containing 'Price', 'random_policy', and 'avg_q_policy'.

    Returns:
    - DataFrame with 'Random_Reward', 'Avg_Q_Reward',
      and 'Total_Reward' columns added.
    """
    random_rewards = []
    avg_q_rewards = []

    for t in range(len(data) - 1):
        # Reward for Random Policy
        if data['random_policy'].iloc[t] == 1:  # Buy
            random_reward = data['Price'].iloc[t + 1] - data['Price'].iloc[t]
        elif data['random_policy'].iloc[t] == -1:  # Sell
            random_reward = data['Price'].iloc[t] - data['Price'].iloc[t + 1]
        else:
            random_reward = 0
        random_rewards.append(random_reward)

        # Reward for Avg Q Policy
        if data['avg_q_policy'].iloc[t] == 1:  # Buy
            avg_q_reward = data['Price'].iloc[t + 1] - data['Price'].iloc[t]
        elif data['avg_q_policy'].iloc[t] == -1:  # Sell
            avg_q_reward = data['Price'].iloc[t] - data['Price'].iloc[t + 1]
        else:
            avg_q_reward = 0
        avg_q_rewards.append(avg_q_reward)

    # Append the last reward as 0 (no action possible at the last step)
    random_rewards.append(0)
    avg_q_rewards.append(0)

    data['Random_Reward'] = random_rewards
    data['Avg_Q_Reward'] = avg_q_rewards

    # Compute total rewards
    random_total_reward = sum(random_rewards)
    avg_q_total_reward = sum(avg_q_rewards)

    return data, random_total_reward, avg_q_total_reward

# True Random Policy vs Optimal Avg Q Value Policy

In [30]:
# Step 1: Generate Stock Prices for 5 Days
stock_data = generate_stock_prices(days=100)

# Step 2: Run 3 Simulations of the Random Policy
simulation_results = run_random_policy_simulations(stock_data, num_simulations=100)

# Step 3: Derive the Average Q-Value Policy (`avg_q_policy`)
stock_data = derive_avg_q_policy(stock_data, simulation_results)

# Step 4: Define True Random Policy (`random_policy`)
stock_data = define_random_policy(stock_data)

# Step 5: Evaluate All Policies
stock_data, random_total_reward, avg_q_total_reward = evaluate_policies(stock_data)

# Step 6: Display Detailed Numerical Example Tables
display_detailed_example(stock_data, simulation_results, random_total_reward, avg_q_total_reward)

# Step 7: Create and Save Plotly Charts, then Open HTML
create_and_save_plots(stock_data, simulation_results, html_file='policies_comparison.html')

### 1. Stock Prices Over 5 Days

 Day  Price
   1    100
   2    101
   3    103
   4    103
   5    105
   6    107
   7    106
   8    106
   9    106
  10    106
  11    108
  12    109
  13    109
  14    111
  15    110
  16    111
  17    110
  18    111
  19    113
  20    111
  21    112
  22    111
  23    113
  24    114
  25    112
  26    110
  27    110
  28    110
  29    109
  30    110
  31    111
  32    111
  33    112
  34    113
  35    111
  36    111
  37    113
  38    113
  39    115
  40    113
  41    112
  42    113
  43    111
  44    112
  45    111
  46    110
  47    108
  48    107
  49    109
  50    108
  51    109
  52    110
  53    111
  54    112
  55    114
  56    114
  57    112
  58    113
  59    112
  60    113
  61    112
  62    111
  63    112
  64    114
  65    113
  66    112
  67    113
  68    112
  69    111
  70    112
  71    113
  72    111
  73    113
  74    115
  75    114
  76    116
  77    115
  78    113
  79    114
  80   

## Learning over Episodes

In [31]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import os
import webbrowser

# Set Plotly template
pio.templates.default = "plotly"

# ==============================
# 1. Stock Price Generation
# ==============================

def generate_stock_prices(days=100, seed=42, start_price=100, mu=0, sigma=1):
    """
    Generate synthetic stock prices for a given number of days.

    Parameters:
    - days: Number of days to generate prices for.
    - seed: Random seed for reproducibility.
    - start_price: Starting price of the stock.
    - mu: Mean daily return (%).
    - sigma: Standard deviation of daily returns (%).

    Returns:
    - DataFrame with 'Day', 'Price', 'Price_State', 'State', and 'Next_Price' columns.
    """
    np.random.seed(seed)
    # Generate daily returns
    returns = np.random.normal(mu, sigma, days - 1)
    prices = [start_price]
    for r in returns:
        new_price = prices[-1] * (1 + r / 100)
        new_price = max(new_price, 1)  # Ensure price stays positive
        prices.append(new_price)

    # Create the DataFrame
    data = pd.DataFrame({
        'Day': range(1, days + 1),
        'Price': prices
    })

    # Define Price_State based on the threshold
    data['Price_State'] = data['Price'].apply(lambda x: 'Above' if x > start_price else 'Below')

    # Define State as a combination of Price_State and Day
    data['State'] = list(zip(data['Price_State'], data['Day']))

    # Define Next_Price by shifting Price column
    data['Next_Price'] = data['Price'].shift(-1).fillna(data['Price'])

    return data

def plot_stock_prices(data):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Day'], y=data['Price'], mode='lines', name='Stock Price', line=dict(color='black')))
    fig.update_layout(title="Stock Prices Over Days", xaxis_title="Day", yaxis_title="Price")
    return fig

# ==============================
# 2. State Space Definition
# ==============================

def define_state_space(data, threshold=100):
    data['Price_State'] = data['Price'].apply(lambda x: 'Above' if x > threshold else 'Below')
    data['State'] = list(zip(data['Day'], data['Price_State']))
    return data

def plot_price_state(data):
    above = data[data['Price_State'] == 'Above']
    below = data[data['Price_State'] == 'Below']

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=above['Day'], y=above['Price'], mode='markers', name='Above 100', marker=dict(color='green')))
    fig.add_trace(go.Scatter(x=below['Day'], y=below['Price'], mode='markers', name='Below 100', marker=dict(color='red')))
    fig.update_layout(title="Price State (Above/Below 100)", xaxis_title="Day", yaxis_title="Price")
    return fig

# ==============================
# 3. Actions and Random Policy
# ==============================

def create_random_policy(states, actions):
    policy = {state: np.random.choice(actions) for state in states}
    return policy

def plot_random_policy(data, policy):
    data['Action'] = data['State'].map(policy)

    buys = data[data['Action'] == 'Buy']
    sells = data[data['Action'] == 'Sell']

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=data['Day'], y=data['Price'], mode='lines', name='Stock Price', line=dict(color='black')))
    fig.add_trace(go.Scatter(x=buys['Day'], y=buys['Price'], mode='markers', name='Buy Action', marker=dict(symbol='triangle-up', color='blue', size=10)))
    fig.add_trace(go.Scatter(x=sells['Day'], y=sells['Price'], mode='markers', name='Sell Action', marker=dict(symbol='triangle-down', color='orange', size=10)))
    fig.update_layout(title="Random Policy Actions", xaxis_title="Day", yaxis_title="Price")
    return fig

# ==============================
# 4. Q-Values Computation
# ==============================


def plot_q_values(q_values):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=q_values['Day'], y=q_values['Buy_Q'], mode='lines+markers', name='Buy Q-Value', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=q_values['Day'], y=q_values['Sell_Q'], mode='lines+markers', name='Sell Q-Value', line=dict(color='red')))
    fig.update_layout(title="Q-Values Over Time", xaxis_title="Day", yaxis_title="Q-Value")
    return fig

# ==============================
# 5. HTML Generation
# ==============================

def create_and_save_html(figures, html_file='policy_visualization.html'):
    """
    Generate an HTML file with all charts embedded.

    Parameters:
    - figures: Dictionary of {title: Plotly Figure} pairs.
    - html_file: Name of the output HTML file.
    """
    html_content = """
    <html>
    <head>
        <title>Policy Visualization</title>
        <meta charset="utf-8" />
    </head>
    <body>
        <h1>Policy Visualization</h1>
    """
    # Include Plotly.js only once for all charts
    plotly_js_included = False
    for title, figure in figures.items():
        if not plotly_js_included:
            html_content += f"<h2>{title}</h2>{pio.to_html(figure, full_html=False, include_plotlyjs='cdn')}"
            plotly_js_included = True
        else:
            html_content += f"<h2>{title}</h2>{pio.to_html(figure, full_html=False, include_plotlyjs=False)}"
    
    html_content += """
    </body>
    </html>
    """

    # Save the HTML content to a file
    with open(html_file, 'w') as f:
        f.write(html_content)

    # Open the file in the default web browser
    webbrowser.open(f'file://{os.path.abspath(html_file)}')
    print(f"Visualization saved as {html_file} and opened in your browser.")

def create_rewards_chart_random_policy(stock_data):
    """
    Create a Plotly chart for immediate and cumulative rewards for the random policy.

    Parameters:
    - stock_data: DataFrame containing 'Day', 'Immediate_Reward', and 'Cumulative_Reward' for the random policy.

    Returns:
    - Plotly Figure object.
    """
    fig = go.Figure()

    # Add Immediate Rewards as a bar chart
    fig.add_trace(go.Bar(
        x=stock_data['Day'],
        y=stock_data['Immediate_Reward'],
        name='Immediate Reward',
        marker_color='blue',
        opacity=0.7
    ))

    # Add Cumulative Rewards as a line chart
    fig.add_trace(go.Scatter(
        x=stock_data['Day'],
        y=stock_data['Cumulative_Reward'],
        mode='lines+markers',
        name='Cumulative Reward',
        line=dict(color='orange', width=2)
    ))

    # Update layout
    fig.update_layout(
        title='Immediate and Cumulative Rewards (Random Policy)',
        xaxis_title='Day',
        yaxis_title='Reward',
        legend_title='Reward Type',
        template='plotly',
        barmode='overlay'
    )

    return fig

def evaluate_policy_with_rewards(stock_data, policy):
    """
    Evaluate a given policy by computing actions, immediate rewards, and cumulative rewards.

    Parameters:
    - stock_data: DataFrame containing stock data.
    - policy: Dictionary mapping states to actions.

    Returns:
    - DataFrame with additional 'Action', 'Immediate_Reward', and 'Cumulative_Reward' columns.
    """
    data = stock_data.copy()
    actions = []
    immediate_rewards = []

    for idx, row in data.iterrows():
        state = row['State']
        action = policy[state]
        actions.append(action)

        # Compute immediate reward based on the action
        reward = get_reward(action, row['Price'], row['Next_Price'])
        immediate_rewards.append(reward)

    # Add actions and rewards to the DataFrame
    data['Action'] = actions
    data['Immediate_Reward'] = immediate_rewards
    data['Cumulative_Reward'] = data['Immediate_Reward'].cumsum()

    return data


In [32]:
def create_q_values_optimal_chart(stock_data, optimal_policy):
    """
    Generate Q-values over time for the optimal policy.
    
    Parameters:
    - stock_data: DataFrame containing stock data.
    - optimal_policy: Dictionary mapping states to optimal actions.

    Returns:
    - Plotly Figure showing Q-values for optimal policy actions.
    """
    q_values = []
    for idx, row in stock_data.iterrows():
        state = row['State']
        action = optimal_policy[state]
        current_price = row['Price']
        next_price = row['Next_Price']
        immediate_reward = get_reward(action, current_price, next_price)
        q_values.append({
            'Day': row['Day'],
            'Action': action,
            'Immediate_Reward': immediate_reward
        })

    q_values_df = pd.DataFrame(q_values)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=q_values_df['Day'], y=q_values_df['Immediate_Reward'],
                             mode='lines+markers', name='Immediate Reward', line=dict(color='purple')))
    fig.update_layout(title='Q-Values Over Time (Optimal Policy)',
                      xaxis_title='Day', yaxis_title='Q-Value', template='plotly')
    return fig


def create_rewards_comparison_chart(stock_data_random, stock_data_optimal):
    """
    Generate a chart comparing immediate and cumulative rewards between random and optimal policies.
    
    Parameters:
    - stock_data_random: DataFrame with random policy data.
    - stock_data_optimal: DataFrame with optimal policy data.

    Returns:
    - Plotly Figure comparing immediate and cumulative rewards for both policies.
    """
    fig = go.Figure()
    # Immediate Rewards
    fig.add_trace(go.Bar(x=stock_data_random['Day'], y=stock_data_random['Immediate_Reward'],
                         name='Random Immediate Reward', marker_color='blue', opacity=0.6))
    fig.add_trace(go.Bar(x=stock_data_optimal['Day'], y=stock_data_optimal['Immediate_Reward'],
                         name='Optimal Immediate Reward', marker_color='green', opacity=0.6))

    # Cumulative Rewards
    fig.add_trace(go.Scatter(x=stock_data_random['Day'], y=stock_data_random['Cumulative_Reward'],
                             name='Random Cumulative Reward', mode='lines+markers', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=stock_data_optimal['Day'], y=stock_data_optimal['Cumulative_Reward'],
                             name='Optimal Cumulative Reward', mode='lines+markers', line=dict(color='green')))

    fig.update_layout(title='Rewards Comparison (Random vs. Optimal Policy)',
                      xaxis_title='Day', yaxis_title='Reward', template='plotly')
    return fig


def create_and_save_html_with_additional_charts(figures, html_file='policy_comparison.html'):
    """
    Save all charts, including additional charts, into an HTML file.
    
    Parameters:
    - figures: Dictionary of Plotly Figures to include in the HTML.
    - html_file: Name of the HTML file to save.
    """
    html_content = "<html><head><title>Policy Comparison</title></head><body>"
    for title, fig in figures.items():
        html_content += f"<h2>{title}</h2>"
        html_content += pio.to_html(fig, full_html=False, include_plotlyjs=False)
    html_content += "</body></html>"
    with open(html_file, 'w') as f:
        f.write(html_content)
    webbrowser.open(f'file://{os.path.abspath(html_file)}')

    

def compute_q_values(data, policy):
    """
    Compute Q-values for each state-action pair based on immediate reward and future rewards.

    Parameters:
    - data: DataFrame containing stock data.
    - policy: Dictionary mapping states to actions.

    Returns:
    - DataFrame with Q-values for 'Buy' and 'Sell' actions at each time step.
    """
    q_values = []
    for idx, row in data.iterrows():
        state = row['State']
        current_price = row['Price']

        # Immediate rewards for Buy and Sell
        reward_buy = 0
        reward_sell = 0
        future_reward_buy = 0
        future_reward_sell = 0

        # Calculate rewards iteratively over all future steps
        for future_idx in range(idx, len(data)):
            future_price = data.iloc[future_idx]['Price']
            time_discount = 0.9 ** (future_idx - idx)  # Discount factor for future rewards
            if future_idx > idx:
                action_future = policy.get(data.iloc[future_idx]['State'], 'Buy')
                if action_future == 'Buy':
                    future_reward_buy += time_discount * (future_price - current_price)
                else:
                    future_reward_sell += time_discount * (current_price - future_price)

        # Total Q-values = Immediate reward + Future rewards
        reward_buy += future_reward_buy
        reward_sell += future_reward_sell

        q_values.append({
            'Day': row['Day'],
            'Buy_Q': reward_buy,
            'Sell_Q': reward_sell
        })

    return pd.DataFrame(q_values)


def compute_q_table(stock_data, policy):
    """
    Compute Q-table for all states in the stock data based on a given policy.

    Parameters:
    - stock_data: DataFrame containing stock data.
    - policy: Dictionary mapping states to actions.

    Returns:
    - DataFrame with Q-values and rewards for each state-action pair.
    """
    q_values = []
    for idx, row in stock_data.iterrows():
        state = row['State']
        current_price = row['Price']
        next_price = row['Next_Price'] if idx < len(stock_data) - 1 else current_price
        action = policy[state]

        # Immediate reward for the action
        immediate_reward = get_reward(action, current_price, next_price)

        # Append Q-values
        q_values.append({
            'State': state,
            'Action': action,
            'Immediate_Reward': immediate_reward
        })

    return pd.DataFrame(q_values)


def create_q_values_over_time_chart(data, q_values):
    """
    Generate the Q-values over time chart for the policy.

    Parameters:
    - data: Original stock data.
    - q_values: DataFrame with Q-values for Buy and Sell actions.

    Returns:
    - Plotly figure.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=q_values['Day'], y=q_values['Buy_Q'],
                             mode='lines+markers', name='Buy Q-Value', line=dict(color='green')))
    fig.add_trace(go.Scatter(x=q_values['Day'], y=q_values['Sell_Q'],
                             mode='lines+markers', name='Sell Q-Value', line=dict(color='red')))
    fig.update_layout(title='Q-Values Over Time',
                      xaxis_title='Day', yaxis_title='Q-Value', template='plotly')
    return fig



def derive_optimal_policy(q_values):
    """
    Derive the optimal policy based on the computed Q-values.

    Parameters:
    - q_values: DataFrame containing Q-values for each state-action pair.

    Returns:
    - Dictionary mapping states to the optimal action ('Buy' or 'Sell').
    """
    # Initialize the optimal policy dictionary
    optimal_policy = {}

    # Iterate through each row in the Q-values DataFrame
    for _, row in q_values.iterrows():
        state = row['Day']
        buy_q = row['Buy_Q']
        sell_q = row['Sell_Q']

        # Choose the action with the higher Q-value
        if buy_q > sell_q:
            optimal_policy[state] = 'Buy'
        else:
            optimal_policy[state] = 'Sell'

    return optimal_policy
def create_optimal_policy_rewards_chart(stock_data_optimal):
    """
    Generate a chart showing optimal policy actions and rewards.

    Parameters:
    - stock_data_optimal: DataFrame containing data for the optimal policy.

    Returns:
    - A Plotly Figure object.
    """
    buys = stock_data_optimal[stock_data_optimal['Action'] == 'Buy']
    sells = stock_data_optimal[stock_data_optimal['Action'] == 'Sell']

    fig = go.Figure()

    # Plot stock price
    fig.add_trace(go.Scatter(x=stock_data_optimal['Day'], y=stock_data_optimal['Price'], mode='lines', name='Stock Price', line=dict(color='black')))

    # Plot Buy and Sell actions
    fig.add_trace(go.Scatter(x=buys['Day'], y=buys['Price'], mode='markers', name='Optimal Buy', marker=dict(symbol='triangle-up', color='blue', size=10)))
    fig.add_trace(go.Scatter(x=sells['Day'], y=sells['Price'], mode='markers', name='Optimal Sell', marker=dict(symbol='triangle-down', color='orange', size=10)))

    # Plot Immediate Rewards
    fig.add_trace(go.Bar(x=stock_data_optimal['Day'], y=stock_data_optimal['Immediate_Reward'], name='Immediate Reward', marker_color='green', opacity=0.6))

    # Plot Cumulative Rewards
    fig.add_trace(go.Scatter(x=stock_data_optimal['Day'], y=stock_data_optimal['Cumulative_Reward'], mode='lines+markers', name='Cumulative Reward', line=dict(color='purple')))

    fig.update_layout(
        title='Optimal Policy Actions and Rewards',
        xaxis_title='Day',
        yaxis_title='Price / Reward',
        template='plotly'
    )
    return fig


In [33]:

# ==============================
# 6. Main Function
# ==============================

def main():
    days = 100
    start_price = 100
    threshold = 100
    actions = ['Buy', 'Sell']

    # Step 1: Generate Stock Prices
    stock_data = generate_stock_prices(days=days, start_price=start_price)
    fig_stock_prices = plot_stock_prices(stock_data)

    # Step 2: Define State Space
    stock_data = define_state_space(stock_data, threshold=threshold)
    fig_price_state = plot_price_state(stock_data)

    # Step 3: Create Random Policy
    states = list(stock_data['State'])
    random_policy = create_random_policy(states, actions)
    fig_random_policy = plot_random_policy(stock_data, random_policy)

    # Step 4: Compute Q-Values and Plot
    q_values = compute_q_values(stock_data, random_policy)
    fig_q_values = plot_q_values(q_values)

    
    # Step 5: Save All Charts to HTML
    figures = {
        "Stock Prices": fig_stock_prices,
        "Price State (Above/Below 100)": fig_price_state,
        "Random Policy Actions": fig_random_policy,
        "Q-Values Over Time": fig_q_values
    }
    
    # Step 5: Evaluate Random Policy and Add Rewards to Stock Data
    stock_data_random = evaluate_policy_with_rewards(stock_data, random_policy)
    total_reward_random = stock_data_random['Cumulative_Reward'].iloc[-1]
    print(f"Total Reward from Random Policy: {total_reward_random:.2f}")

    
    # Add rewards chart for random policy
    fig_rewards_random = create_rewards_chart_random_policy(stock_data_random)

    # Add to figures dictionary
    figures['Immediate and Cumulative Rewards (Random Policy)'] = fig_rewards_random

    
    # Step 1: Compute Q-values based on random policy
    q_values_random = compute_q_values(stock_data, random_policy)

    # Step 2: Derive optimal policy based on Q-values
    optimal_policy = derive_optimal_policy(q_values_random)

    # Step 3: Compute Q-values for optimal policy
    q_values_optimal = compute_q_values(stock_data, optimal_policy)

    # Step 4: Visualize charts
    figures["Q-Values Over Time (Random Policy)"]  = create_q_values_over_time_chart(stock_data, q_values_random)
    figures["Q-Values Over Time (Optimal Policy)"] = create_q_values_over_time_chart(stock_data, q_values_optimal)

    
    stock_data_optimal = evaluate_policy_with_rewards(stock_data, optimal_policy)
    total_reward_optimal = stock_data_optimal['Cumulative_Reward'].iloc[-1]
    print(f"Total Reward from Optimal Policy: {total_reward_optimal:.2f}")

    
    
   # Step 6: Generate Figures
    figures = {
        "Stock Prices": fig_stock_prices,
        "Random Policy Actions": fig_random_policy,
        "Q-Values Over Time":fig_q_values,
        "Optimal Policy Actions and Rewards": create_optimal_policy_rewards_chart(stock_data_optimal)
    }    
    
    
    create_and_save_html(figures, html_file='policy_comparison_visualization.html')


if __name__ == "__main__":
    main()

    





NameError: name 'get_reward' is not defined

# Q Learning

In [34]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import webbrowser
import os

# Set Plotly to use the 'plotly' template by default
pio.templates.default = "plotly"

# ========================
# 1. Q-Learning Parameters
# ========================
alpha = 0.1        # Learning rate
gamma = 0.9        # Discount factor
epsilon = 1.0      # Exploration rate
epsilon_decay = 0.995
epsilon_min = 0.01
num_episodes = 100 # Number of training episodes

# ========================
# 2. Helper Functions
# ========================

def generate_stock_prices(days=100, seed=42):
    """
    Generate synthetic stock prices for a given number of days.
    
    Parameters:
    - days: Number of days to generate prices for.
    - seed: Random seed for reproducibility.
    
    Returns:
    - DataFrame with 'Day' and 'Price' columns.
    """
    np.random.seed(seed)
    prices = [100]  # Starting price
    for _ in range(1, days):
        change = np.random.randint(-2, 3)  # Price changes between -2 and +2
        new_price = prices[-1] + change
        new_price = max(new_price, 1)      # Ensure price doesn't go below 1
        prices.append(new_price)
    data = pd.DataFrame({
        'Day': range(1, days + 1),
        'Price': prices
    })
    return data

def define_states(data, threshold=100):
    """
    Define states based on whether the price is above or below the threshold.
    
    Parameters:
    - data: DataFrame containing 'Price'.
    - threshold: Price threshold to define states.
    
    Returns:
    - DataFrame with an added 'State' column.
    """
    data['State'] = data['Price'].apply(lambda x: 'Above' if x > threshold else 'Below')
    return data

def initialize_q_table(states, actions):
    """
    Initialize the Q-Table with zeros for all state-action pairs.
    
    Parameters:
    - states: List of possible states.
    - actions: List of possible actions.
    
    Returns:
    - Q-Table as a DataFrame.
    """
    q_table = pd.DataFrame(
        data=np.zeros((len(states), len(actions))),
        index=states,
        columns=actions
    )
    return q_table

def choose_action(state, q_table, epsilon, actions):
    """
    Choose an action using epsilon-greedy policy.
    
    Parameters:
    - state: Current state.
    - q_table: Current Q-Table.
    - epsilon: Current exploration rate.
    - actions: List of possible actions.
    
    Returns:
    - Chosen action.
    """
    if np.random.rand() < epsilon:
        return np.random.choice(actions)
    else:
        state_actions = q_table.loc[state]
        max_value = state_actions.max()
        # Handle multiple actions with the same max value
        actions_with_max = state_actions[state_actions == max_value].index.tolist()
        return np.random.choice(actions_with_max)

def update_q_table(q_table, state, action, reward, next_state, alpha, gamma):
    """
    Update the Q-Table based on the action taken and reward received.
    
    Parameters:
    - q_table: Current Q-Table.
    - state: Current state.
    - action: Action taken.
    - reward: Reward received.
    - next_state: Next state after action.
    - alpha: Learning rate.
    - gamma: Discount factor.
    
    Returns:
    - Updated Q-Table.
    """
    current_q = q_table.loc[state, action]
    max_future_q = q_table.loc[next_state].max()
    new_q = current_q + alpha * (reward + gamma * max_future_q - current_q)
    q_table.loc[state, action] = new_q
    return q_table

def get_reward(action, current_price, next_price):
    """
    Define the reward based on the action taken.
    
    Parameters:
    - action: Action taken ('Buy' or 'Sell').
    - current_price: Price before action.
    - next_price: Price after action.
    
    Returns:
    - Reward value.
    """
    if action == 'Buy':
        return next_price - current_price
    elif action == 'Sell':
        return current_price - next_price
    else:
        return 0

def evaluate_initial_random_policy(data, actions=['Buy', 'Sell']):
    """
    Evaluate the initial random policy by taking actions based on a fixed rule:
    Buy if above threshold, Sell if below.
    
    Parameters:
    - data: DataFrame containing 'Price' and 'State'.
    - actions: List of possible actions.
    
    Returns:
    - Total reward from the initial policy.
    - List of rewards per day.
    """
    rewards = []
    for t in range(len(data) - 1):
        state = data['State'].iloc[t]
        # Initial Policy: Buy if above, Sell if below
        if state == 'Above':
            action = 'Buy'
        else:
            action = 'Sell'
        current_price = data['Price'].iloc[t]
        next_price = data['Price'].iloc[t + 1]
        reward = get_reward(action, current_price, next_price)
        rewards.append(reward)
    rewards.append(0)  # No action on the last day
    total_reward = sum(rewards)
    data['Initial_Policy_Reward'] = rewards
    return total_reward, rewards

# ========================
# 3. Q-Learning Implementation
# ========================

def q_learning_trading():
    global epsilon  # To modify epsilon inside the function
    
    # Step 1: Generate Stock Prices for 100 Days
    stock_data = generate_stock_prices(days=100)
    
    # Step 2: Define States
    stock_data = define_states(stock_data)
    
    # Step 3: Define Actions and Initialize Q-Table
    actions = ['Buy', 'Sell']
    unique_states = ['Above', 'Below']
    q_table = initialize_q_table(unique_states, actions)
    
    # Capture Initial Q-Table
    initial_q_table = q_table.copy()
    
    # Step 4: Evaluate Initial Random Policy
    random_total_reward, random_rewards = evaluate_initial_random_policy(stock_data, actions)
    print(f"Initial Random Policy Total Reward: {random_total_reward}")
    
    # Step 5: Q-Learning Training
    rewards_per_episode = []
    q_table_snapshots = {}
    snapshot_interval = 10  # Save Q-table every 10 episodes
    
    for episode in range(1, num_episodes + 1):
        total_reward = 0
        for t in range(len(stock_data) - 1):
            state = stock_data['State'].iloc[t]
            action = choose_action(state, q_table, epsilon, actions)
            current_price = stock_data['Price'].iloc[t]
            next_price = stock_data['Price'].iloc[t + 1]
            reward = get_reward(action, current_price, next_price)
            next_state = stock_data['State'].iloc[t + 1]
            q_table = update_q_table(q_table, state, action, reward, next_state, alpha, gamma)
            total_reward += reward
        rewards_per_episode.append(total_reward)
        
        # Decay epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay
        
        # Save Q-table snapshot at specified intervals
        if episode % snapshot_interval == 0 or episode == num_episodes:
            q_table_snapshots[episode] = q_table.copy()
        
        # Optional: Print progress every 10 episodes
        if episode % snapshot_interval == 0 or episode == 1:
            print(f"Episode {episode}/{num_episodes}, Total Reward: {total_reward}, Epsilon: {epsilon:.4f}")
    
    # Step 6: Plotting and Visualization
    def create_plots(stock_data, initial_q_table, q_table_snapshots, rewards_per_episode, random_rewards, html_file='q_learning_trading.html'):
        """
        Create Plotly charts, save them to an HTML file, and open the file.
        """
        html_content = """
        <html>
        <head>
            <title>Q-Learning Trading Example</title>
            <meta charset="utf-8" />
        </head>
        <body>
            <h1>Q-Learning Trading Example</h1>
        """
        
        # 1. Plot Stock Prices
        fig1 = go.Figure()
        fig1.add_trace(go.Scatter(x=stock_data['Day'], y=stock_data['Price'], mode='lines+markers', name='Stock Price'))
        fig1.update_layout(title='Stock Prices Over Days', xaxis_title='Day', yaxis_title='Price')
        html_content += pio.to_html(fig1, full_html=False, include_plotlyjs='cdn')
        
        # 2. Plot Initial Random Policy Rewards
        fig2 = go.Figure()
        fig2.add_trace(go.Bar(x=stock_data['Day'], y=stock_data['Initial_Policy_Reward'], name='Initial Policy Reward', marker_color='grey'))
        fig2.update_layout(title='Initial Random Policy Rewards', xaxis_title='Day', yaxis_title='Reward')
        html_content += pio.to_html(fig2, full_html=False, include_plotlyjs=False)
        
        # 3. Plot Total Rewards Over Episodes
        fig3 = go.Figure()
        fig3.add_trace(go.Scatter(x=list(range(1, len(rewards_per_episode) + 1)), y=rewards_per_episode,
                                  mode='lines+markers',
                                  name='Total Reward per Episode',
                                  line=dict(color='orange')))
        fig3.update_layout(title='Total Rewards Over Episodes', xaxis_title='Episode', yaxis_title='Total Reward')
        html_content += pio.to_html(fig3, full_html=False, include_plotlyjs=False)
        
        # 4. Plot Q-Table Snapshots
        fig4 = go.Figure()
        for episode, snapshot in q_table_snapshots.items():
            for state in snapshot.index:
                fig4.add_trace(go.Bar(
                    name=f'Episode {episode} - {state}',
                    x=snapshot.columns,
                    y=snapshot.loc[state],
                    text=[f"{val:.2f}" for val in snapshot.loc[state]],
                    textposition='auto'
                ))
        fig4.update_layout(title='Q-Table Snapshots at Selected Episodes',
                           xaxis_title='Actions',
                           yaxis_title='Q-Value',
                           barmode='group')
        html_content += pio.to_html(fig4, full_html=False, include_plotlyjs=False)
        
        # Close HTML content
        html_content += """
        </body>
        </html>
        """
        
        # Write HTML content to file
        with open(html_file, 'w') as f:
            f.write(html_content)
        
        # Open the HTML file in the default web browser
        file_path = os.path.abspath(html_file)
        webbrowser.open(f'file://{file_path}')
    
    # Create and open plots
    create_plots(stock_data, initial_q_table, q_table_snapshots, rewards_per_episode, random_rewards)
    
    # Step 7: Display Final Q-Table
    print("\n### Final Q-Table ###\n")
    print(q_table)
    
    print(f"\n### Initial Random Policy Total Reward: {random_total_reward} ###\n")
    
    # ========================
    # 4. Run the Q-Learning Trading
    # ========================

if __name__ == "__main__":
    q_learning_trading()


Initial Random Policy Total Reward: 7
Episode 1/100, Total Reward: -19, Epsilon: 0.9950
Episode 10/100, Total Reward: 7, Epsilon: 0.9511
Episode 20/100, Total Reward: -9, Epsilon: 0.9046
Episode 30/100, Total Reward: 9, Epsilon: 0.8604
Episode 40/100, Total Reward: -5, Epsilon: 0.8183
Episode 50/100, Total Reward: -19, Epsilon: 0.7783
Episode 60/100, Total Reward: -7, Epsilon: 0.7403
Episode 70/100, Total Reward: 15, Epsilon: 0.7041
Episode 80/100, Total Reward: -5, Epsilon: 0.6696
Episode 90/100, Total Reward: -7, Epsilon: 0.6369
Episode 100/100, Total Reward: 15, Epsilon: 0.6058

### Final Q-Table ###

            Buy      Sell
Above  0.980729  1.347416
Below  2.057673  0.057781

### Initial Random Policy Total Reward: 7 ###



In [35]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import webbrowser
import os

# Set Plotly to use the 'plotly' template by default
pio.templates.default = "plotly"

# ========================
# 1. Q-Learning Parameters
# ========================
alpha = 0.1       # Learning rate
gamma = 0.9       # Discount factor
epsilon = 1.0     # Exploration rate
epsilon_decay = 0.995
epsilon_min = 0.01
num_episodes = 1500  # Number of training episodes

# ========================
# 2. Helper Functions
# ========================

def generate_stock_prices(days=100, seed=None):
    """
    Generate synthetic stock prices for a given number of days.
    """
    if seed is not None:
        np.random.seed(seed)
    prices = [100]  # Starting price
    for _ in range(1, days):
        change = np.random.randint(-5, 6)  # Price changes between -5 and +5
        new_price = prices[-1] + change
        new_price = max(new_price, 1)  # Ensure price doesn't go below 1
        prices.append(new_price)
    data = pd.DataFrame({'Day': range(1, days + 1), 'Price': prices})
    return data

def define_states(data, threshold=100):
    """
    Define states based on whether the price is above or below the threshold.
    """
    data['State'] = data['Price'].apply(lambda x: 'Above' if x > threshold else 'Below')
    return data

def initialize_q_table(states, actions):
    """
    Initialize the Q-Table with zeros for all state-action pairs.
    """
    q_table = pd.DataFrame(np.zeros((len(states), len(actions))), index=states, columns=actions)
    return q_table

def choose_action(state, q_table, epsilon, actions):
    """
    Choose an action using epsilon-greedy policy.
    """
    if np.random.rand() < epsilon:
        return np.random.choice(actions)
    else:
        state_actions = q_table.loc[state]
        max_value = state_actions.max()
        actions_with_max = state_actions[state_actions == max_value].index.tolist()
        return np.random.choice(actions_with_max)

def update_q_table(q_table, state, action, reward, next_state, alpha, gamma):
    """
    Update the Q-Table based on the action taken and reward received.
    """
    current_q = q_table.loc[state, action]
    max_future_q = q_table.loc[next_state].max()
    new_q = current_q + alpha * (reward + gamma * max_future_q - current_q)
    q_table.loc[state, action] = new_q
    return q_table

def get_reward(action, current_price, next_price):
    """
    Define the reward based on the action taken.
    """
    if action == 'Buy':
        return next_price - current_price
    elif action == 'Sell':
        return current_price - next_price
    else:
        return 0

# ========================
# 3. Q-Learning Implementation
# ========================

def q_learning_trading():
    global epsilon

    # Step 1: Define Actions
    actions = ['Buy', 'Sell']

    # Step 2: Initialize Q-Table
    unique_states = ['Above', 'Below']
    q_table = initialize_q_table(unique_states, actions)

    # Step 3: Track Learning Progress
    rewards_per_episode = []
    average_q_values = []  # To track average Q-values per episode
    q_table_snapshots = {}
    state_action_counts = {state: {action: 0 for action in actions} for state in unique_states}

    # Define episodes for capturing snapshots
    snapshot_episodes = [10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500]

    # Step 4: Training Loop
    for episode in range(num_episodes):
        total_reward = 0
        stock_data = generate_stock_prices(days=20, seed=episode)
        stock_data = define_states(stock_data)

        for t in range(len(stock_data) - 1):
            state = stock_data['State'].iloc[t]
            action = choose_action(state, q_table, epsilon, actions)
            current_price = stock_data['Price'].iloc[t]
            next_price = stock_data['Price'].iloc[t + 1]
            reward = get_reward(action, current_price, next_price)
            next_state = stock_data['State'].iloc[t + 1]
            q_table = update_q_table(q_table, state, action, reward, next_state, alpha, gamma)
            total_reward += reward
            state_action_counts[state][action] += 1

        # Decay epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay
        rewards_per_episode.append(total_reward)

        # Track average Q-values
        average_q_values.append(q_table.values.mean())

        if (episode + 1) in snapshot_episodes:
            q_table_snapshots[episode + 1] = q_table.copy()

        if (episode + 1) % 10 == 0:
            print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}, Epsilon: {epsilon:.4f}")

    # Step 5: Visualize Results
    def create_plots(q_table_snapshots, rewards_per_episode, average_q_values, state_action_counts, html_file='q_learning_trading.html'):
        """
        Create Plotly charts, save to an HTML file, and open in a browser.
        """
        html_content = """
        <html>
        <head>
            <title>Q-Learning Trading Example</title>
            <meta charset="utf-8" />
        </head>
        <body>
            <h1>Q-Learning Trading Example</h1>
        """

        # 1. Learning Progress: Total Rewards per Episode
        fig1 = go.Figure()
        fig1.add_trace(go.Scatter(x=list(range(1, len(rewards_per_episode) + 1)), y=rewards_per_episode,
                                  mode='lines+markers',
                                  name='Total Reward per Episode',
                                  line=dict(color='orange')))
        fig1.update_layout(title='Learning Progress: Total Rewards per Episode',
                           xaxis_title='Episode',
                           yaxis_title='Total Reward')
        html_content += pio.to_html(fig1, full_html=False, include_plotlyjs='cdn')

        # 2. Average Q-Values per Episode
        fig2 = go.Figure()
        fig2.add_trace(go.Scatter(x=list(range(1, len(average_q_values) + 1)), y=average_q_values,
                                  mode='lines+markers',
                                  name='Average Q-Value per Episode',
                                  line=dict(color='blue')))
        fig2.update_layout(title='Learning Progress: Average Q-Values per Episode',
                           xaxis_title='Episode',
                           yaxis_title='Average Q-Value')
        html_content += pio.to_html(fig2, full_html=False, include_plotlyjs=False)

        # 3. Combined Q-Table Values Across Episodes
        fig3 = go.Figure()
        for episode, snapshot in q_table_snapshots.items():
            for state in snapshot.index:
                fig3.add_trace(go.Bar(
                    name=f'Episode {episode} - {state}',
                    x=snapshot.columns,
                    y=snapshot.loc[state],
                    text=[f"{val:.2f}" for val in snapshot.loc[state]],
                    textposition='auto'
                ))
        fig3.update_layout(
            title='Q-Table Values Across Episodes',
            xaxis_title='Actions',
            yaxis_title='Q-Value',
            barmode='group'
        )
        html_content += pio.to_html(fig3, full_html=False, include_plotlyjs=False)

        # 4. State-Action Frequency
        fig4 = go.Figure()
        for state, action_counts in state_action_counts.items():
            fig4.add_trace(go.Bar(
                name=state,
                x=list(action_counts.keys()),
                y=list(action_counts.values())
            ))
        fig4.update_layout(
            title='State-Action Frequency During Training',
            xaxis_title='Actions',
            yaxis_title='Frequency',
            barmode='group'
        )
        html_content += pio.to_html(fig4, full_html=False, include_plotlyjs=False)

        # 5. Rewards Distribution
        fig5 = go.Figure()
        fig5.add_trace(go.Histogram(x=rewards_per_episode, nbinsx=10, marker=dict(color='purple')))
        fig5.update_layout(
            title='Rewards Distribution Across Episodes',
            xaxis_title='Total Reward',
            yaxis_title='Frequency'
        )
        html_content += pio.to_html(fig5, full_html=False, include_plotlyjs=False)

        # Close HTML content
        html_content += """
        </body>
        </html>
        """

        # Save to HTML and open in browser
        with open(html_file, 'w') as f:
            f.write(html_content)
        webbrowser.open(f'file://{os.path.abspath(html_file)}')

    # Create and display plots
    create_plots(q_table_snapshots, rewards_per_episode, average_q_values, state_action_counts)

# Run the Q-Learning Trading Simulation
if __name__ == "__main__":
    q_learning_trading()


Episode 10/1500, Total Reward: -16, Epsilon: 0.9511
Episode 20/1500, Total Reward: 14, Epsilon: 0.9046
Episode 30/1500, Total Reward: -5, Epsilon: 0.8604
Episode 40/1500, Total Reward: -4, Epsilon: 0.8183
Episode 50/1500, Total Reward: 7, Epsilon: 0.7783
Episode 60/1500, Total Reward: -2, Epsilon: 0.7403
Episode 70/1500, Total Reward: 7, Epsilon: 0.7041
Episode 80/1500, Total Reward: 22, Epsilon: 0.6696
Episode 90/1500, Total Reward: 11, Epsilon: 0.6369
Episode 100/1500, Total Reward: -8, Epsilon: 0.6058
Episode 110/1500, Total Reward: 27, Epsilon: 0.5762
Episode 120/1500, Total Reward: -47, Epsilon: 0.5480
Episode 130/1500, Total Reward: -14, Epsilon: 0.5212
Episode 140/1500, Total Reward: 5, Epsilon: 0.4957
Episode 150/1500, Total Reward: 15, Epsilon: 0.4715
Episode 160/1500, Total Reward: -9, Epsilon: 0.4484
Episode 170/1500, Total Reward: -19, Epsilon: 0.4265
Episode 180/1500, Total Reward: -14, Epsilon: 0.4057
Episode 190/1500, Total Reward: 12, Epsilon: 0.3858
Episode 200/1500, T