In [26]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import os
import csv

import yfinance as yf
from datetime import datetime, timedelta

from scipy.fft import fft
from prophet import Prophet
from prophet.serialize import model_to_json

from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score, \
    explained_variance_score, accuracy_score, precision_score


In [27]:
def get_delimiter(file_path, bytes=4096):
    """
    Retrieves the delimiter of a csv file.

    Args:
        file_path: path to csv file to read
        bytes: size of the sample read from the start of the file to detect the
            delimiter. The file is opened in text mode, so this is a character
            count (a larger sample gives the sniffer more rows to work with).

    Returns:
        delimiter: delimiter of the csv file located in the given path

    Raises:
        csv.Error: if csv.Sniffer cannot determine a delimiter from the sample
    """
    # Read the sample inside a context manager so the handle is always closed,
    # even when the read fails; the original left the file open.
    with open(file_path, "r") as handle:
        sample = handle.read(bytes)
    return csv.Sniffer().sniff(sample).delimiter

In [28]:
def read_csvs_from_folder(folder_path, bytes=4096, list=False):
    """
    Reads all CSV files in a folder into pandas DataFrames.
    Automatically detects the delimiter for each CSV file.

    Args:
        folder_path (str): Path to the folder containing CSV files. Only files directly
            inside the folder whose names end in '.csv' are read.
        bytes (int): Size of the sample read from each file for delimiter detection.
        list (bool): If True, return one DataFrame per file as a Python list. If False
            (default), return a single DataFrame with all files concatenated row-wise.

    Returns:
        A list of DataFrames when ``list`` is True, otherwise one concatenated DataFrame
        (an empty DataFrame when the folder contains no CSV files).
    """
    # os.listdir returns entries in arbitrary order; sort so that the order of the
    # returned list / concatenated rows is reproducible between runs and machines.
    csv_files = [
        os.path.join(folder_path, name)
        for name in sorted(os.listdir(folder_path))
        if name.endswith('.csv')
    ]

    # Read each CSV file with its own detected delimiter
    dataframes = [pd.read_csv(path, delimiter=get_delimiter(path, bytes)) for path in csv_files]

    if list:
        return dataframes
    if not dataframes:
        # pd.concat raises ValueError on an empty sequence; return an empty frame instead
        return pd.DataFrame()
    return pd.concat(dataframes, axis=0, ignore_index=True)


In [29]:
def query_yf(stock: tuple, period: int) -> pd.DataFrame:
    """
    Query the Yahoo Finance API for a given stock and return its daily price history.

    :param stock: tuple
        A tuple containing three elements:
        - company (str): The name of the company.
        - ticker (str): The stock ticker symbol of the company.
        - exchange (str): The stock exchange where the company is listed.
    :param period: int
        The number of calendar days of data to retrieve, counted back from today.

    :return: data: a pandas dataframe with the columns Company, Ticker, Exchange, Open,
        High, Low, Close and Volume, indexed by a timezone-naive date index; or None when
        the query fails or returns no rows (a message is printed in both cases).
    """
    try:
        # Extract company, ticker, and exchange from the stock tuple
        company, ticker, exchange = stock

        end_date = datetime.now()
        start_date = end_date - timedelta(days=period)

        # The date range is fully described by start/end. '1d' is the size of each bar,
        # which is yfinance's `interval` argument; the original passed it as `period`,
        # a competing way of selecting the date range.
        data = yf.Ticker(ticker).history(
            interval='1d',
            start=start_date.strftime('%Y-%m-%d'),
            end=end_date.strftime('%Y-%m-%d'),
        )

        # An empty frame (e.g. an unknown or delisted ticker) is unusable downstream,
        # where the first row is read; report it the same way as a failed query.
        if data.empty:
            print(f"Error retrieving data: no rows returned for {ticker}")
            return None

        # Add the identifying columns to the dataframe
        data['Company'] = company
        data['Ticker'] = ticker
        data['Exchange'] = exchange

        # Reorder the columns (this also drops any other columns yfinance returned)
        data = data[['Company', 'Ticker', 'Exchange', 'Open', 'High', 'Low', 'Close', 'Volume']]

        # Convert DataFrame index to a 'DatetimeIndex' without time zone information
        data.index = data.index.tz_localize(None)  # this is necessary for some algorithms

        return data
    except Exception as e:
        # Deliberate best-effort: callers skip stocks for which None is returned
        print(f"Error retrieving data: {e}")
        return None

In [30]:
def query_yf_list(stocks: pd.DataFrame, period: int) -> pd.DataFrame:
    """
    Query the Yahoo Finance API for every stock listed in a dataframe.

    :param stocks: pd.DataFrame
        One row per stock, with the columns:
        - 'Company' (str): The name of the company.
        - 'Ticker' (str): The stock ticker symbol of the company.
        - 'Stock Exchange' (str): The stock exchange where the company is listed.
    :param period: int
        The number of days of data to retrieve.

    :return: data: a list of pandas dataframes, one per stock for which query_yf
        returned data (stocks for which it returned None are left out)
    """
    # Lazily query each row in order; query_yf signals a failed lookup with None
    query_results = (
        query_yf((row['Company'], row['Ticker'], row['Stock Exchange']), period)
        for _, row in stocks.iterrows()
    )

    # Keep only the successful lookups
    return [frame for frame in query_results if frame is not None]

In [31]:
def save_stocks_to_csv(data_list: [pd.DataFrame], folder_path: str = "stock_data"):
    """
    Save a list of stock dataframes to a folder, each as a separate CSV file {company_name} data.csv.

    The index of each dataframe is written as a regular column (via reset_index) and the
    company name is taken from the first row's 'Company' value. The dataframes passed in
    are not modified. Saving is best-effort: a failure is printed and the remaining
    dataframes are still processed.

    :param data_list: list of dataframes to save
    :param folder_path: folder path to save csvs to (created if it does not exist)
    """
    try:
        os.makedirs(folder_path, exist_ok=True)
    except OSError as e:
        print(f"Error saving data: {e}")
        return

    for df in data_list:
        # Handle errors per dataframe so one bad frame does not abort the others
        try:
            if df.empty:
                print("Error saving data: skipping empty dataframe")
                continue
            # Work on a new frame: the original reset the index in place, which added
            # an extra column to the caller's dataframe on every call.
            to_save = df.reset_index()
            company = to_save.iloc[0]['Company']
            to_save.to_csv(os.path.join(folder_path, f"{company} data.csv"), index=False)
        except Exception as e:
            print(f"Error saving data: {e}")


### Data Processing

In [32]:
def day_average_std(df: pd.DataFrame, columns: [str] = ['Close', 'Open', 'High', 'Low', 'Volume'], days: int = 30, company_name: str = None) -> pd.DataFrame:
    """
    Add lagged rolling mean and standard deviation columns for the given columns.

    For every entry of ``columns`` two columns are added to ``df`` (which is modified in
    place and also returned): "<column> Day Avg" and "<column> Day Std", prefixed with
    "<company_name>-" when a company name is given. Each holds the rolling statistic over
    ``days`` rows shifted down by one row so the current row is not included. Rows without
    a rolling value are filled with the statistic of the first ``days + 1`` rows.

    :param df: dataframe of stock data
    :param columns: names of the columns to compute the statistics for
    :param days: number of rows in the rolling window
    :param company_name: optional prefix for the new column names
    :return: the same dataframe with the added average and standard deviation columns
    """
    for column in columns:
        label = column if company_name is None else f"{company_name}-{column}"
        source = df[column]
        window = source.rolling(days)
        warmup = source.iloc[0:days + 1]  # rows used to fill the leading gaps

        # rolling mean of the previous rows (shifted so today is excluded)
        lagged_mean = window.mean().shift(1).astype('float32')
        df[f"{label} Day Avg"] = lagged_mean.fillna(warmup.mean().astype('float32'))

        # rolling standard deviation of the previous rows, filled the same way
        lagged_std = window.std().shift(1).astype('float32')
        df[f"{label} Day Std"] = lagged_std.fillna(warmup.std())

    return df

In [33]:
def overall_averages(data_list: [pd.DataFrame]) -> pd.DataFrame:
    """
    Compute the average values for each column for all stocks in the list,
    corresponding to the previous day's averages.

    Each input dataframe must contain the columns 'Date', 'Company', 'Ticker' and
    'Exchange'; every remaining column is averaged per date across all stocks. A column
    named 'index' (left behind by reset_index on an already reset frame) is ignored.

    :param data_list: list of dataframes of individual stock data.
    :return: dataframe of overall averages indexed by 'Date', with columns named
        'All Prev Day Avg <column>'. Values are shifted down one row, so each date
        carries the averages of the preceding date in the table (the first is NaN).
    """
    stacked = pd.concat(data_list)
    stacked = stacked.drop(columns=['Company', 'Ticker', 'Exchange'])
    # Drop the bookkeeping column before averaging. The original looked for 'index'
    # only after the columns had been renamed with the 'All Prev Day Avg ' prefix, so
    # the check never matched and an averaged 'index' column leaked into the result.
    stacked = stacked.drop(columns=['index'], errors='ignore')
    stacked = stacked.set_index('Date')

    # Calculate daily averages across all stocks
    daily_averages = stacked.groupby(stacked.index).mean().astype('float32')

    # Shift the averages to represent the previous day
    daily_averages = daily_averages.shift(1).astype('float32')

    # Rename columns to reflect they are lagged values
    daily_averages.columns = ['All Prev Day Avg ' + col for col in daily_averages.columns]

    return daily_averages


In [34]:
def exchange_averages(data_list: [pd.DataFrame]) -> [pd.DataFrame]:
    """
    Compute, per exchange, the lagged daily averages of the price and volume columns.

    All stocks are pooled, averaged per ('Date', 'Exchange') pair over the columns
    Open, High, Low, Close and Volume, and each exchange's series is then shifted by one
    row so that a date carries the averages of that exchange's preceding date.

    :param data_list: list of dataframes of individual stock data.
    :return: dictionary mapping each exchange name to a dataframe with the columns
        'Date', 'Exchange' and 'Exchange Prev Day Avg <column>'.
    """
    value_columns = ['Open', 'High', 'Low', 'Close', 'Volume']

    pooled = pd.concat(data_list)

    # Mean of every value column for each (date, exchange) pair
    per_day = pooled.groupby(['Date', 'Exchange'])[value_columns].mean().astype('float32')

    # Lag within each exchange so a row only sees the previous date's averages
    lagged = per_day.groupby(level='Exchange').shift(1).astype('float32')
    lagged.columns = ['Exchange Prev Day Avg ' + name for name in lagged.columns]

    # One flat dataframe per exchange
    exchange_dfs = {}
    for exchange, frame in lagged.groupby(level='Exchange'):
        exchange_dfs[exchange] = frame.reset_index()
    return exchange_dfs


In [35]:
def add_exchange_averages_to_stocks(data_list: [pd.DataFrame], exchange_dfs: dict) -> [pd.DataFrame]:
    """
    Add exchange averages to each stock in the data list.

    The exchange of a stock is read from the first row of its 'Exchange' column and the
    matching averages are left-merged on ['Date', 'Exchange']. A stock that is empty, or
    whose exchange has no entry in ``exchange_dfs``, is passed through as an unmodified
    copy (with a printed warning in the latter case) instead of raising.

    :param data_list: list of dataframes of individual stock data.
    :param exchange_dfs: dictionary of dataframes containing exchange averages.
    :return: list of new dataframes with added exchange average columns.
    """
    updated_data_list = []

    for stock_df in data_list:
        if stock_df.empty:
            # No first row to read the exchange from
            updated_data_list.append(stock_df.copy())
            continue

        exchange_name = stock_df['Exchange'].iloc[0]

        # Get the corresponding exchange average dataframe
        exchange_avg_df = exchange_dfs.get(exchange_name)
        if exchange_avg_df is None:
            # The original passed None straight to pd.merge, which raises TypeError
            print(f"Warning: no exchange averages available for exchange {exchange_name}")
            updated_data_list.append(stock_df.copy())
            continue

        updated_df = pd.merge(stock_df, exchange_avg_df, how='left', on=['Date', 'Exchange'])
        updated_data_list.append(updated_df)

    return updated_data_list

In [36]:
def add_overall_averages_to_stocks(data_list: [pd.DataFrame], average_dfs: [pd.DataFrame]):
    """
    Attach the overall (all-stock) averages to every stock dataframe.

    :param data_list: list of dataframes of individual stock data.
    :param average_dfs: the overall averages; it is passed as the right-hand side of a
        single left merge on 'Date' for every stock.
    :return: list of new dataframes with added overall average columns.
    """
    # Left merge keeps every stock row, matching the averages by date
    return [
        pd.merge(stock_df, average_dfs, how='left', on='Date')
        for stock_df in data_list
    ]

In [37]:
def detect_seasonality(df: pd.DataFrame, max_period: int = 365) -> (int, int, int):
    """
    Estimate the dominant seasonal periods of the 'Close' column from its Fourier spectrum.

    The positive-frequency components are ranked by power, converted to periods (in
    samples), and the strongest periods shorter than ``max_period`` are collected,
    skipping any period within 0.05 of one already collected.

    :param df: Dataframe of stock data
    :param max_period: the max period to consider for seasonalities (exclusive)
    :return: array of up to three periods, rounded to integers, strongest first
    """
    wanted = 3
    min_gap = 0.05

    # Power spectrum of the closing prices
    spectrum = fft(df['Close'].to_numpy())
    power = np.abs(spectrum) ** 2
    freqs = np.fft.fftfreq(len(spectrum), d=1)  # sample spacing of 1 (one row per step)

    # Keep strictly positive frequencies only (drops the zero-frequency term)
    positive = freqs > 0
    positive_freqs = freqs[positive]
    positive_power = power[positive]

    # Periods ordered from the most to the least powerful component
    strongest_first = np.argsort(positive_power)[::-1]
    ranked_periods = 1 / positive_freqs[strongest_first]
    candidates = ranked_periods[ranked_periods < max_period]

    # Collect the leading periods, skipping near-duplicates of ones already chosen
    chosen = []
    for candidate in candidates:
        if all(abs(candidate - kept) >= min_gap for kept in chosen):
            chosen.append(candidate)
        if len(chosen) == wanted:
            break

    return np.round(chosen).astype(int)

### Evaluation

In [38]:
def calculate_standard_metrics(truth_series: pd.Series, predicted_series: pd.Series) -> (float, float, float, float, float):
    """
    Calculate standard regression metrics comparing predicted prices to actual prices.
    :param truth_series: series of actual prices
    :param predicted_series: series of predicted prices
    :return: RMSE, MAE, MAPE, Rsq, Explained Variance
    """
    # Take the square root explicitly instead of passing squared=False: that keyword
    # is deprecated in scikit-learn 1.4 and absent from later releases, while
    # sqrt(MSE) works on every version.
    rmse = np.sqrt(mean_squared_error(truth_series, predicted_series))
    mae = mean_absolute_error(truth_series, predicted_series)
    mape = mean_absolute_percentage_error(truth_series, predicted_series)
    rsq = r2_score(truth_series, predicted_series)
    ev = explained_variance_score(truth_series, predicted_series)
    return rmse, mae, mape, rsq, ev

In [39]:
def direction_evaluation(df: pd.DataFrame, truth_column: str, predicted_column: str) -> pd.DataFrame:
    """
    Label, for every row, the actual and the predicted price direction relative to the
    previous row's actual price, and whether the two agree.

    The input is not modified. Three columns are added to a copy: 'Actual Direction' and
    'Predicted Direction' ('Up' when the value is strictly greater than the previous
    actual price, otherwise 'Down' — which includes the first row, which has no previous
    price) and 'Direction Correct' ('Correct' or 'Incorrect').

    :param df: dataframe of data with actual and predicted price columns
    :param truth_column: string for the title of the truth column (actual price)
    :param predicted_column: string for the title of the predicted price column
    :return: copy of the dataframe with the three direction columns added
    """
    labelled = df.copy()

    # Both directions are measured against yesterday's actual price
    previous_actual = labelled[truth_column].shift(1)
    actual_rose = labelled[truth_column] > previous_actual
    predicted_rose = labelled[predicted_column] > previous_actual

    labelled['Actual Direction'] = np.where(actual_rose, 'Up', 'Down')
    labelled['Predicted Direction'] = np.where(predicted_rose, 'Up', 'Down')
    # The labels agree exactly when the two boolean masks agree
    labelled['Direction Correct'] = np.where(actual_rose == predicted_rose, 'Correct', 'Incorrect')
    return labelled

In [40]:
def calculate_accuracy(df: pd.DataFrame, truth_column: str = 'Close', predicted_column: str = 'Predicted Close') -> (float, float):
    """
    Score the predicted price direction against the actual direction.

    Directions are derived with direction_evaluation; precision is computed with 'Up' as
    the positive label.

    :param df: dataframe of data with actual and predicted price columns
    :param truth_column: column containing the actual price
    :param predicted_column: column containing the predicted price
    :return: accuracy, precision
    """
    labelled = direction_evaluation(df, truth_column, predicted_column)
    actual_direction = labelled['Actual Direction']
    predicted_direction = labelled['Predicted Direction']

    return (
        accuracy_score(actual_direction, predicted_direction),
        precision_score(actual_direction, predicted_direction, pos_label='Up'),
    )

In [41]:
def simulate_trading(df: pd.DataFrame, initial_funds: int = 10000, truth_column: str = 'Close', predicted_column: str = 'Predicted Close') -> (float, float, float):
    """
    Simulate trading based on predictions to calculate profitability and average profit per trade.

    Strategy: on each day, if the prediction for the next row is higher than today's
    actual price, buy as many whole shares as the available funds allow at today's price
    and sell all of them at the next row's actual price. A day is skipped when today's
    price, the next prediction or the next actual price is missing (NaN), or when today's
    price is not positive, so missing data can never turn the running funds into NaN.
    The input dataframe is not modified.

    :param df: dataframe of data with actual and predicted price columns
    :param initial_funds: initial investment amount
    :param truth_column: string for the title of the truth column (actual price)
    :param predicted_column: string for the title of the predicted price column
    :return: net gain or loss from trading strategy, average profit per trade
        (0 when no trade was made), net gain as a percentage of the initial funds
    """
    funds = initial_funds
    trades = []

    actual = df[truth_column].to_numpy(dtype=float)
    # shift predictions so position i holds the prediction made for row i + 1
    predicted_next = df[predicted_column].shift(-1).to_numpy(dtype=float)

    for i in range(len(actual) - 1):  # the last row has no following day to sell on
        today_price = actual[i]
        next_day_price = actual[i + 1]
        predicted_tomorrow_price = predicted_next[i]

        # All three values are needed for a round trip. The original guard used `or`,
        # which let NaN prices through, and a NaN sell price made the funds NaN.
        if np.isnan(today_price) or np.isnan(predicted_tomorrow_price) or np.isnan(next_day_price):
            continue
        # Only buy when a rise is predicted; a non-positive price cannot be bought at
        if today_price <= 0 or predicted_tomorrow_price <= today_price:
            continue

        shares = funds // today_price
        if shares <= 0:
            continue  # not enough funds for a single share

        # Buy today, sell everything the next day
        funds -= shares * today_price
        funds += shares * next_day_price
        trades.append(shares * (next_day_price - today_price))

    # Every position is closed on the day after it is opened, so no shares remain here
    net_gain = funds - initial_funds
    average_profit = sum(trades) / len(trades) if trades else 0
    net_gain_percent = net_gain / initial_funds * 100

    return net_gain, average_profit, net_gain_percent

### Comparison to the Sprott Uranium Miners ETF (URNM)

In [42]:
def compare_to_ETF(urnm: pd.DataFrame, df: pd.DataFrame = None, window: int = 100, initial_funds: int = 10000):
    """
    Compute the buy-and-hold result of investing in the URNM ETF over a comparison period.

    The comparison period is:
        - the date range covered by ``df`` (first to last 'Date', inclusive) when a stock
          dataframe is given, or
        - the last ``window`` rows of ``urnm`` otherwise.
    As many whole shares as ``initial_funds`` allow are bought at the first 'Close' of the
    period and valued at the last 'Close'; cash left over after buying whole shares is
    not added back. Only the buy-and-hold strategy is implemented here.

    :param urnm: dataframe of URNM data with 'Date' and 'Close' columns (not modified)
    :param df: optional dataframe of a stock whose 'Date' range defines the period
    :param window: number of most recent URNM rows to use when ``df`` is not given
    :param initial_funds: initial investment amount
    :return: net gain from holding the ETF, net gain as a percentage of the initial funds
    :raises ValueError: if the comparison period contains no URNM rows
    """
    if df is not None:
        start_date = df['Date'].iloc[0]
        end_date = df['Date'].iloc[-1]
        held = urnm[(urnm['Date'] >= start_date) & (urnm['Date'] <= end_date)]
    else:
        # The original derived the window's start and end dates but never applied
        # them, so `window` had no effect and the whole history was used.
        held = urnm.iloc[-window:]

    if held.empty:
        raise ValueError("No URNM rows fall inside the comparison period")

    # invest and hold
    start_price = held['Close'].iloc[0]
    shares = initial_funds // start_price

    end_price = held['Close'].iloc[-1]
    funds = shares * end_price

    hold_net_gain = funds - initial_funds
    net_gain_percent = hold_net_gain / initial_funds * 100

    return hold_net_gain, net_gain_percent


### Running Prophet

# Start Here to Run Prophet
Run all of the above cells.
Then follow the comments for the Load or Query data section.


### Load or Query Data:
To load or query data, you will need to upload either the given stock list file or the stock_data folder.

In [None]:
# run this cell for querying using the stock list file
# (the list must provide the 'Company', 'Ticker' and 'Stock Exchange' columns read by query_yf_list)
stocks_list = pd.read_csv('../Uranium Company Master List.csv')
periods = 365*5 # 1825 calendar days (~5 years). NOTE(review): the previous comment said 3 years (reduced from 5 to conserve memory) — confirm which window is intended; some stocks only have data from 2022 to the present
data_list = query_yf_list(stocks_list, periods)
for data in data_list:
    data.reset_index(inplace=True)  # must have an integer index for Prophet; the previous index becomes a regular column

Note: Output from the above cell may contain warnings about missing data. This is expected as the listings of companies change.

In [19]:
# run this cell for loading data from a folder mounted to your environment (optional)
# list=True keeps one dataframe per CSV file found directly inside data_folder
data_folder = 'stock_data'
data_list = read_csvs_from_folder(data_folder, list=True)
# list=False concatenates every CSV file in the ETF folder into a single dataframe
urnm_data_folder = 'stock_data/SP Uranium ETF'
urnm_data = read_csvs_from_folder(urnm_data_folder, list=False)


In [19]:
# run this to query data for the Sprott Uranium Miners ETF (ticker URNM)
stock = "Sprott Uranium Miners ETF", "URNM", "XNYS"  # (company, ticker, exchange) tuple expected by query_yf
period = 365*5  # number of calendar days of history to request
urnm_data = query_yf(stock, period)
# query_yf returns None when the query fails, in which case the next line raises AttributeError
urnm_data.reset_index(inplace=True)


In [20]:
# run this cell to save the data to a folder (optional)
save_stocks_to_csv(data_list, folder_path='stock_data')

# to_csv does not create missing directories, so make sure the ETF subfolder exists
urnm_output_folder = "stock_data/SP Uranium ETF"
os.makedirs(urnm_output_folder, exist_ok=True)
# `index` expects a bool. The original passed 'Date', which is merely truthy and wrote
# the integer row index as an extra unnamed column; urnm_data is expected to carry
# 'Date' as a regular column already (compare_to_ETF reads it as one).
urnm_data.to_csv(os.path.join(urnm_output_folder, "Sprott Uranium Miners ETF data.csv"), index=False)


### Preprocessing and Running Prophet

In [None]:
def _prophet_regressors(include_exchange_avgs: bool, include_overall_avgs: bool) -> [str]:
    """Return the names of the regressor columns that run_prophet feeds to the model."""
    price_columns = ['Open', 'High', 'Low', 'Volume']
    regressors = list(price_columns)
    if include_exchange_avgs:
        regressors += ['Exchange Prev Day Avg Open', 'Exchange Prev Day Avg High', 'Exchange Prev Day Avg Low', 'Exchange Prev Day Avg Close', 'Exchange Prev Day Avg Volume']
    if include_overall_avgs:
        regressors += ['All Prev Day Avg Open', 'All Prev Day Avg High', 'All Prev Day Avg Low', 'All Prev Day Avg Close', 'All Prev Day Avg Volume']
    # Rolling statistics exist only for the columns day_average_std processes by default.
    # The original appended to `regressors` while iterating over it, which never terminates.
    for column in price_columns + ['Close']:
        regressors += [f'{column} Day Avg', f'{column} Day Std']
    return regressors


def _build_prophet_model(regressors: [str], custom_season) -> Prophet:
    """Create an unfitted Prophet model configured with the given regressors and optional custom seasonality."""
    model = Prophet(daily_seasonality=True, weekly_seasonality=False, yearly_seasonality=False)
    for regressor in regressors:
        model.add_regressor(regressor)
    if custom_season is not None:
        # Store the period as a plain Python number (detect_seasonality returns numpy
        # integers) so that serializing the model does not depend on numpy scalar handling.
        model.add_seasonality(name='custom_season', period=float(custom_season), fourier_order=5)
    return model


def run_prophet(df: pd.DataFrame, custom_season: int = None, include_exchange_avgs = False, include_overall_avgs = False, save_model = False) -> [dict]:
    """
    Runs the Prophet model on a given DataFrame to predict the next row's closing price.

    This function preprocesses a copy of the input DataFrame, splits it into training and testing
    datasets, builds and fits a Prophet model with optional custom seasonality, and evaluates the
    model's predictions over several event horizons (the first 5, 10, 20, 50 and 100 test rows).

    Parameters:
    - df (pd.DataFrame): The input DataFrame containing stock market data. It must include the columns:
      'Date', 'Company', 'Ticker', 'Exchange', 'Open', 'High', 'Low', 'Close', and 'Volume'.
      The input DataFrame itself is not modified.
    - custom_season (int, optional): The number of days for the custom seasonality period.
      If None, no custom seasonality is added. Default is None.
    - include_exchange_avgs (bool, optional): Also use the 'Exchange Prev Day Avg ...' columns as
      regressors; they must already be present in `df`. Default is False.
    - include_overall_avgs (bool, optional): Also use the 'All Prev Day Avg ...' columns as
      regressors; they must already be present in `df`. Default is False.
    - save_model (bool, optional): If True, fit a second model on all rows and write it to
      'prophet_models/{company}_model.json'. Default is False.

    Returns:
    - list of dict: One dictionary per event horizon, in the order 5, 10, 20, 50, 100. When a horizon
      is larger than the test set, every value of its dictionary is NaN. Each dictionary contains:
      - 'RMSE' (float): Root Mean Squared Error of the model's predictions.
      - 'MAE' (float): Mean Absolute Error of the model's predictions.
      - 'MAPE' (float): Mean Absolute Percentage Error of the model's predictions.
      - 'RSQ' (float): R-squared value indicating the goodness of fit.
      - 'Accuracy' (float): Accuracy of the model's directional predictions.
      - 'Precision' (float): Precision of the model's directional predictions.
      - 'Net Gain' (float): Net gain from simulated trading based on model predictions.
      - 'Avg Profit' (float): Average profit per trade from simulated trading.
      - 'Net Gain Percent' (float): Net gain percentage from simulated trading.

    The function internally uses the Prophet model for time series forecasting and several custom functions
    for preprocessing and evaluation:
    - `day_average_std` for adding day average feature,
    - `calculate_standard_metrics` for basic evaluation metrics,
    - `direction_evaluation` for evaluating the direction of change,
    - `calculate_accuracy` for accuracy and precision calculation,
    - `simulate_trading` for simulating trading based on model predictions.

    Example:
    ```python
    import pandas as pd
    df = pd.read_csv('stock_data.csv')
    results = run_prophet(df)
    print(results)
    ```

    Note:
    The target 'y' is the 'Close' value of the following row, so the last row has no target. Rows with
    a missing value in any regressor column are dropped before the train/test split.
    """
    metric_keys = ['RMSE', 'MAE', 'MAPE', 'RSQ', 'Accuracy', 'Precision', 'Net Gain', 'Avg Profit', 'Net Gain Percent']

    # preprocessing
    df = df.copy()
    company = df['Company'].iloc[0]
    df['Date'] = pd.to_datetime(df['Date'])
    df['y'] = df['Close'].shift(-1)  # target: the next row's close
    df['ds'] = df['Date']
    df = day_average_std(df)
    df = df.drop(columns=['Date', 'Company', 'Ticker', 'Exchange', 'Close'])

    regressors = _prophet_regressors(include_exchange_avgs, include_overall_avgs)
    # Prophet rejects NaN values in regressor columns; the lagged average columns are
    # NaN on their first date, so remove such rows up front.
    df = df.dropna(subset=regressors)

    # NOTE(review): the split assumes the frame has comfortably more than ~125 rows;
    # for shorter frames `split - 100` becomes negative and slices from the end.
    split = round(len(df) * 0.8)

    train_df = df.iloc[:split-100]
    test_df = df.iloc[split-100:]

    # model
    model = _build_prophet_model(regressors, custom_season)
    model.fit(train_df)

    event_horizons = [5, 10, 20, 50, 100]
    metrics_dicts = []

    for horizon in event_horizons:
        if horizon > len(test_df):
            # The original fell through and evaluated the previous horizon's frame again
            # (or failed on the first horizon). Keep the position in the result list but
            # report that nothing could be measured.
            print("Warning: Event horizon is larger than the test set size. " +
                  f"Requested horizon is: {horizon}; test set size is: {len(test_df)}.")
            metrics_dicts.append(dict.fromkeys(metric_keys, np.nan))
            continue

        # explicit copy: a column is added below and must not write into a slice of test_df
        test_horizon_df = test_df.iloc[:horizon].copy()

        future = test_horizon_df.drop('y', axis=1)

        forecast = model.predict(future)
        test_horizon_df['Predicted Close'] = forecast['yhat'].values

        # evaluation: pairs y[t] with the prediction for y[t], leaving out the last row
        true_series = test_horizon_df['y'].iloc[:-1]
        pred_series = test_horizon_df['Predicted Close'].shift(1).iloc[1:]

        metrics_dict = {}
        rmse, mae, mape, rsq, _ev = calculate_standard_metrics(true_series, pred_series)
        metrics_dict['RMSE'] = rmse
        metrics_dict['MAE'] = mae
        metrics_dict['MAPE'] = mape
        metrics_dict['RSQ'] = rsq

        direction_df = direction_evaluation(test_horizon_df, 'y', 'Predicted Close')
        accuracy, precision = calculate_accuracy(direction_df, 'y', 'Predicted Close')
        metrics_dict['Accuracy'] = accuracy
        metrics_dict['Precision'] = precision

        net_gain, avg_profit, net_gain_percent = simulate_trading(direction_df, 10000, 'y', 'Predicted Close')
        metrics_dict['Net Gain'] = net_gain
        metrics_dict['Avg Profit'] = avg_profit
        metrics_dict['Net Gain Percent'] = net_gain_percent

        metrics_dicts.append(metrics_dict)

    if save_model:
        # save a model fitted to the latest data for future predictions. A Prophet object
        # can only be fit once, so a fresh model with the same configuration is built
        # instead of re-fitting the evaluation model.
        final_model = _build_prophet_model(regressors, custom_season)
        final_model.fit(df)

        model_dir = 'prophet_models'
        os.makedirs(model_dir, exist_ok=True)
        model_path = os.path.join(model_dir, f"{company}_model.json")

        with open(model_path, 'w') as f:
            f.write(model_to_json(final_model))

    return metrics_dicts

In [None]:
# Build the lagged per-exchange and all-stock average tables from the stock frames,
# then left-merge both onto every stock's dataframe.
# NOTE(review): data_list is overwritten with the merged frames, so running this cell a
# second time without reloading data_list would feed already-merged frames back in —
# confirm before re-executing.
exchange_averages_dfs = exchange_averages(data_list)
overall_averages_dfs = overall_averages(data_list)
data_list = add_exchange_averages_to_stocks(data_list, exchange_averages_dfs)
data_list = add_overall_averages_to_stocks(data_list, overall_averages_dfs)

In [None]:
# one metrics dataframe per stock, each with one row per event horizon
stocks_horizon_metrics = []

for data in data_list:
    # use the strongest detected seasonality as Prophet's custom season
    seasonalities = detect_seasonality(data)
    metrics_list = run_prophet(df=data, custom_season=seasonalities[0], include_exchange_avgs=False, include_overall_avgs=False, save_model=True)
    event_horizons = [5, 10, 20, 50, 100]  # same order as the entries of metrics_list

    # identifying columns shared by every row of this stock
    identity = {
        'Company': data['Company'][0],
        'Ticker': data['Ticker'][0],
        'Exchange': data['Exchange'][0],
    }

    horizons_metrics_list = []
    for i, horizon in enumerate(event_horizons):
        metrics = metrics_list[i]
        row = {
            **identity,
            'Event Horizon': horizon,
            'RMSE': metrics['RMSE'],
            'MAE': metrics['MAE'],
            'MAPE': metrics['MAPE'],
            'R2': metrics['RSQ'],
            'Accuracy': metrics['Accuracy'],
            'Precision': metrics['Precision'],
            'Net Gain': metrics['Net Gain'],
            'Net Gain %': metrics['Net Gain Percent'],
            'Avg Profit': metrics['Avg Profit'],
        }
        horizons_metrics_list.append(row)

    company_df = pd.DataFrame(horizons_metrics_list)
    stocks_horizon_metrics.append(company_df)

### Examining Results

In [None]:
# average every metric across all stocks, separately for each event horizon
overall_horizon_metrics = pd.concat(
    [
        # the identifying text columns cannot be averaged, so leave them out
        df.drop(['Company', 'Ticker', 'Exchange'], axis=1)
        for df in stocks_horizon_metrics
    ]
).groupby('Event Horizon').mean()
overall_horizon_metrics

In [None]:
# Buy-and-hold benchmark: the ETF is held rather than traded day to day.
# compare_to_ETF is called without a stock dataframe, so its `df is None` branch is used.
etf_net_gain, etf_net_gain_percent = compare_to_ETF(urnm_data, initial_funds=10000)
print("ETF Net Gain: ", etf_net_gain)
print("ETF Net Gain %: ", etf_net_gain_percent)

In [None]:
import matplotlib.pyplot as plt

# Event horizon whose Net Gain % is compared across companies
PLOT_EVENT_HORIZON = 100

# Collect one record per company and build the DataFrame once. DataFrame.append, used
# by the original, was removed in pandas 2.0, and growing a frame row by row is quadratic.
net_gain_records = []
for metric_df in stocks_horizon_metrics:
    horizon_rows = metric_df.loc[metric_df['Event Horizon'] == PLOT_EVENT_HORIZON, 'Net Gain %']
    net_gain_records.append({
        'Company': metric_df['Company'].iloc[0],
        'Net Gain %': horizon_rows.iloc[0],
    })

consolidated_gains = pd.DataFrame(net_gain_records, columns=['Company', 'Net Gain %'])

# Now that we have all the data consolidated, let's plot it
consolidated_gains.plot.bar(x='Company', y='Net Gain %', title="Companies Net Gain %")
plt.show()  # Display the plot
