In [1]:
%cd /content/drive/MyDrive/Machine Learning

/content/drive/MyDrive/Machine Learning


In [2]:
%pip install git+https://github.com/Nixtla/neuralforecast.git

Collecting git+https://github.com/Nixtla/neuralforecast.git
  Cloning https://github.com/Nixtla/neuralforecast.git to /tmp/pip-req-build-rfh7aixe
  Running command git clone --filter=blob:none --quiet https://github.com/Nixtla/neuralforecast.git /tmp/pip-req-build-rfh7aixe
  Resolved https://github.com/Nixtla/neuralforecast.git to commit 0af27340c40760a874c41937799b7bbbdf018f1c
  Running command git submodule update --init --recursive -q
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting coreforecast>=0.0.6 (from neuralforecast==1.7.1)
  Downloading coreforecast-0.0.8-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (193 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.5/193.5 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning>=2.0.0 (from neuralforecast==1.7.1)
  Downloading pytorch_lightning-2.2.2-py3-

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import copy
from datetime import datetime
from neuralforecast.core import NeuralForecast
from neuralforecast.models import NHITS, PatchTST, iTransformer, TSMixer
from utilsforecast.losses import mae, mse
from utilsforecast.evaluation import evaluate

def load_stock_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    data = data[['Close', 'Volume']]  # Include trading volume as a feature
    data['Returns'] = data['Close'].pct_change()  # Calculate percentage returns
    data = data.dropna()  # Remove missing values
    data = data.reset_index()
    data.columns = ['ds', 'y', 'Volume', 'Returns']
    data['unique_id'] = ticker
    return data

def run_experiment(stock_data, train_size, val_size, freq, ticker, horizon, epochs):
    models = [
        iTransformer(h=horizon, input_size=5*horizon, n_series=1, max_steps=epochs),
        TSMixer(h=horizon, input_size=5*horizon, n_series=1, max_steps=epochs),
        NHITS(h=horizon, input_size=5*horizon, max_steps=epochs),
        PatchTST(h=horizon, input_size=5*horizon, max_steps=epochs)
    ]

    # Save the mean and std dev of the columns
    mean = stock_data[['y', 'Volume', 'Returns']].mean()
    std = stock_data[['y', 'Volume', 'Returns']].std()

    # Normalize the input data
    stock_data[['y', 'Volume', 'Returns']] = (stock_data[['y', 'Volume', 'Returns']] - mean) / std

    nf = NeuralForecast(models=models, freq=freq)

    # Initialize an empty DataFrame to store the predictions
    predictions_df = pd.DataFrame()

    # Perform rolling forecast
    for i in range(len(stock_data) - train_size - val_size - horizon + 1):
        train_val_data = stock_data.iloc[i:i+train_size+val_size]

        nf_preds = nf.cross_validation(df=train_val_data, val_size=val_size, n_windows=None, test_size=horizon)

        # Append the predictions to the predictions_df
        predictions_df = pd.concat([predictions_df, nf_preds])

    predictions_df = predictions_df.reset_index(drop=True)
    evaluation = evaluate(df=predictions_df, metrics=[mae, mse], models=['iTransformer', 'TSMixer', 'NHITS', 'PatchTST'])
    evaluation.to_csv(f'{ticker}_results_horizon_{horizon}_epochs_{epochs}.csv', index=False, header=True)
    return evaluation, predictions_df

tickers = ['SPY']
start_date = '2010-01-01'
end_date = '2023-04-30'
horizon = 7
epochs = 2000

dataframes = []
predictions_dfs = []
for ticker in tickers:
    stock_data = load_stock_data(ticker, start_date, end_date)
    train_size = len(stock_data) - 365*2  # Use the last 2 years for validation and testing
    val_size = 365  # Use the last year for validation
    freq = 'D'

    evaluation, nf_preds = run_experiment(stock_data, train_size, val_size, freq, ticker, horizon, epochs)
    evaluation['ticker'] = ticker
    dataframes.append(evaluation)
    predictions_dfs.append(nf_preds.copy())

full_df = pd.concat(dataframes, ignore_index=True)
full_df = full_df.drop(['unique_id'], axis=1)

model_names = ['iTransformer', 'TSMixer', 'NHITS', 'PatchTST']
fig, axs = plt.subplots(2, 2, figsize=(15, 15))
bar_width = 0.35
axs = axs.flatten()

for i, ticker in enumerate(tickers):
    df_subset = full_df[(full_df['ticker'] == ticker) & (full_df['metric'] == 'mae')]
    mae_vals = df_subset[model_names].values.flatten()
    df_subset = full_df[(full_df['ticker'] == ticker) & (full_df['metric'] == 'mse')]
    mse_vals = df_subset[model_names].values.flatten()
    indices = np.arange(len(model_names))

    bars_mae = axs[i].bar(indices - bar_width / 2, mae_vals, bar_width, color='skyblue', label='MAE')
    bars_mse = axs[i].bar(indices + bar_width / 2, mse_vals, bar_width, color='orange', label='MSE')

    for bars in [bars_mae, bars_mse]:
        for bar in bars:
            height = bar.get_height()
            axs[i].annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
                            xytext=(0, 3), textcoords="offset points", ha='center', va='bottom')

    axs[i].set_xticks(indices)
    axs[i].set_xticklabels(model_names, rotation=45)
    axs[i].set_title(ticker)
    axs[i].legend(loc='best')

plt.tight_layout()
plt.show()
for i, ticker in enumerate(tickers):
    nf_preds = predictions_dfs[i]
    stock_data = load_stock_data(ticker, start_date, end_date)

    # Get the mean and standard deviation of the target variable (y)
    y_mean = stock_data['y'].mean()
    y_std = stock_data['y'].std()

    # Rescale the predicted prices
    rescaled_predictions_df = nf_preds.copy()
    rescaled_predictions_df[['iTransformer', 'TSMixer', 'NHITS', 'PatchTST', 'y']] = \
        (nf_preds[['iTransformer', 'TSMixer', 'NHITS', 'PatchTST', 'y']] * y_std) + y_mean
    # Save the rescaled predictions to a CSV file
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y %H:%M")
    csv_file = f'predictions/{ticker}_rescaled_predictions_horizon_{horizon}_epochs_{epochs}_{dt_string}.csv'
    rescaled_predictions_df.to_csv(csv_file, index=False, header=True)

    for model_name in model_names:
        plt.figure(figsize=(16, 8))
        plt.plot(stock_data['ds'], stock_data['y'], label='Actual Price', linewidth=3, color='blue')
        plt.plot(rescaled_predictions_df['ds'], rescaled_predictions_df[model_name],
                 label=model_name, linewidth=1, color='red')

        plt.xlabel('Date', fontsize=14)
        plt.ylabel('Price', fontsize=14)
        plt.title(f'{ticker} Stock Price Prediction - {model_name}', fontsize=16)
        plt.grid(axis='y')
        plt.legend(loc='upper left', fontsize=12)
        plt.show()

[*********************100%%**********************]  1 of 1 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Returns'] = data['Close'].pct_change()  # Calculate percentage returns
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/parsing.py:199: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
INFO:lightning_fabric.utilities.seed:Seed set to 1
INFO:lightning_fabric.utilities.seed:Seed set to 1
INFO:lightning_fabric.utilities.seed:Seed set to 1
INFO:lightning_fabric.utilities.seed:Seed set to 1
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU a

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]