<a href="https://colab.research.google.com/github/ohadbarr1/Thesis/blob/main/POC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installations and initializations

Install chronos

In [3]:
pip install git+https://github.com/amazon-science/chronos-forecasting.git

Collecting git+https://github.com/amazon-science/chronos-forecasting.git
  Cloning https://github.com/amazon-science/chronos-forecasting.git to /tmp/pip-req-build-e7l9qtvz
  Running command git clone --filter=blob:none --quiet https://github.com/amazon-science/chronos-forecasting.git /tmp/pip-req-build-e7l9qtvz
  Resolved https://github.com/amazon-science/chronos-forecasting.git to commit eb7bdfc047de3e7af972b4ee7cf23a7968b7daa3
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


## **Import libraries**

In [4]:
# Import relevant libraries
import pandas as pd
import numpy as np
from google.colab import files
from datetime import timedelta

# Data preprocessing
from sklearn.preprocessing import MinMaxScaler

# Gather financial data
import yfinance as yf

# Plots
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Models
import torch
from sklearn.model_selection import train_test_split
from chronos import ChronosPipeline

## Initializing pipeline

In [5]:
# Use the Chronos-t5-base model, 200m parameters
pipeline = ChronosPipeline.from_pretrained(
  "amazon/chronos-t5-base",
  device_map="cuda",
  torch_dtype=torch.bfloat16,
)

### Gather VIX Data

In [6]:
# Data fetching and preprocessing
tickers = ['^VIX9D', '^VIX', '^VIX3M', '^VIX6M']
data = yf.download(tickers, start="2011-01-03")['Close']
# Reordering columns and renaming them according to your format
data = data[['^VIX', '^VIX3M', '^VIX6M', '^VIX9D']]
data.columns = ['VIX', 'VIX3M', 'VIX6M', 'VIX9D']
# Resetting index to make 'Date' a column
data.reset_index(inplace=True)

data



[*********************100%%**********************]  4 of 4 completed


Unnamed: 0,Date,VIX,VIX3M,VIX6M,VIX9D
0,2011-01-03,17.610001,20.620001,23.400000,16.040001
1,2011-01-04,17.379999,20.610001,23.190001,16.059999
2,2011-01-05,17.020000,20.049999,22.780001,15.570000
3,2011-01-06,17.400000,20.350000,22.870001,15.710000
4,2011-01-07,17.139999,20.290001,22.920000,15.010000
...,...,...,...,...,...
3419,2024-08-06,27.709999,26.760000,25.559999,28.930000
3420,2024-08-07,27.850000,27.010000,25.750000,29.260000
3421,2024-08-08,23.790001,24.129999,23.500000,23.900000
3422,2024-08-09,20.370001,21.180000,21.430000,20.059999


In [None]:
file_path = "C:/University/Thesis"
data.to_csv('data.csv', index=False)

files.download('data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
VIX9D = data[['Date','VIX9D']]
VIX = data[['Date','VIX']]
VIX3M = data[['Date','VIX3M']]
VIX6M = data[['Date','VIX6M']]


In [None]:
context = torch.tensor(VIX["VIX"])
prediction_length = 7
forecast = pipeline.predict(context, prediction_length)

In [None]:
# Avoid SettingWithCopyWarning by using .loc
VIX.loc[:, 'Date'] = pd.to_datetime(VIX['Date'])

# Initialize a DataFrame to store results
results = pd.DataFrame(columns=['Date', 'Actual', 'Forecast_1', 'Forecast_2', 'Forecast_3', 'Forecast_4', 'Forecast_5', 'Forecast_Avg'])

# Number of days in three days (approx.)
three_days = timedelta(days=3)

# Loop over the DataFrame in steps of three days
for i in range(1000, len(VIX), 3):  # 3 trading days interval
    # Define the current date
    current_date = VIX['Date'].iloc[i]
    end_date = current_date + three_days

    # Use all data up to the current date as context
    context_data = VIX[VIX['Date'] <= current_date]

    # Filter data for the next three days (actual future values)
    actual_next_three_days = VIX[(VIX['Date'] > current_date) & (VIX['Date'] <= end_date)]

    # Convert the entire context up to the current date to a tensor
    context = torch.tensor(context_data["VIX"].values, dtype=torch.float32).unsqueeze(0)

    # Initialize a list to store forecasts
    forecasts = []

    # Run the model 5 times
    for run in range(5):
        # Make a prediction for the next three days
        prediction_length = min(len(actual_next_three_days), 3)  # Predicting 3 days or fewer if the actual data is less
        if prediction_length > 0:  # Ensure there's something to predict
            forecast = pipeline.predict(context, prediction_length)
            if forecast is not None and len(forecast) > 0:
                forecast = forecast.squeeze().cpu().numpy()

                # Ensure forecast is 1-dimensional
                if forecast.ndim > 1:
                    forecast = forecast.flatten()

                # Store the forecast
                forecasts.append(forecast)

    # Proceed only if we have forecasts
    if forecasts:
        # Ensure the lengths of actual and forecasts match
        forecast_dates = actual_next_three_days['Date'].values
        aligned_length = min(len(forecast_dates), len(forecasts[0]))
        forecast_dates = forecast_dates[:aligned_length]
        actual_values = actual_next_three_days['VIX'].values[:aligned_length]

        # Truncate forecasts to the aligned length
        forecasts = [forecast[:aligned_length] for forecast in forecasts]

        # Calculate the average of the 5 forecasts
        forecast_avg = np.mean(forecasts, axis=0)

        # Store the results in the DataFrame
        result_df = pd.DataFrame({
            'Date': forecast_dates,
            'Actual': actual_values,
            'Forecast_1': forecasts[0],
            'Forecast_2': forecasts[1],
            'Forecast_3': forecasts[2],
            'Forecast_4': forecasts[3],
            'Forecast_5': forecasts[4],
            'Forecast_Avg': forecast_avg
        })

        # Avoid concatenating empty dataframes
        if not result_df.empty:
            # Append to the main results DataFrame
            results = pd.concat([results, result_df], ignore_index=True)

# Reset index of the final DataFrame
results.reset_index(drop=True, inplace=True)

# Create the interactive plot
import plotly.graph_objects as go

# Create the plot
fig = go.Figure()

# Add the actual values line
fig.add_trace(go.Scatter(
    x=results['Date'],
    y=results['Actual'],
    mode='lines',
    name='Actual Values',
    line=dict(color='blue')
))

# Add the averaged forecast values line
fig.add_trace(go.Scatter(
    x=results['Date'],
    y=results['Forecast_Avg'],
    mode='lines',
    name='Forecasted Values (Avg)',
    line=dict(color='red')
))

# Update layout
fig.update_layout(
    title="Forecasted vs Actual VIX Values (Averaged Forecast - 5 Iterations, 3-Day Predictions)",
    xaxis_title="Date",
    yaxis_title="VIX Value",
    legend_title="Legend",
    hovermode="x unified"
)

# Show the interactive plot
fig.show()


  results = pd.concat([results, result_df], ignore_index=True)


Train the model

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.cuda.FloatTensor instead (while checking arguments for embedding)

In [None]:
# Set model to evaluation mode
pipeline.model.eval()

# Predict the next day's VIX value using the last sequence from the validation set
with torch.no_grad():
    last_sequence = torch.tensor(X_val_seq[-1:], dtype=torch.float32).cuda()
    predicted_vix = pipeline.model(last_sequence)

# Inverse transform the prediction to get the original scale
predicted_vix_original_scale = scaler.inverse_transform(predicted_vix.cpu().numpy().reshape(-1, 1))

print(f"Predicted VIX value for the next day: {predicted_vix_original_scale[0][0]}")
