In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn import preprocessing



## Set the Notebook Parameters
- Start and End Date
- Model File Name

In [2]:
# Start and End Dates for Ticker Data
start_date = "2021-03-01"
end_date = "2021-06-28"

# Model File
#model_file = "models/vix_prediction_20210307.pl"
model_file = "models/vix_prediction_20210627.pl"
#model_file = "models/vix_prediction_20210321.pl"
#model_file = "models/vix_prediction_20210425.pl"

## Load the Yahoo Finance Data

In [3]:
# Helper Function
def download_data(ticker, start, end):
    return yf.download(ticker, start, end)

# List of Indices to Retrieve Data
indices = ["^VIX", "^GSPC", "^DJI", "^TNX", "DX-Y.NYB", "GLD", "TIP", "VNQ"]
#indices = ["^VIX", "^GSPC", "^IXIC", "^DJI", "^RUT", "CL=F", "GC=F", "SI=F", "^TNX", "BTC-USD", "ETH-USD", "DXY", "VNQ", "QQQ", "URA", "XAR", "AAPL", "EURUSD=X", "VEMAX", "ICVT", "XLY", "XLP", "VYM", "VFH"]

# List of lists
indices_data = [download_data(index, start_date, end_date) for index in indices]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- DXY: No data found for this date range, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
[*****************

## Scale the Independent Values

In [4]:
# Previous Closing
def previous_close(df):
    df['Previous Close'] = df['Close'].shift(1)
    return df

# Percent Movement Helper Function
def market_movement(row):
    return (row['Close'] - row['Previous Close'])/row['Previous Close']

# Get Previous Close to Account for Futures
indices_data_prev = [previous_close(df) for df in indices_data]

# Apply helper function to all index dataframes
percent_movement = [index.apply(market_movement, axis=1) for index in indices_data_prev]

## Transform into a DataFrame

In [5]:
all_indices_df = pd.concat(percent_movement, axis=1)
all_indices_df.columns = [index[1:] + " Percent Movement" for index in indices]
original_columns = all_indices_df.columns
all_indices_df['Day'] = all_indices_df.index

independent_variables = ['GSPC Percent Movement', 'TNX Percent Movement', 'X-Y.NYB Percent Movement', 'LD Percent Movement', 'IP Percent Movement', 'NQ Percent Movement']
#independent_variables = independent_variables = ['GSPC Percent Movement', 'IXIC Percent Movement', 'DJI Percent Movement', 'RUT Percent Movement', 'L=F Percent Movement', 'C=F Percent Movement', 'I=F Percent Movement', 'TNX Percent Movement', 'TC-USD Percent Movement', 'TH-USD Percent Movement', 'XY Percent Movement', 'NQ Percent Movement', 'QQ Percent Movement', 'RA Percent Movement', 'AR Percent Movement', 'APL Percent Movement', 'URUSD=X Percent Movement', 'EMAX Percent Movement', 'CVT Percent Movement', 'LY Percent Movement', 'LP Percent Movement', 'YM Percent Movement', 'FH Percent Movement']

X = all_indices_df[independent_variables]

# Fill Non null values with mean
X.fillna(X.mean(), inplace=True)

ValueError: Length mismatch: Expected axis has 30 elements, new values have 24 elements

## Load Model

In [None]:
model_file_handler = open(model_file,'rb')
model = pickle.load(model_file_handler)

## Predict Using Model

In [None]:
base = datetime.strptime(end_date, '%Y-%m-%d')
date_list = [base - timedelta(days=x) for x in range(60)]

for date in date_list:
    date_index = date.strftime("%Y-%m-%d")
    try:
        print(date_index)
        print(model.predict_proba([all_indices_df.loc[date_index][independent_variables]])[0][0:2])
        print(model.predict([all_indices_df.loc[date_index][independent_variables]]))
    except:
        print("No Trading Day")
        pass
    print("\n")