In [1]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn import preprocessing



## Set the Notebook Parameters
- Start and End Date
- Model File Name

In [2]:
# Start and End Dates for Ticker Data (ISO "YYYY-MM-DD" strings; end_date is
# parsed with that format when the prediction dates are built).
# NOTE(review): the recorded run reports no data for end_date itself although
# it is a weekday, so the download presumably treats the end bound as
# exclusive -- confirm against the yfinance documentation.
start_date = "2021-03-01"
end_date = "2021-03-24"

# Model File: path of the pickled model that is opened and unpickled in the
# "Load Model" section.
model_file = "models/vix_prediction_20210307.pl"
# Alternative model snapshot; uncomment to use it instead of the line above.
#model_file = "models/vix_prediction_20210321.pl"

## Load the Yahoo Finance Data

In [3]:
# Helper Function
def download_data(ticker, start, end):
    """Fetch the price history of a single ticker from Yahoo Finance.

    Thin wrapper around ``yf.download``: ``ticker`` is the symbol to request
    and ``start`` / ``end`` are forwarded unchanged as the date bounds.
    Returns whatever ``yf.download`` returns for that request.
    """
    history = yf.download(ticker, start=start, end=end)
    return history

# Ticker symbols whose data is retrieved
indices = [
    "^VIX",
    "^GSPC",
    "^DJI",
    "^TNX",
    "DX-Y.NYB",
    "GLD",
    "TIP",
    "VNQ",
]
# One download result per ticker, in the same order as `indices`
indices_data = [download_data(ticker, start_date, end_date) for ticker in indices]

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


## Scale the Independent Values

In [4]:
# Previous Closing
def previous_close(df):
    """Return a copy of ``df`` with a 'Previous Close' column added.

    'Previous Close' holds the 'Close' value of the preceding row (the first
    row therefore gets NaN). The input frame is left untouched, so re-running
    the calling cell never depends on, or alters, earlier state.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Close' column.

    Returns
    -------
    pandas.DataFrame
        New frame with all original columns plus 'Previous Close'.

    Raises
    ------
    KeyError
        If ``df`` has no 'Close' column.
    """
    # assign() builds a new frame instead of writing into the caller's object;
    # the dict form is needed because the column name contains a space.
    return df.assign(**{'Previous Close': df['Close'].shift(1)})

# Percent Movement Helper Function
def market_movement(row):
    """Relative change of 'Close' versus 'Previous Close'.

    ``row`` is any object that can be indexed with the labels 'Close' and
    'Previous Close'. The result is (close - previous) / previous, i.e. a
    fraction rather than a value multiplied by 100.
    """
    prior = row['Previous Close']
    change = row['Close'] - prior
    return change / prior

# Get Previous Close to Account for Futures
indices_data_prev = [previous_close(df) for df in indices_data]

# Percent movement for every index dataframe.
# market_movement only performs label lookups and arithmetic, so passing the
# whole frame evaluates all rows in a single vectorised operation. This gives
# the same values as a row-wise apply(axis=1) without the per-row Python call,
# and it still yields a Series when a download comes back empty (a row-wise
# apply on an empty frame hands back a DataFrame, which would break the
# column naming after the concat).
percent_movement = [market_movement(df) for df in indices_data_prev]

## Transform into a DataFrame

In [5]:
# One column of percent movement per ticker, aligned on the shared date index
all_indices_df = pd.concat(percent_movement, axis=1)
# Strip only a leading "^" from the ticker. Slicing with [1:] also removed the
# first letter of tickers that have no "^" prefix (e.g. "GLD" became "LD").
all_indices_df.columns = [index.lstrip("^") + " Percent Movement" for index in indices]
original_columns = all_indices_df.columns
all_indices_df['Day'] = all_indices_df.index

# Model inputs; the order of this list is the column order of X
independent_variables = [
    'GSPC Percent Movement',
    'TNX Percent Movement',
    'DX-Y.NYB Percent Movement',
    'GLD Percent Movement',
    'TIP Percent Movement',
    'VNQ Percent Movement',
]

# Fill null values with the column mean.
# fillna returns a new frame, so nothing is written into a selection of
# all_indices_df and pandas no longer emits SettingWithCopyWarning.
feature_frame = all_indices_df[independent_variables]
X = feature_frame.fillna(feature_frame.mean())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


## Load Model

In [6]:
# NOTE: pickle.load can execute arbitrary code while deserialising; only load
# model files that come from a trusted source.
# The with-block closes the file handle as soon as the model is loaded
# (previously the handle stayed open for the life of the kernel).
with open(model_file, 'rb') as model_file_handler:
    model = pickle.load(model_file_handler)

## Predict Using Model

In [7]:
# Number of calendar days, counting back from end_date, to report on
lookback_days = 10

base = datetime.strptime(end_date, '%Y-%m-%d')
date_list = [base - timedelta(days=x) for x in range(lookback_days)]

for date in date_list:
    date_index = date.strftime("%Y-%m-%d")
    print(date_index)
    try:
        # Read from X (the mean-filled inputs) so a gap in one index does not
        # feed NaN to the model; rows without gaps are identical to the
        # corresponding rows of all_indices_df.
        day_features = X.loc[date_index]
    except KeyError:
        # No row for this date: weekend, holiday, or outside the downloaded range.
        # Only the missing-row case is handled here; any failure inside the
        # model is left to surface instead of being reported as a non-trading day.
        print("No Trading Day")
    else:
        sample = [day_features]  # single-row batch for the model
        print(model.predict_proba(sample)[0][0:2])
        print(model.predict(sample))
    print("\n")

2021-03-24
No Trading Day


2021-03-23
[0.67176363 0.32823637]
[0]


2021-03-22
[0.53119269 0.46880731]
[0]


2021-03-21
No Trading Day


2021-03-20
No Trading Day


2021-03-19
[0.570442 0.429558]
[0]


2021-03-18
[0.61971991 0.38028009]
[0]


2021-03-17
[0.3869998 0.6130002]
[1]


2021-03-16
[0.5633554 0.4366446]
[0]


2021-03-15
[0.47618455 0.52381545]
[1]


