In [711]:
%matplotlib inline
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import matplotlib.pyplot as plt
import hvplot.pandas
import numpy as np
import seaborn as sns
from pathlib import Path

In [712]:
# Import the OHLCV dataset into a Pandas Dataframe
#stock_df = pd.read_csv(
    #Path("api_data.csv"), 
    #index_col="date", 
    #infer_datetime_format=True, 
    #parse_dates=True
#)

# Display sample data
#stock_df.head()

In [713]:
#stock_df.copy()
#stock_data = stock_df.loc[stock_df["symbol"] == "SPY"]
#stock_data

In [714]:
# Define function to create Pickle File
def get_api_dataframe():
    # Set Alpaca API key and secret

    load_dotenv()
    api_key= os.getenv("ALPACA_API_KEY")
    sec_key= os.getenv("ALPACA_SECRET_KEY")

# Create the Alpaca API object

    alpaca = tradeapi.REST(
        api_key,
        sec_key,
        api_version="v2"
    )
# Get 3 years worth of historical data for Alcohol stocks, Cannabis Stocks, and S&P 500

    start_date = pd.Timestamp('2019-04-20', tz='America/New_York').isoformat()
    end_date = pd.Timestamp('2022-04-20', tz='America/New_York').isoformat()

    #tickers = ["BUD", "DEO", "SAM", "TAP", "STZ","VFF","JAZZ","CRON", "TLRY","CGC", "SPY"]
    tickers = ["SPY"]

# Set timeframe to "1Day" for Alpaca API
    timeframe = "1Day"

# Get current closing prices for All Stocks

    stock_data = alpaca.get_bars(
        tickers,
        timeframe,
        start = start_date,
        end = end_date
    ).df

#Set index to date
    stock_data.index = stock_data.index.date
    return stock_data

In [715]:
# Check if file exists
# If file does not exist then load api and save file
# else read data from pikl

spy_data_file = Path("spy_data.pkl")
if spy_data_file.is_file():
    print ("API data file exists")
    stock_data = pd.read_pickle(spy_data_file)
else:
    print ("API file does not exist") 
    stock_data = get_api_dataframe()
    stock_data.to_pickle(spy_data_file)

#View dataframe
stock_data.head(-5)

API data file exists


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol
2019-04-22,289.17,290.435,289.0700,290.27,40520264,170767,290.002934,SPY
2019-04-23,290.68,293.140,290.4200,292.88,52411775,232023,292.102165,SPY
2019-04-24,292.76,293.160,292.0700,292.23,50469550,203858,292.580824,SPY
2019-04-25,292.12,292.780,290.7300,292.05,57875137,238260,292.051720,SPY
2019-04-26,292.12,293.490,291.2401,293.41,51449173,220594,292.642502,SPY
...,...,...,...,...,...,...,...,...
2022-04-06,446.87,448.930,443.4700,446.52,105592595,901634,446.345270,SPY
2022-04-07,445.67,450.690,443.5300,448.77,77538519,628673,447.098209,SPY
2022-04-08,448.04,450.630,445.9400,447.57,79148376,573491,448.250805,SPY
2022-04-11,444.11,445.000,439.3900,439.92,89825549,779421,442.206374,SPY


In [716]:
# Calculate the daily returns using the closing prices and the pct_change function
stock_data["actual_returns"] = stock_data["close"].pct_change()

# Display sample data
stock_data.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-22,289.17,290.435,289.07,290.27,40520264,170767,290.002934,SPY,
2019-04-23,290.68,293.14,290.42,292.88,52411775,232023,292.102165,SPY,0.008992
2019-04-24,292.76,293.16,292.07,292.23,50469550,203858,292.580824,SPY,-0.002219
2019-04-25,292.12,292.78,290.73,292.05,57875137,238260,292.05172,SPY,-0.000616
2019-04-26,292.12,293.49,291.2401,293.41,51449173,220594,292.642502,SPY,0.004657


In [717]:
# Drop all NaN values from the DataFrame
stock_data = stock_data.dropna()

# Review the DataFrame
display(stock_data.head())
display(stock_data.tail())

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-23,290.68,293.14,290.42,292.88,52411775,232023,292.102165,SPY,0.008992
2019-04-24,292.76,293.16,292.07,292.23,50469550,203858,292.580824,SPY,-0.002219
2019-04-25,292.12,292.78,290.73,292.05,57875137,238260,292.05172,SPY,-0.000616
2019-04-26,292.12,293.49,291.2401,293.41,51449173,220594,292.642502,SPY,0.004657
2019-04-29,293.53,294.45,293.41,293.87,61675682,186857,293.871519,SPY,0.001568


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2022-04-13,437.96,444.11,437.84,443.31,73734315,563285,441.262058,SPY,0.011454
2022-04-14,443.44,444.7301,437.68,437.79,98667135,703727,440.305786,SPY,-0.012452
2022-04-18,436.73,439.75,435.61,437.97,65814949,573478,437.862483,SPY,0.000411
2022-04-19,437.87,445.8,437.68,445.04,77678099,597840,443.072191,SPY,0.016143
2022-04-20,446.95,447.57,443.48,444.71,64737457,591762,445.610168,SPY,-0.000742


In [718]:
#Create Feature Set and Window Sizes

short_window = 4

# Create an SMA that uses short_window, and assign it to a new column named “sma_fast”
stock_data["sma_fast"] = stock_data["close"].rolling(window=short_window).mean()


# Define a window size 
long_window = 20

# Create an SMA that uses long_window, and assign it to a new columns named “sma_slow”
stock_data["sma_slow"] = stock_data["close"].rolling(window=long_window).mean()

In [719]:
# Drop the NaNs using dropna()
stock_data = stock_data.dropna()

In [720]:
stock_data["sma_slow_lag"] = stock_data["sma_slow"].shift(1)
stock_data["sma_fast_lag"] = stock_data["sma_fast"].shift(1)
stock_data['l1'] = stock_data['actual_returns'].shift(1)
stock_data = stock_data.dropna()
stock_data

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns,sma_fast,sma_slow,sma_slow_lag,sma_fast_lag,l1
2019-05-21,285.83,286.9300,285.55,286.51,47390688,256169,286.268748,SPY,0.009016,286.0000,288.9480,289.2665,285.6375,-0.006612
2019-05-22,285.43,286.6900,285.10,285.63,49717083,260143,285.783071,SPY,-0.003071,285.4825,288.6180,288.9480,286.0000,0.009016
2019-05-23,283.16,283.2100,280.57,282.14,99142377,466151,281.947377,SPY,-0.012219,284.5575,288.1225,288.6180,285.4825,-0.003071
2019-05-24,283.75,284.2000,282.09,282.78,57468147,267403,283.004360,SPY,0.002268,284.2650,287.5910,288.1225,284.5575,-0.012219
2019-05-28,283.01,284.1500,280.13,280.15,70066654,340223,281.910164,SPY,-0.009301,282.6750,286.9050,287.5910,284.2650,0.002268
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-13,437.96,444.1100,437.84,443.31,73734315,563285,441.262058,SPY,0.011454,442.2725,448.9820,448.5940,443.6375,-0.003705
2022-04-14,443.44,444.7301,437.68,437.79,98667135,703727,440.305786,SPY,-0.012452,439.8275,448.8180,448.9820,442.2725,0.011454
2022-04-18,436.73,439.7500,435.61,437.97,65814949,573478,437.862483,SPY,0.000411,439.3400,448.5010,448.8180,439.8275,-0.012452
2022-04-19,437.87,445.8000,437.68,445.04,77678099,597840,443.072191,SPY,0.016143,441.0275,448.5380,448.5010,439.3400,0.000411


In [721]:
# Assign a copy of the `sma_fast` and `sma_slow` columns to a new DataFrame called `X`
#X = stock_data[["sma_fast", "sma_slow"]].shift(1).dropna().copy()
X = stock_data[["sma_fast_lag", "sma_slow_lag", "l1"]].dropna().copy()
# Display sample data
display(X.head())
display(X.tail())

Unnamed: 0,sma_fast_lag,sma_slow_lag,l1
2019-05-21,285.6375,289.2665,-0.006612
2019-05-22,286.0,288.948,0.009016
2019-05-23,285.4825,288.618,-0.003071
2019-05-24,284.5575,288.1225,-0.012219
2019-05-28,284.265,287.591,0.002268


Unnamed: 0,sma_fast_lag,sma_slow_lag,l1
2022-04-13,443.6375,448.594,-0.003705
2022-04-14,442.2725,448.982,0.011454
2022-04-18,439.8275,448.818,-0.012452
2022-04-19,439.34,448.501,0.000411
2022-04-20,441.0275,448.538,0.016143


In [722]:
#Create Target Set
# Create a new column in the `df_stock_data` called "signal" setting its value to zero.
stock_data["signal"] = 0.0

# Create the signal to buy
stock_data.loc[(stock_data["actual_returns"] >= 0), "signal"] = 1

# Create the signal to sell
stock_data.loc[(stock_data["actual_returns"] < 0), "signal"] = 0

In [723]:
# Copy the new "signal" column to a new Series called `y`.
y = stock_data["signal"].copy()

In [724]:
#Split the Data in Training and Test Sets
# Import required libraries
from pandas.tseries.offsets import DateOffset

# Select the start of the training period
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-05-21


In [725]:
# Select the ending period for the training data with an offset of 18 months
training_end = X.index.min() + DateOffset(months=24)

# Display the training end date
print(training_end)

2021-05-21 00:00:00


In [726]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

  return arr.searchsorted(value, side=side, sorter=sorter)


In [727]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end:]
y_test = y.loc[training_end:]

In [728]:
X_test.shape

(231, 3)

In [729]:
X_train.shape

(506, 3)

In [730]:
#Standardize the Data
# Import required libraries
from sklearn.preprocessing import StandardScaler

# Create a StandardScaler instance
scaler = StandardScaler()
 
# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)
 
# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [731]:
# Create trading strategy
# Import the SVM model
from sklearn import svm
from sklearn.metrics import classification_report

In [732]:

# Create the classifier model
svm_model = svm.SVC()
 
# Fit the model to the data using X_train_scaled and y_train
svm_model = svm_model.fit(X_train_scaled, y_train)

# Use the trained model to predict the trading signals for the training data
training_signal_predictions = svm_model.predict(X_train_scaled)

# Display the sample predictions
training_signal_predictions[:10]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [733]:
# Evaluate the model using a classification report
training_report = classification_report(y_train, training_signal_predictions)

# Display the report
print(training_report)

              precision    recall  f1-score   support

         0.0       0.80      0.06      0.10       216
         1.0       0.58      0.99      0.73       290

    accuracy                           0.59       506
   macro avg       0.69      0.52      0.42       506
weighted avg       0.68      0.59      0.47       506



In [734]:
#Backtesting a Machine Learning Trading Algorithm
# Use the trained model to predict the trading signals for the testing data.
testing_signal_predictions = svm_model.predict(X_test_scaled)

In [735]:
# Evaluate the model's ability to predict the trading signal for the testing data
testing_report = classification_report(y_test, testing_signal_predictions)

# Display the report
print(testing_report)

              precision    recall  f1-score   support

         0.0       0.50      0.03      0.05       106
         1.0       0.54      0.98      0.70       125

    accuracy                           0.54       231
   macro avg       0.52      0.50      0.38       231
weighted avg       0.52      0.54      0.40       231



In [736]:
# Create a predictions DataFrame
predictions_df = pd.DataFrame(index=X_test.index)

predictions_df["predicted_signal"] = testing_signal_predictions

predictions_df["actual_returns"] = stock_data["actual_returns"]

predictions_df["trading_algorithm_returns"] = (
    predictions_df["actual_returns"] * predictions_df["predicted_signal"]
)

# Review the DataFrame
predictions_df.head(-5)

Unnamed: 0,predicted_signal,actual_returns,trading_algorithm_returns
2021-05-21,1.0,-0.000819,-0.000819
2021-05-24,1.0,0.010194,0.010194
2021-05-25,1.0,-0.002219,-0.002219
2021-05-26,1.0,0.001985,0.001985
2021-05-27,1.0,0.000525,0.000525
...,...,...,...
2022-04-06,1.0,-0.009999,-0.009999
2022-04-07,1.0,0.005039,0.005039
2022-04-08,1.0,-0.002674,-0.002674
2022-04-11,1.0,-0.017092,-0.017092


In [737]:
# Calculate and plot the cumulative returns for the `actual_returns` and the `trading_algorithm_returns`
(1 + predictions_df[["actual_returns", "trading_algorithm_returns"]]).cumprod().hvplot()