In [122]:
%matplotlib inline
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import matplotlib.pyplot as plt
import hvplot.pandas
import numpy as np
import seaborn as sns
from pathlib import Path

In [123]:
# Define function that downloads the bars which the notebook caches in a Pickle File
def get_api_dataframe(
    tickers=None,
    start="2019-04-20",
    end="2022-04-20",
    timeframe="1Day",
):
    """Download historical bars from the Alpaca API and return them as a DataFrame.

    Calling the function without arguments reproduces the original hard-coded
    request: daily bars for "BUD" from 2019-04-20 to 2022-04-20.

    Credentials are read from the environment variables ``ALPACA_API_KEY`` and
    ``ALPACA_SECRET_KEY`` after ``load_dotenv()`` has been called.

    Parameters
    ----------
    tickers : list of str, optional
        Symbols passed to ``get_bars``. Defaults to ``["BUD"]``. A wider basket
        previously kept here as a comment was
        ``["BUD", "DEO", "SAM", "TAP", "STZ", "VFF", "JAZZ", "CRON", "TLRY", "CGC", "SPY"]``.
        NOTE(review): with several symbols the returned index will presumably
        repeat each date once per symbol -- confirm before relying on it.
    start, end : str
        Timezone-naive dates; each is localized to America/New_York and sent
        to the API in ISO format.
    timeframe : str
        Bar size passed to ``get_bars`` (default ``"1Day"``).

    Returns
    -------
    pandas.DataFrame
        The ``.df`` frame of the ``get_bars`` response, with its index replaced
        by the index's ``.date`` values.
    """
    if tickers is None:
        tickers = ["BUD"]

    # Set Alpaca API key and secret
    load_dotenv()
    api_key = os.getenv("ALPACA_API_KEY")
    sec_key = os.getenv("ALPACA_SECRET_KEY")

    # Create the Alpaca API object
    alpaca = tradeapi.REST(
        api_key,
        sec_key,
        api_version="v2"
    )

    # Requested date range, localized to New York time
    start_date = pd.Timestamp(start, tz='America/New_York').isoformat()
    end_date = pd.Timestamp(end, tz='America/New_York').isoformat()

    # Get the bars for all requested tickers
    stock_data = alpaca.get_bars(
        tickers,
        timeframe,
        start=start_date,
        end=end_date
    ).df

    # Set index to date
    stock_data.index = stock_data.index.date
    return stock_data

In [124]:
# Cache the API download on disk:
#   - when the pickle is missing, call the API once and write the pickle
#   - otherwise read the data back from the pickle

bud_data_file = Path("bud_data.pkl")
if not bud_data_file.is_file():
    print("API file does not exist")
    bud_stock_data = get_api_dataframe()
    bud_stock_data.to_pickle(bud_data_file)
else:
    print("API data file exists")
    bud_stock_data = pd.read_pickle(bud_data_file)

# Preview the frame; head(-5) shows every row except the last five
bud_stock_data.head(-5)

API data file exists


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol
2019-04-22,89.69,90.470,89.5800,89.60,970834,9479,90.000447,BUD
2019-04-23,88.56,89.790,88.5000,89.25,755451,7907,89.242818,BUD
2019-04-24,89.02,89.130,87.8200,88.21,1038366,11509,88.324096,BUD
2019-04-25,86.89,87.020,86.0000,86.64,1341678,13124,86.467163,BUD
2019-04-26,87.38,87.625,86.8300,87.58,920430,9005,87.292918,BUD
...,...,...,...,...,...,...,...,...
2022-04-06,59.65,60.050,59.4200,59.75,1400313,16245,59.756942,BUD
2022-04-07,58.37,58.650,57.9900,58.47,2025008,17382,58.353684,BUD
2022-04-08,58.46,59.240,58.2900,58.78,964847,11297,58.850033,BUD
2022-04-11,58.42,59.020,58.3519,58.49,1458085,15172,58.643236,BUD


In [125]:
# Daily returns: percentage change between consecutive closing prices
close_series = bud_stock_data["close"]
bud_stock_data["actual_returns"] = close_series.pct_change()

# Preview the first rows
bud_stock_data.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-22,89.69,90.47,89.58,89.6,970834,9479,90.000447,BUD,
2019-04-23,88.56,89.79,88.5,89.25,755451,7907,89.242818,BUD,-0.003906
2019-04-24,89.02,89.13,87.82,88.21,1038366,11509,88.324096,BUD,-0.011653
2019-04-25,86.89,87.02,86.0,86.64,1341678,13124,86.467163,BUD,-0.017798
2019-04-26,87.38,87.625,86.83,87.58,920430,9005,87.292918,BUD,0.010849


In [126]:
# Remove every row that contains at least one NaN
bud_stock_data = bud_stock_data.dropna(axis="index", how="any")

# Show the first and the last rows of the cleaned DataFrame
display(bud_stock_data.head(), bud_stock_data.tail())

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-23,88.56,89.79,88.5,89.25,755451,7907,89.242818,BUD,-0.003906
2019-04-24,89.02,89.13,87.82,88.21,1038366,11509,88.324096,BUD,-0.011653
2019-04-25,86.89,87.02,86.0,86.64,1341678,13124,86.467163,BUD,-0.017798
2019-04-26,87.38,87.625,86.83,87.58,920430,9005,87.292918,BUD,0.010849
2019-04-29,87.35,88.37,87.27,88.01,840389,8728,87.93627,BUD,0.00491


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2022-04-13,57.92,58.72,57.9,58.58,964169,11290,58.388346,BUD,0.010871
2022-04-14,58.81,59.0,58.49,58.78,960973,10996,58.798913,BUD,0.003414
2022-04-18,58.65,59.01,58.46,58.72,734730,10193,58.727354,BUD,-0.001021
2022-04-19,58.45,59.25,58.44,59.19,1321247,15500,58.935246,BUD,0.008004
2022-04-20,60.99,61.99,60.97,61.49,1401489,17526,61.522348,BUD,0.038858


In [127]:
# Create the feature set: two simple moving averages (SMAs) of the closing price

# Window sizes (in rows) for the fast and the slow average
short_window = 4
long_window = 20

closing_prices = bud_stock_data["close"]

# "sma_fast": rolling mean of the close over short_window rows
bud_stock_data["sma_fast"] = closing_prices.rolling(window=short_window).mean()

# "sma_slow": rolling mean of the close over long_window rows
bud_stock_data["sma_slow"] = closing_prices.rolling(window=long_window).mean()

In [128]:
# Remove rows that contain a NaN (for example rows where a rolling mean is not defined yet)
bud_stock_data = bud_stock_data.dropna(axis="index", how="any")

In [129]:
# Feature frame `X`: the two SMA columns moved down by one row, so each row
# carries the previous row's averages; the row left empty by the shift is dropped.
sma_columns = ["sma_fast", "sma_slow"]
lagged_smas = bud_stock_data[sma_columns].shift(periods=1)
X = lagged_smas.dropna().copy()

# Show the first and the last rows
display(X.head(), X.tail())

Unnamed: 0,sma_fast,sma_slow
2019-05-21,82.84,85.7115
2019-05-22,82.1325,85.299
2019-05-23,81.8175,85.0195
2019-05-24,81.7325,84.8025
2019-05-28,82.17,84.5615


Unnamed: 0,sma_fast,sma_slow
2022-04-13,58.4225,59.9255
2022-04-14,58.45,59.871
2022-04-18,58.45,59.824
2022-04-19,58.5075,59.7295
2022-04-20,58.8175,59.6875


In [130]:
# Create the target set
# Add a "signal" column to `bud_stock_data`:
#    1.0 -> buy   (actual_returns >= 0)
#   -1.0 -> sell  (actual_returns < 0)
#    0.0 -> neither comparison holds (e.g. a NaN return)
daily_returns = bud_stock_data["actual_returns"]
bud_stock_data["signal"] = np.select(
    [daily_returns >= 0, daily_returns < 0],
    [1.0, -1.0],
    default=0.0,
)

In [131]:
# Target Series `y`: an independent copy of the "signal" column.
y = bud_stock_data.loc[:, "signal"].copy()

In [132]:
# Split the data into training and test sets
# Import required libraries
from pandas.tseries.offsets import DateOffset

# The training period starts at the smallest label in the feature index
feature_index = X.index
training_begin = feature_index.min()

# Show the training start date
print(training_begin)

2019-05-21


In [133]:
# Select the ending period for the training data with an offset of 4 months.
# The index holds datetime.date objects, but adding a DateOffset produces a
# pandas Timestamp. Slicing a date index with a Timestamp emits a FutureWarning
# on pandas 1.x and raises TypeError on pandas >= 2.0, so the result is
# converted back to a datetime.date to match the index labels.
training_end = (pd.Timestamp(X.index.min()) + DateOffset(months=4)).date()

# Display the training end date
print(training_end)

2019-09-21 00:00:00


In [134]:
# Build the training features and targets: both take the same label slice,
# running from training_begin through training_end
training_period = slice(training_begin, training_end)
X_train = X.loc[training_period]
y_train = y.loc[training_period]

  return arr.searchsorted(value, side=side, sorter=sorter)


In [135]:
# Generate the X_test and y_test DataFrames.
# Label slices with .loc include both endpoints, so a row dated exactly
# training_end is selected by the training slice and by this slice as well.
# Rows that already belong to the training set are removed here so that the
# model is never evaluated on data it was fitted on.
X_test = X.loc[training_end:]
X_test = X_test.loc[~X_test.index.isin(X_train.index)]
y_test = y.loc[training_end:]
y_test = y_test.loc[~y_test.index.isin(y_train.index)]

In [136]:
# Standardize the data
# Import required libraries
from sklearn.preprocessing import StandardScaler

# Fit a StandardScaler on the training features only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Scale the training and the test features with the fitted scaler
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [137]:
# Create trading strategy
# Import the SVM model
from sklearn import svm
from sklearn.metrics import classification_report

In [138]:
# Instantiate the support vector classifier and fit it on the scaled
# training features and the training targets
svm_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict the trading signals for the training data with the fitted model
training_signal_predictions = svm_model.predict(X_train_scaled)

# Show the first ten predictions
training_signal_predictions[:10]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [139]:
# Score the training predictions against the training targets
training_report = classification_report(
    y_true=y_train,
    y_pred=training_signal_predictions,
)

# Print the report
print(training_report)

              precision    recall  f1-score   support

        -1.0       0.74      0.55      0.63        42
         1.0       0.65      0.82      0.73        44

    accuracy                           0.69        86
   macro avg       0.70      0.68      0.68        86
weighted avg       0.70      0.69      0.68        86



In [140]:
# Backtesting a machine learning trading algorithm
# Predict the trading signals for the scaled test features with the trained model
testing_signal_predictions = svm_model.predict(X=X_test_scaled)

In [141]:
# Score the test predictions against the test targets
testing_report = classification_report(
    y_true=y_test,
    y_pred=testing_signal_predictions,
)

# Print the report
print(testing_report)

              precision    recall  f1-score   support

        -1.0       0.50      0.00      0.01       329
         1.0       0.49      1.00      0.66       321

    accuracy                           0.49       650
   macro avg       0.50      0.50      0.33       650
weighted avg       0.50      0.49      0.33       650



In [142]:
# Collect the test-period predictions in a DataFrame indexed like X_test
predictions_df = pd.DataFrame(
    {"predicted_signal": testing_signal_predictions},
    index=X_test.index,
)

# Actual returns are matched to the prediction rows by index label
predictions_df["actual_returns"] = bud_stock_data["actual_returns"]

# Strategy return: the predicted signal multiplied by the actual return
signal_column = predictions_df["predicted_signal"]
predictions_df["trading_algorithm_returns"] = (
    signal_column * predictions_df["actual_returns"]
)

# Preview the first rows
predictions_df.head()

Unnamed: 0,predicted_signal,actual_returns,trading_algorithm_returns
2019-09-23,-1.0,0.000208,-0.000208
2019-09-24,-1.0,-0.026054,0.026054
2019-09-25,1.0,0.008313,0.008313
2019-09-26,1.0,0.003911,0.003911
2019-09-27,1.0,-0.005896,-0.005896


In [143]:
# Compound (1 + return) over time for `actual_returns` and
# `trading_algorithm_returns`, then plot both cumulative series
return_columns = ["actual_returns", "trading_algorithm_returns"]
cumulative_returns = (1 + predictions_df[return_columns]).cumprod()
cumulative_returns.hvplot()