In [280]:
%matplotlib inline
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import matplotlib.pyplot as plt
import hvplot.pandas
import numpy as np
import seaborn as sns
from pathlib import Path

In [281]:
# Fetch daily price bars from the Alpaca API (caching to disk is done by the caller)
def get_api_dataframe(tickers=None, start="2019-04-20", end="2022-04-20",
                      timeframe="1Day", tz="America/New_York"):
    """Download historical bars from Alpaca and return them as a DataFrame.

    Called with no arguments, this requests "1Day" bars for ``["VFF"]``
    between 2019-04-20 and 2022-04-20 (America/New_York).

    Parameters
    ----------
    tickers : str or iterable of str, optional
        Symbol(s) to request. ``None`` (the default) requests ``["VFF"]``.
    start, end : str
        Boundaries of the requested period; each is turned into a
        timezone-aware ``pd.Timestamp`` and sent in ISO format.
    timeframe : str
        Bar size passed straight to ``get_bars``.
    tz : str
        Timezone applied to ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        The ``.df`` of the ``get_bars`` response, with its index replaced by
        ``index.date``.
        NOTE(review): with more than one ticker the date index will
        presumably contain repeated dates (one row per symbol) - confirm
        before running per-symbol calculations on the result.
    """
    # Read credentials from the environment / .env file.
    # os.getenv returns None when a variable is unset; that value is handed
    # to the REST client unchanged.
    load_dotenv()
    api_key = os.getenv("ALPACA_API_KEY")
    sec_key = os.getenv("ALPACA_SECRET_KEY")

    # Create the Alpaca API object
    alpaca = tradeapi.REST(
        api_key,
        sec_key,
        api_version="v2"
    )

    # Normalise the ticker argument to a list of symbols
    if tickers is None:
        symbols = ["VFF"]
    elif isinstance(tickers, str):
        symbols = [tickers]
    else:
        symbols = list(tickers)

    # Build ISO-formatted, timezone-aware boundaries for the request
    start_date = pd.Timestamp(start, tz=tz).isoformat()
    end_date = pd.Timestamp(end, tz=tz).isoformat()

    # Request the bars and convert the response to a DataFrame
    stock_data = alpaca.get_bars(
        symbols,
        timeframe,
        start=start_date,
        end=end_date
    ).df

    # Replace the index with its date component
    stock_data.index = stock_data.index.date
    return stock_data

In [282]:
# Use the pickled copy of the API data when it is on disk;
# otherwise call the API once and write the result to the pickle file.

vff_data_file = Path("vff_data.pkl")
if not vff_data_file.is_file():
    print ("API file does not exist")
    stock_data = get_api_dataframe()
    stock_data.to_pickle(vff_data_file)
else:
    print ("API data file exists")
    stock_data = pd.read_pickle(vff_data_file)

# Preview the DataFrame (every row except the last five)
stock_data.head(-5)

API data file exists


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol
2019-04-22,11.95,11.9500,11.2100,11.56,1350093,5960,11.552211,VFF
2019-04-23,11.55,11.6380,10.9500,11.08,1333892,5688,11.245282,VFF
2019-04-24,10.92,12.5900,10.8500,12.29,2360366,9658,12.061797,VFF
2019-04-25,12.31,12.4400,11.6000,11.62,1177232,5180,11.868467,VFF
2019-04-26,11.55,12.1800,11.3700,11.57,1097918,4723,11.702576,VFF
...,...,...,...,...,...,...,...,...
2022-04-06,5.16,5.2350,5.0301,5.14,480630,4559,5.137048,VFF
2022-04-07,5.13,5.1711,4.9600,5.04,356830,3783,5.061856,VFF
2022-04-08,5.01,5.0700,4.8200,4.83,1129375,6429,4.917287,VFF
2022-04-11,4.70,4.9200,4.6400,4.81,721407,5570,4.787688,VFF


In [283]:
# Add the day-over-day percentage change of the closing price as "actual_returns"
stock_data = stock_data.assign(
    actual_returns=stock_data["close"].pct_change()
)

# Show the first rows
stock_data.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-22,11.95,11.95,11.21,11.56,1350093,5960,11.552211,VFF,
2019-04-23,11.55,11.638,10.95,11.08,1333892,5688,11.245282,VFF,-0.041522
2019-04-24,10.92,12.59,10.85,12.29,2360366,9658,12.061797,VFF,0.109206
2019-04-25,12.31,12.44,11.6,11.62,1177232,5180,11.868467,VFF,-0.054516
2019-04-26,11.55,12.18,11.37,11.57,1097918,4723,11.702576,VFF,-0.004303


In [284]:
# Remove every row that contains a NaN
stock_data = stock_data.dropna()

# Show the first and last rows of the cleaned DataFrame
display(stock_data.head(), stock_data.tail())

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2019-04-23,11.55,11.638,10.95,11.08,1333892,5688,11.245282,VFF,-0.041522
2019-04-24,10.92,12.59,10.85,12.29,2360366,9658,12.061797,VFF,0.109206
2019-04-25,12.31,12.44,11.6,11.62,1177232,5180,11.868467,VFF,-0.054516
2019-04-26,11.55,12.18,11.37,11.57,1097918,4723,11.702576,VFF,-0.004303
2019-04-29,11.65,12.04,11.42,12.01,891479,4472,11.833013,VFF,0.038029


Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns
2022-04-13,4.75,4.91,4.72,4.88,286697,3324,4.838941,VFF,0.027368
2022-04-14,4.89,4.92,4.69,4.69,385134,3018,4.79796,VFF,-0.038934
2022-04-18,4.7,4.7,4.4,4.47,588755,6521,4.504151,VFF,-0.046908
2022-04-19,4.45,4.6,4.34,4.52,533863,4550,4.478629,VFF,0.011186
2022-04-20,4.57,4.62,4.39,4.45,326564,3916,4.478399,VFF,-0.015487


In [285]:
# Feature set: two simple moving averages of the closing price

# Window sizes (in rows) for the fast and slow averages
short_window = 15
long_window = 50

# "sma_fast" uses short_window, "sma_slow" uses long_window
stock_data = stock_data.assign(
    sma_fast=stock_data["close"].rolling(window=short_window).mean(),
    sma_slow=stock_data["close"].rolling(window=long_window).mean(),
)

In [286]:
# Discard rows with any missing value (the rolling means start with NaNs)
stock_data = stock_data.dropna(axis=0, how="any")

In [287]:
# Shift both moving averages and the return by one row so that each row
# carries the previous row's values, then drop the row left incomplete.
stock_data = stock_data.assign(
    sma_slow_lag=stock_data["sma_slow"].shift(1),
    sma_fast_lag=stock_data["sma_fast"].shift(1),
    l1=stock_data["actual_returns"].shift(1),
).dropna()
stock_data

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol,actual_returns,sma_fast,sma_slow,sma_slow_lag,sma_fast_lag,l1
2019-07-03,11.36,11.3600,10.6900,10.97,710535,3655,10.946243,VFF,-0.028344,11.736667,12.2814,12.2836,11.848000,-0.026724
2019-07-05,10.95,11.5600,10.7647,11.53,579857,3181,11.264105,VFF,0.051048,11.648667,12.2662,12.2814,11.736667,-0.028344
2019-07-08,11.40,11.4399,11.0700,11.40,474164,3164,11.246145,VFF,-0.011275,11.631333,12.2618,12.2662,11.648667,0.051048
2019-07-09,11.21,11.3500,11.0000,11.28,407629,1841,11.176613,VFF,-0.010526,11.568667,12.2560,12.2618,11.631333,-0.011275
2019-07-10,11.35,11.3500,10.5700,10.65,1047173,5167,10.905323,VFF,-0.055851,11.460000,12.2288,12.2560,11.568667,-0.010526
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-13,4.75,4.9100,4.7200,4.88,286697,3324,4.838941,VFF,0.027368,5.314667,5.1534,5.1636,5.352667,-0.012474
2022-04-14,4.89,4.9200,4.6900,4.69,385134,3018,4.797960,VFF,-0.038934,5.240667,5.1446,5.1534,5.314667,0.027368
2022-04-18,4.70,4.7000,4.4000,4.47,588755,6521,4.504151,VFF,-0.046908,5.132000,5.1346,5.1446,5.240667,-0.038934
2022-04-19,4.45,4.6000,4.3400,4.52,533863,4550,4.478629,VFF,0.011186,5.053333,5.1210,5.1346,5.132000,-0.046908


In [288]:
# Build the feature matrix `X` from a copy of the lagged columns
# (`sma_fast_lag`, `sma_slow_lag` and the lagged return `l1`)

X = stock_data.loc[:, ["sma_fast_lag", "sma_slow_lag", "l1"]].dropna().copy()
# Show the first and last rows
display(X.head(), X.tail())

Unnamed: 0,sma_fast_lag,sma_slow_lag,l1
2019-07-03,11.848,12.2836,-0.026724
2019-07-05,11.736667,12.2814,-0.028344
2019-07-08,11.648667,12.2662,0.051048
2019-07-09,11.631333,12.2618,-0.011275
2019-07-10,11.568667,12.256,-0.010526


Unnamed: 0,sma_fast_lag,sma_slow_lag,l1
2022-04-13,5.352667,5.1636,-0.012474
2022-04-14,5.314667,5.1534,0.027368
2022-04-18,5.240667,5.1446,-0.038934
2022-04-19,5.132,5.1346,-0.046908
2022-04-20,5.053333,5.121,0.011186


In [289]:
# Create the target column "signal" on `stock_data`:
#   -1.0 where the day's return is negative,
#    0.0 otherwise (return is zero or positive).
stock_data["signal"] = np.where(stock_data["actual_returns"] < 0, -1.0, 0.0)

In [290]:
# The target Series `y` is an independent copy of the "signal" column.
y = stock_data.loc[:, "signal"].copy()

In [291]:
# Split the data into training and test sets
# DateOffset is used by the following cell to compute the end of the training period
from pandas.tseries.offsets import DateOffset

# The training period starts at the earliest index value of the feature matrix
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

2019-07-03


In [292]:
# Select the end of the training period: 24 months after the first observation.
# Note: 24 months worked better than 18 months for both algos.
# Adding a DateOffset yields a Timestamp, while training_begin prints as a plain
# date; convert the result back to a date so that label slicing on the index
# does not mix Timestamp and datetime.date values.
training_end = (pd.Timestamp(X.index.min()) + DateOffset(months=24)).date()

# Display the training end date
print(training_end)

2021-07-03 00:00:00


In [293]:
# Generate the X_train DataFrame and the y_train Series.
# The boundary is normalised to a plain date first, so the label slice never
# compares a Timestamp against date objects in the index (a deprecated comparison).
train_stop = pd.Timestamp(training_end).date()
X_train = X.loc[training_begin:train_stop]
y_train = y.loc[training_begin:train_stop]

  return self._engine.get_loc(casted_key)
  return arr.searchsorted(value, side=side, sorter=sorter)


In [294]:
# Generate the X_test DataFrame and the y_test Series.
# `.loc` label slices include both endpoints, so slicing from `training_end`
# would put a row dated exactly `training_end` into the test set as well as the
# training set. Select strictly later rows instead, comparing date against date.
test_start = pd.Timestamp(training_end).date()
X_test = X.loc[X.index > test_start]
y_test = y.loc[y.index > test_start]

In [295]:
# Check the size of the test feature set as (rows, columns)
X_test.shape

(201, 3)

In [296]:
# Check the size of the training feature set as (rows, columns)
X_train.shape

(505, 3)

In [297]:
# Standardize the features
from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training features only ...
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# ... and that same fitted scaler transforms both the training and the test features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [298]:
# Create trading strategy
# Import the SVM model
from sklearn import svm
from sklearn.metrics import classification_report

In [299]:
# Build an SVC with default settings and fit it on the scaled training data
svm_model = svm.SVC().fit(X_train_scaled, y_train)

# Predict the signals for the same training data
training_signal_predictions = svm_model.predict(X_train_scaled)

# Show the first ten predictions
training_signal_predictions[:10]

array([ 0.,  0., -1.,  0.,  0.,  0.,  0.,  0., -1.,  0.])

In [300]:
# Compare the training predictions with the true training signals
training_report = classification_report(
    y_true=y_train,
    y_pred=training_signal_predictions,
)

# Print the report, followed by its label
print(training_report, "VFF - Training Report")

              precision    recall  f1-score   support

        -1.0       0.55      0.59      0.57       256
         0.0       0.55      0.51      0.53       249

    accuracy                           0.55       505
   macro avg       0.55      0.55      0.55       505
weighted avg       0.55      0.55      0.55       505
 VFF - Training Report


In [301]:
# Backtesting the machine learning trading algorithm
# Predict the signals for the scaled test features with the fitted SVM.
testing_signal_predictions = svm_model.predict(X=X_test_scaled)

In [302]:
# Compare the test predictions with the true test signals
testing_report = classification_report(
    y_true=y_test,
    y_pred=testing_signal_predictions,
)

# Print the report, followed by its label
print(testing_report, "VFF - Testing Report")

              precision    recall  f1-score   support

        -1.0       0.53      0.44      0.48       117
         0.0       0.37      0.46      0.41        84

    accuracy                           0.45       201
   macro avg       0.45      0.45      0.45       201
weighted avg       0.46      0.45      0.45       201
 VFF - Testing Report


In [303]:
# Collect the predicted signals in a DataFrame indexed like the test set
predictions_df = pd.DataFrame(
    {"predicted_signal": testing_signal_predictions},
    index=X_test.index,
)

# Attach the realised returns (matched on the index)
predictions_df["actual_returns"] = stock_data["actual_returns"]

# Strategy return = realised return multiplied by the predicted signal
predictions_df["trading_algorithm_returns"] = predictions_df["actual_returns"].mul(
    predictions_df["predicted_signal"]
)

# Preview the DataFrame (every row except the last five)
predictions_df.head(-5)

Unnamed: 0,predicted_signal,actual_returns,trading_algorithm_returns
2021-07-06,0.0,-0.024715,-0.000000
2021-07-07,0.0,-0.018519,-0.000000
2021-07-08,0.0,-0.011917,-0.000000
2021-07-09,0.0,0.025126,0.000000
2021-07-12,-1.0,0.007843,-0.007843
...,...,...,...
2022-04-06,0.0,-0.007722,-0.000000
2022-04-07,-1.0,-0.019455,0.019455
2022-04-08,-1.0,-0.041667,0.041667
2022-04-11,0.0,-0.004141,-0.000000


In [304]:
# Plot the cumulative product of (1 + return) for the actual returns
# and for the trading algorithm returns
(
    predictions_df[["actual_returns", "trading_algorithm_returns"]]
    .add(1)
    .cumprod()
    .hvplot(
        xlabel="Date",
        ylabel="Returns",
        label="Village Farms International (VFF) - Short Algo",
    )
)