# High Frequency Trading Algorithm

## Part 1 Preparing The Data For Training And Testing

### Initial Set-Up

In [1]:
# Initial Imports 
import os
from pathlib import Path
import alpaca_trade_api as tradeapi
import pandas as pd
import numpy as np
import datetime
import time
from dotenv import load_dotenv
import schedule
import joblib
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB as gnb
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from sklearn.model_selection import train_test_split

In [2]:
# Loading the .env enviroment variables
load_dotenv()

True

In [3]:
# Setting Alpaca API key and secret
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

In [4]:
# Creating the Alpaca API object, specifying use of the paper trading account:
api = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    base_url = 'https://paper-api.alpaca.markets',
    # 'https://paper-api.alpaca.markets/v1beta1/crypto'
    api_version = "v2"
)

In [5]:
# Testing for active or inactive 
TSLA_asset = api.get_asset('TSLA')
TSLA_asset

Asset({   'class': 'us_equity',
    'easy_to_borrow': True,
    'exchange': 'NASDAQ',
    'fractionable': True,
    'id': '8ccae427-5dd0-45b3-b5fe-7ba5e422c766',
    'marginable': True,
    'name': 'Tesla, Inc. Common Stock',
    'shortable': True,
    'status': 'active',
    'symbol': 'TSLA',
    'tradable': True})

### Data Preparation



In [6]:
# Defining a list of tickers
ticker_list = ["FB", "AMZN", "AAPL", "NFLX", "GOOGL", "MSFT","TSLA",]
# beginning and end date strings
beg_date = '2021-01-05'
end_date = '2021-01-05'
# Converting begin and end date for ALPACA API 
start =  pd.Timestamp(f'{beg_date} 09:30:00-0400', tz='America/New_York').replace(hour=9, minute=30, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
end   =  pd.Timestamp(f'{end_date} 16:00:00-0400', tz='America/New_York').replace(hour=16, minute=0, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
# setting the timeframe
timeframe='1Min'

In [7]:
# Pulling prices from ALPACA API
com_prices = api.get_barset(ticker_list, timeframe,limit=1000, start=start, end=end).df
com_prices

  


Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AMZN,AMZN,AMZN,AMZN,AMZN,...,NFLX,NFLX,NFLX,NFLX,NFLX,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-05 09:30:00-05:00,128.960,129.485,128.45,129.485,51887,3166.80,3173.53,3166.80,3172.980,1963.0,...,521.980,521.980,520.77,521.030,1355.0,723.66,726.280,721.35,725.23,18284.0
2021-01-05 09:31:00-05:00,129.480,130.170,129.30,130.060,44188,3173.59,3182.67,3173.58,3177.810,1266.0,...,520.920,521.755,520.92,521.365,1112.0,726.60,726.999,722.42,723.00,7760.0
2021-01-05 09:32:00-05:00,130.170,130.320,129.93,130.020,12852,3175.00,3175.47,3174.91,3175.470,778.0,...,522.355,522.355,520.77,520.770,1347.0,723.10,723.100,719.78,720.57,9902.0
2021-01-05 09:33:00-05:00,130.090,130.140,129.78,130.120,14192,3181.52,3181.52,3177.87,3179.360,660.0,...,520.840,520.840,520.00,520.000,1582.0,720.53,722.710,719.22,719.71,7086.0
2021-01-05 09:34:00-05:00,130.150,130.580,130.15,130.510,12002,3183.66,3189.98,3183.66,3184.015,731.0,...,521.440,522.260,521.37,522.240,1039.0,719.97,724.220,719.97,724.22,8581.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-05 15:56:00-05:00,130.965,130.970,130.84,130.850,12768,3220.70,3220.84,3219.32,3219.840,1639.0,...,520.120,520.150,519.57,519.570,3542.0,732.89,733.490,732.78,733.00,4909.0
2021-01-05 15:57:00-05:00,130.860,131.010,130.86,131.010,17416,3219.95,3222.70,3219.60,3222.700,1951.0,...,519.850,520.460,519.76,520.460,1527.0,733.05,734.585,733.05,734.49,7281.0
2021-01-05 15:58:00-05:00,131.020,131.040,130.86,130.990,28102,3221.78,3221.79,3221.18,3221.180,1112.0,...,520.430,520.650,520.29,520.300,1804.0,734.28,734.830,734.09,734.83,11767.0
2021-01-05 15:59:00-05:00,130.985,131.090,130.95,130.965,26220,3220.14,3220.38,3219.03,3219.670,3164.0,...,520.360,520.890,520.24,520.760,7235.0,734.63,735.420,734.62,735.33,13446.0


In [8]:
# Checking Information
com_prices.dtypes

AAPL   open      float64
       high      float64
       low       float64
       close     float64
       volume      int64
AMZN   open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
FB     open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
GOOGL  open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
MSFT   open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
NFLX   open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
TSLA   open      float64
       high      float64
       low       float64
       close     float64
       volume    float64
dtype: object

In [9]:
# Creating empty DataFrame for closing prices
closing_prices_df = pd.DataFrame({
    "FB": com_prices["FB"].close,
    "AMZN": com_prices["AMZN"].close,
    "AAPL": com_prices["AAPL"].close,
    "NFLX": com_prices["NFLX"].close,
    "GOOGL": com_prices["GOOGL"].close,
    "MSFT": com_prices["MSFT"].close,
    "TSLA": com_prices["TSLA"].close,
    }, index=com_prices.index
)

In [10]:
# first five rows
closing_prices_df.head(5)

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOGL,MSFT,TSLA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-05 09:30:00-05:00,269.0,3172.98,129.485,521.03,1724.17,217.65,725.23
2021-01-05 09:31:00-05:00,269.17,3177.81,130.06,521.365,1724.05,217.63,723.0
2021-01-05 09:32:00-05:00,269.72,3175.47,130.02,520.77,1721.61,217.77,720.57
2021-01-05 09:33:00-05:00,268.8,3179.36,130.12,520.0,,217.72,719.71
2021-01-05 09:34:00-05:00,269.58,3184.015,130.51,522.24,1720.3,217.31,724.22


In [11]:
# last five rows
closing_prices_df.tail(5)

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOGL,MSFT,TSLA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-05 15:56:00-05:00,270.65,3219.84,130.85,519.57,1738.15,217.97,733.0
2021-01-05 15:57:00-05:00,270.91,3222.7,131.01,520.46,1738.99,218.175,734.49
2021-01-05 15:58:00-05:00,270.88,3221.18,130.99,520.3,1738.84,218.15,734.83
2021-01-05 15:59:00-05:00,270.86,3219.67,130.965,520.76,1740.57,218.0,735.33
2021-01-05 16:00:00-05:00,,,131.14,,,,


In [12]:
# Using forward fill function to fill missing values 
closing_prices_df.ffill(inplace=True)
closing_prices_df.head()

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOGL,MSFT,TSLA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-05 09:30:00-05:00,269.0,3172.98,129.485,521.03,1724.17,217.65,725.23
2021-01-05 09:31:00-05:00,269.17,3177.81,130.06,521.365,1724.05,217.63,723.0
2021-01-05 09:32:00-05:00,269.72,3175.47,130.02,520.77,1721.61,217.77,720.57
2021-01-05 09:33:00-05:00,268.8,3179.36,130.12,520.0,1721.61,217.72,719.71
2021-01-05 09:34:00-05:00,269.58,3184.015,130.51,522.24,1720.3,217.31,724.22


### Computing Returns

In [13]:
# Setting prediction period
forecast = 1
# pct_change using period forecast  
returns_df = closing_prices_df.pct_change(periods=forecast)
# Shift the returns to convert them to forward returns
returns_df = returns_df.shift(-(forecast))
# View the DataFrame
returns_df.head()

Unnamed: 0_level_0,FB,AMZN,AAPL,NFLX,GOOGL,MSFT,TSLA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-01-05 09:30:00-05:00,0.000632,0.001522,0.004441,0.000643,-7e-05,-9.2e-05,-0.003075
2021-01-05 09:31:00-05:00,0.002043,-0.000736,-0.000308,-0.001141,-0.001415,0.000643,-0.003361
2021-01-05 09:32:00-05:00,-0.003411,0.001225,0.000769,-0.001479,0.0,-0.00023,-0.001193
2021-01-05 09:33:00-05:00,0.002902,0.001464,0.002997,0.004308,-0.000761,-0.001883,0.006266
2021-01-05 09:34:00-05:00,0.001335,0.002074,0.000651,-0.001704,0.003061,-0.001703,0.004667


In [14]:
# Using unstack() to bring the data in long format and save the output as as dataframe
returns_df = pd.DataFrame(returns_df.unstack(level=0))
# Rename the column to make it easer to identify it:
name = f'F_{forecast}_m_returns'
returns_df.rename(columns={0: name}, inplace = True)
# Reset the index of the dataframe for merging later
returns_df.reset_index(inplace=True)

In [15]:
# Preview the first five rows
returns_df.head(5)

Unnamed: 0,level_0,time,F_1_m_returns
0,FB,2021-01-05 09:30:00-05:00,0.000632
1,FB,2021-01-05 09:31:00-05:00,0.002043
2,FB,2021-01-05 09:32:00-05:00,-0.003411
3,FB,2021-01-05 09:33:00-05:00,0.002902
4,FB,2021-01-05 09:34:00-05:00,0.001335


In [16]:
# Preview the last five rows
returns_df.tail(5)

Unnamed: 0,level_0,time,F_1_m_returns
2732,TSLA,2021-01-05 15:56:00-05:00,0.002033
2733,TSLA,2021-01-05 15:57:00-05:00,0.000463
2734,TSLA,2021-01-05 15:58:00-05:00,0.00068
2735,TSLA,2021-01-05 15:59:00-05:00,0.0
2736,TSLA,2021-01-05 16:00:00-05:00,


#### Creating 1 min 5 min and 10 min Momentum Returns Algorithm

In [17]:
# Creating a list of momentums that we want to predict
list_of_momentums = [1,5,10]
for i in list_of_momentums:   
    # Computing percentage change for each one of the momentums in the momentum list
    pct_change = closing_prices_df.pct_change(i)
    # Unstacking the returns and save the output as as dataframe called returns_temp 
    returns_temp = pd.DataFrame(pct_change.unstack(level=0))
    # Rename the column to make it easer to identify it
    name = f'{i}_m_returns'
    returns_temp.rename(columns={0: name}, inplace = True)
    # Reset the index so can merge based on index
    returns_temp.reset_index(inplace=True)
    # Merge returns_temp  with the original returns 
    returns_df = pd.merge(returns_df,returns_temp,left_on=['level_0', 'time'],right_on=['level_0', 'time'], how='left', suffixes=('_original', 'right'))
returns_df.head(11)

Unnamed: 0,level_0,time,F_1_m_returns,1_m_returns,5_m_returns,10_m_returns
0,FB,2021-01-05 09:30:00-05:00,0.000632,,,
1,FB,2021-01-05 09:31:00-05:00,0.002043,0.000632,,
2,FB,2021-01-05 09:32:00-05:00,-0.003411,0.002043,,
3,FB,2021-01-05 09:33:00-05:00,0.002902,-0.003411,,
4,FB,2021-01-05 09:34:00-05:00,0.001335,0.002902,,
5,FB,2021-01-05 09:35:00-05:00,0.000185,0.001335,0.003494,
6,FB,2021-01-05 09:36:00-05:00,0.000778,0.000185,0.003046,
7,FB,2021-01-05 09:37:00-05:00,-0.000777,0.000778,0.00178,
8,FB,2021-01-05 09:38:00-05:00,0.001,-0.000777,0.004427,
9,FB,2021-01-05 09:39:00-05:00,7.4e-05,0.001,0.002522,


In [18]:
# Using dropna() to get rid of those missing values.
returns_df.dropna(inplace=True)
# Creating a multi index based on level_0 and time
returns_df.set_index(['level_0','time'], inplace=True)
returns_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_m_returns,1_m_returns,5_m_returns,10_m_returns
level_0,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
FB,2021-01-05 09:40:00-05:00,0.000814,7.4e-05,0.00126,0.004758
FB,2021-01-05 09:41:00-05:00,0.000887,0.000814,0.001889,0.004941
FB,2021-01-05 09:42:00-05:00,0.000628,0.000887,0.001999,0.003782
FB,2021-01-05 09:43:00-05:00,0.00048,0.000628,0.003408,0.00785
FB,2021-01-05 09:44:00-05:00,-0.001291,0.00048,0.002886,0.005416


## Part 2


### Training The Data

In [19]:
# Creating a separate dataframe for features with values as ints
X = returns_df.iloc[:,1:4]
y = []
# Creating the target variable
for row in returns_df["F_1_m_returns"]:
    if row > 0:
        y.append(1)

    elif row <= 0:
        y.append(0)

In [20]:
# Splitting the dataset to train and test 
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, shuffle=False)

In [21]:
# Using Counter to count the number 1s and 0 in y_train
Counter(y_train)

Counter({1: 730, 0: 1265})

In [22]:
# Using RandomOverSampler to resample the data using random_state=1
ros = RandomOverSampler(random_state=1)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

In [23]:
# Using Counter again to check they are equal
Counter(y_resampled)

Counter({1: 1265, 0: 1265})

## Machine Learning

#### GaussianNB

In [24]:
# Creating a GaussianNB model and training it on the X_resampled data
gnb_model = gnb()
gnb_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = gnb_model.predict(X_test)   

# Printing out a classification report to evaluate the performance
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     0.5022    0.6976    0.5840       334
           1     0.4975    0.3021    0.3759       331

    accuracy                         0.5008       665
   macro avg     0.4998    0.4999    0.4799       665
weighted avg     0.4998    0.5008    0.4804       665



#### LogisticRegression

In [25]:
# Creating a LogisticRegression model and training it on the X_resampled data
log_model = LogisticRegression()
log_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = log_model.predict(X_test)   

# Printing out a classification report to evaluate performance
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

           0     0.5299    0.5299    0.5299       334
           1     0.5257    0.5257    0.5257       331

    accuracy                         0.5278       665
   macro avg     0.5278    0.5278    0.5278       665
weighted avg     0.5278    0.5278    0.5278       665



#### RandomForestClassifier

In [26]:
# Creating a RandomForest model and training it on the X_resampled data
rfc_model = RandomForestClassifier()
rfc_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = rfc_model.predict(X_test)   

# Printing out a classification report to evaluate performance
print(classification_report(y_test, y_pred, digits=4))

# Printing out a balanced accuracy score report to evaluate performance
print(f"Balanced Accuracy Score: {balanced_accuracy_score(y_test, y_pred)}")

# Calculating the Sharpe Ratio
sharpe_ratio = y_pred.mean() / y_pred.std()
print(f"Sharpe Ratio: {sharpe_ratio}")

              precision    recall  f1-score   support

           0     0.5012    0.6377    0.5613       334
           1     0.4958    0.3595    0.4168       331

    accuracy                         0.4992       665
   macro avg     0.4985    0.4986    0.4890       665
weighted avg     0.4985    0.4992    0.4894       665

Balanced Accuracy Score: 0.4986205836062015
Sharpe Ratio: 0.7514691493021793


#### GradientBoostingClassifier

In [27]:
# Creating a GradientBoosting model and training it on the X_resampled data
gbc_model = GradientBoostingClassifier()
gbc_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = gbc_model.predict(X_test)   

# Printing out a classification report to evaluate performance
print(classification_report(y_test, y_pred, digits=4))

# Printing out a balanced accuracy score report to evaluate performance
print(f"Balanced Accuracy Score: {balanced_accuracy_score(y_test, y_pred)}")

# Calculating the Sharpe Ratio
sharpe_ratio = y_pred.mean() / y_pred.std()
print(f"Sharpe Ratio: {sharpe_ratio}")

              precision    recall  f1-score   support

           0     0.5122    0.5030    0.5076       334
           1     0.5074    0.5166    0.5120       331

    accuracy                         0.5098       665
   macro avg     0.5098    0.5098    0.5098       665
weighted avg     0.5098    0.5098    0.5098       665

Balanced Accuracy Score: 0.5098051630877218
Sharpe Ratio: 1.0136266691392073


#### AdaBoostClassifier

In [28]:
# Creating a AdaBoost model and training it on the X_resampled data
abc_model = AdaBoostClassifier()
abc_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = abc_model.predict(X_test)   

# Printing out a classification report to evaluate performance
print(classification_report(y_test, y_pred, digits=4))

# Printing out a balanced accuracy score report to evaluate performance
print(f"Balanced Accuracy Score: {balanced_accuracy_score(y_test, y_pred)}")

# Calculating the Sharpe Ratio
sharpe_ratio = y_pred.mean() / y_pred.std()
print(f"Sharpe Ratio: {sharpe_ratio}")

              precision    recall  f1-score   support

           0     0.4875    0.4102    0.4455       334
           1     0.4870    0.5650    0.5231       331

    accuracy                         0.4872       665
   macro avg     0.4873    0.4876    0.4843       665
weighted avg     0.4873    0.4872    0.4841       665

Balanced Accuracy Score: 0.4875671617490095
Sharpe Ratio: 1.1689944579443594


#### XGBClassifier

In [29]:
# Creating a XGBClassifier model and training it on the X_resampled data
xgbc_model = XGBClassifier()
xgbc_model.fit(X_resampled, y_resampled)  

# Using the trained model to predict using X_test
y_pred = xgbc_model.predict(X_test)   

# Printing out a classification report to evaluate performance
print(classification_report(y_test, y_pred, digits=4))

# Printing out a balanced accuracy score report to evaluate performance
print(f"Balanced Accuracy Score: {balanced_accuracy_score(y_test, y_pred)}")

# Calculating the Sharpe Ratio
sharpe_ratio = y_pred.mean() / y_pred.std()
print(f"Sharpe Ratio: {sharpe_ratio}")





              precision    recall  f1-score   support

           0     0.5128    0.6018    0.5537       334
           1     0.5128    0.4230    0.4636       331

    accuracy                         0.5128       665
   macro avg     0.5128    0.5124    0.5086       665
weighted avg     0.5128    0.5128    0.5089       665

Balanced Accuracy Score: 0.5123785661305787
Sharpe Ratio: 0.8345229603962802


In [30]:
# Using the joblib library to save the model to use for trading
joblib.dump(log_model, 'log_model.pkl')

['log_model.pkl']

## Part 3 Implementing The Strongest Model Using Apaca Paper Trading API

### Developing The Algorithm


In [31]:
# Creating the list of tickers
ticker_list = ['FB','AMZN','AAPL','NFLX', 'GOOGL', 'MSFT', 'TSLA']

# Defining the Dates
beg_date = '2020-01-06'
end_date = '2021-01-06'

# Converting the date for Alpaca API 
start =  pd.Timestamp(f'{beg_date} 09:30:00-0400', tz='America/New_York').replace(hour=9, minute=30, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
end   =  pd.Timestamp(f'{end_date} 16:00:00-0400', tz='America/New_York').replace(hour=15, minute=0, second=0).astimezone('GMT').isoformat()[:-6]+'Z'
timeframe='1Min'

# Getting the last 10 mins every time we pull new data
prices = api.get_barset(ticker_list, "minute", start=start, end=end).df.iloc[-11:]
prices.ffill(inplace=True)   

# Creating an empty DataFrame for closing prices
df_closing_prices = pd.DataFrame()

# Fetching the closing prices of the tickers
df_closing_prices["FB"] = prices["FB"]["close"]
df_closing_prices["AMZN"] = prices["AMZN"]["close"]
df_closing_prices["AAPL"] = prices["AAPL"]["close"]
df_closing_prices["NFLX"] = prices["NFLX"]["close"]
df_closing_prices["GOOGL"] = prices["GOOGL"]["close"]
df_closing_prices['MSFT'] = prices['MSFT']["close"]
df_closing_prices['TSLA'] = prices['TSLA']["close"]

# Printing df 
print(df_closing_prices.head(20))

  


                                FB      AMZN     AAPL    NFLX    GOOGL  \
time                                                                     
2021-01-06 14:50:00-05:00  264.610  3146.960  127.110  506.54  1721.82   
2021-01-06 14:51:00-05:00  264.630  3146.910  127.430  506.54  1721.82   
2021-01-06 14:52:00-05:00  264.830  3147.980  127.720  506.69  1723.67   
2021-01-06 14:53:00-05:00  264.525  3148.570  127.510  506.01  1723.67   
2021-01-06 14:54:00-05:00  264.560  3147.840  127.645  506.01  1720.84   
2021-01-06 14:55:00-05:00  264.880  3150.330  127.920  506.30  1720.60   
2021-01-06 14:56:00-05:00  264.965  3150.610  128.150  506.72  1721.10   
2021-01-06 14:57:00-05:00  264.980  3151.745  127.980  507.07  1720.07   
2021-01-06 14:58:00-05:00  265.000  3149.280  127.850  506.33  1720.07   
2021-01-06 14:59:00-05:00  265.360  3150.840  127.930  506.13  1720.48   
2021-01-06 15:00:00-05:00  264.840  3148.580  127.630  506.43  1720.48   

                              MSFT   

In [32]:
# Creating a list of momentums
list_of_momentums = [1,5,10]

#Creating a for loop to iterate through the closing prices and pct change and d
for i in list_of_momentums:  
    # Computing percentage change for each one of the momentums
    returns_temp = df_closing_prices.pct_change(i)
    # Unstacking the returns 
    returns_temp = pd.DataFrame(returns_temp.unstack())
    name = f'{i}_m_returns'
    returns_temp.rename(columns={0: name}, inplace = True)
    # Resetting the index 
    returns_temp.reset_index(inplace = True)
    # Merging newly computed returns with previously created returns
    if i == 1:
        returns = returns_temp
    else:
        returns = pd.merge(returns,returns_temp,left_on=['level_0', 'time'],right_on=['level_0', 'time'], how='left', suffixes=('_original', 'right'))

# Dropping all nulls and set index
returns.dropna(axis=0, how='any', inplace=True)
returns.set_index(['level_0', 'time'], inplace=True)

# Generating feature data and viewing first 10 rows.
X = returns
X.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,1_m_returns,5_m_returns,10_m_returns
level_0,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FB,2021-01-06 15:00:00-05:00,-0.00196,-0.000151,0.000869
AMZN,2021-01-06 15:00:00-05:00,-0.000717,-0.000555,0.000515
AAPL,2021-01-06 15:00:00-05:00,-0.002345,-0.002267,0.004091
NFLX,2021-01-06 15:00:00-05:00,0.000593,0.000257,-0.000217
GOOGL,2021-01-06 15:00:00-05:00,0.0,-7e-05,-0.000778
MSFT,2021-01-06 15:00:00-05:00,-0.00077,-0.001213,0.000654
TSLA,2021-01-06 15:00:00-05:00,-0.000735,-0.001587,0.0105


In [33]:
# Loading the previously trained and saved model using joblib
model = joblib.load('log_model.pkl')

In [34]:
# Using the model file to predict on X
y_pred = model.predict(X)

# Converting y_pred to a dataframe, setting the index to X index
y_df = pd.DataFrame(y_pred, index=X.index)

# Renaming the column 0 to 'buy'
y_df.rename(columns={0: "buy"}, inplace = True)
y_df

Unnamed: 0_level_0,Unnamed: 1_level_0,buy
level_0,time,Unnamed: 2_level_1
FB,2021-01-06 15:00:00-05:00,1
AMZN,2021-01-06 15:00:00-05:00,1
AAPL,2021-01-06 15:00:00-05:00,1
NFLX,2021-01-06 15:00:00-05:00,0
GOOGL,2021-01-06 15:00:00-05:00,1
MSFT,2021-01-06 15:00:00-05:00,1
TSLA,2021-01-06 15:00:00-05:00,0


In [35]:
# Filterring the stocks where 'buy' is equal to 1
y_pred = y_df.loc[y_df["buy"] == 1]
y_pred

Unnamed: 0_level_0,Unnamed: 1_level_0,buy
level_0,time,Unnamed: 2_level_1
FB,2021-01-06 15:00:00-05:00,1
AMZN,2021-01-06 15:00:00-05:00,1
AAPL,2021-01-06 15:00:00-05:00,1
GOOGL,2021-01-06 15:00:00-05:00,1
MSFT,2021-01-06 15:00:00-05:00,1


In [36]:
# Creating a dictionary from y_pred and assign a 'n' to each of them for now as a placeholder.
buy_dict = dict.fromkeys(y_pred.index.get_level_values(0), 'n')
buy_dict

{'FB': 'n', 'AMZN': 'n', 'AAPL': 'n', 'GOOGL': 'n', 'MSFT': 'n'}

In [37]:
# Pulling the total available equity in our account from the Alpaca API
account = api.get_account()
total_capital = float(account.equity)
print(f"Total available capital: {total_capital}")

Total available capital: 100000.0


In [38]:
# Computing capital per stock, divide equity in account by number of stocks
# Use Alpaca API to pull the equity in the account
if len(buy_dict) > 0:
    capital_per_stock = float(total_capital)/ len(buy_dict)
else:
    capital_per_stock = 0
print(f'Capital per stock: {capital_per_stock}')

Capital per stock: 20000.0


In [39]:
# Using a for loop to iterate through dictionary of buys 
# Determining the number stocks we need to buy for each ticker
for ticker in buy_dict:
    try:
        buy_dict[ticker] = int(capital_per_stock /int(prices[ticker].iloc[-1]['close']))
    except:
        pass

print(buy_dict)

{'FB': 75, 'AMZN': 6, 'AAPL': 157, 'GOOGL': 11, 'MSFT': 93}


In [40]:
# Cancelling all previous orders in the Alpaca API
api.cancel_all_orders()

# Selling all currently held stocks to close all positions
api.close_all_positions()

[]

In [41]:
# Iterating through the long list object and sending a buy order for each ticker with a corresponding number of shares:
for stock, qty in buy_dict.items():    
    # Submitting a market order to buy shares as described in buy_dict
    api.submit_order(
        symbol=stock,
        qty=qty,
        side='buy',
        type='market',
        time_in_force='gtc',
    )
    print(f'buying {stock} numShares {qty}')

buying FB numShares 75
buying AMZN numShares 6
buying AAPL numShares 157
buying GOOGL numShares 11
buying MSFT numShares 93


### Automating The Algorithm

In [42]:
# Adding all of the steps conducted above into the function trade
def trade():

    # Setting a list of the Symbols for tickers 
    ticker_list = ['FB','AMZN','AAPL','NFLX', 'GOOGL', 'MSFT', 'TSLA']
    # setting timeframe for pull
    timeframe='1Min'
    # Using iloc to get the last 10 mins every time we pull new data
    com_prices = api.get_barset(ticker_list, "minute").df.iloc[-11:]
    com_prices.ffill(inplace=True)   

    # Creating and empty DataFrame for closing prices
    closing_prices_df = pd.DataFrame()

    # Adding the closing prices to the new Dataframe
    closing_prices_df = pd.DataFrame({
    "FB": com_prices["FB"].close,
    "AMZN": com_prices["AMZN"].close,
    "AAPL": com_prices["AAPL"].close,
    "NFLX": com_prices["NFLX"].close,
    "GOOGL": com_prices["GOOGL"].close,
    "MSFT": com_prices["MSFT"].close,
    "TSLA": com_prices["TSLA"].close,
    }, index=com_prices.index
)
    print(closing_prices_df.head())
    
    # Looping through momentums to build new DataFrame
    list_of_momentums = [1,5,10]
    for i in list_of_momentums:   
        returns_temp = closing_prices_df.pct_change(i)
        returns_temp = pd.DataFrame(returns_temp.unstack())
        name = f'{i}_m_returns'
        returns_temp.rename(columns={0: name}, inplace = True)
        returns_temp.reset_index(inplace = True)
        if i ==1:
            returns_df = returns_temp
        else:
            returns_df = pd.merge(returns_df,returns_temp,left_on=['level_0', 'time'],right_on=['level_0', 'time'], how='left', suffixes=('_original', 'right'))

    # Dropping all nulls and setting index            
    returns_df.dropna(axis=0, how='any', inplace=True)
    returns_df.set_index(['level_0', 'time'], inplace=True)

    # Preprocessing the data for model
    model = joblib.load('log_model.pkl')
    y_pred = model.predict(X)
    y_df = pd.DataFrame(y_pred, index=X.index)
    y_df.rename(columns={0: "buy"}, inplace = True)
    y_pred = y_df.loc[y_df["buy"] == 1]
    
    # Createing the `buy_dict` object
    buy_dict = dict.fromkeys(y_pred.index.get_level_values(0), 'n')
    
    # Splitting capital between stocks and determining buy or sell
    account = api.get_account()
    total_capital = float(account.equity)
    if len(buy_dict) > 0:
        capital_per_stock = float(total_capital)/ len(buy_dict)
    else:
        capital_per_stock = 0
    for ticker in buy_dict:
        try:
            buy_dict[ticker] = int(capital_per_stock /int(prices[ticker].iloc[-1]['close']))
        except:
            pass

    
    # Cancelling pending orders and closing positions
    api.cancel_all_orders()
    api.close_all_positions()
    
    # Submitting orders
    for stock, qty in buy_dict.items():    
        # Submitting a market order to buy shares as described in buy_dict
        api.submit_order(
            symbol=stock,
            qty=qty,
            side='buy',
            type='market',
            time_in_force='gtc',
        )
    print(f'buying {stock} numShares {qty}')


In [43]:
# Clearing the schedule
schedule.clear()

# Defining a schedule to run the trade function every minute at 5 seconds past the minute mark (e.g. 10:31:05)
trade_schedule = schedule.every().minute.at(":05").do(trade)

# Using the Alpaca API to check whether the market is open
clock = api.get_clock()

# Using run_pending() function inside schedule to execute the schedule you defined as long as the market is open
while clock.is_open == True:
    print(f'The market trading window for {clock.next_open} is open, executing trade function')
    schedule.run_pending()
    time.sleep(1)
else:
    print(f'The market is closed the next open market day will be {clock.next_open}')

The market is closed the next open market day will be 2022-03-14 09:30:00-04:00


In [44]:
# Schedule
schedule.get_jobs()

[Every 1 minute at 00:00:05 do trade() (last run: [never], next run: 2022-03-14 10:19:05)]