# Algo Trading News or Noise

## Imports
---

In [784]:
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime
from dateutil.relativedelta import relativedelta
from pathlib import Path
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.express as px
import hvplot.pandas
from dotenv import load_dotenv
import panel as pn
from panel.interact import interact
from panel import widgets
import alpaca_trade_api as tradeapi
import quandl
import os
import requests
import ccxt
import time

## Read in data
---

In [785]:
ETF_csv = Path("Resources/ETFs0.csv")
ETF_df = pd.read_csv(ETF_csv, index_col='Unnamed: 0', infer_datetime_format=True, parse_dates=True)
ETF_df = ETF_df.drop(ETF_df.index[0])
ETF_df.index = pd.to_datetime(ETF_df.index, utc=True).date
ETF_df.sort_index(inplace=True)
ETF_df.tail()

Unnamed: 0,SPY,XLF,XLK,XLU,XLI,XLV,XLY,XLP,XLB,XOP,XHB,XME,XRT,ETH,LTC
2020-12-24,368.9899,28.94,129.05,61.19,88.01,111.48,157.91,66.44,71.58,58.99,59.4,32.57,64.74,20.09,39.3
2020-12-28,372.14,29.07,130.52,61.48,88.09,111.72,159.68,66.97,71.28,57.92,58.41,32.4,65.07,20.25,39.41
2020-12-29,371.48,28.97,129.89,61.47,87.52,112.24,159.74,67.42,71.12,57.35,57.92,32.26,64.275,20.02,39.03
2020-12-30,371.93,29.11,129.83,61.79,88.13,112.24,160.69,67.13,72.1,59.32,58.16,33.4,64.91,20.26,38.68
2020-12-31,373.85,29.47,130.07,62.7,88.59,113.43,160.77,67.05,72.38,58.49,57.63,33.43,64.33,20.21,38.91


## Set analysis inputs
---


In [786]:
ticker = input("Please enter the symbol name in caps:\n")

# Set intial capital
initial_capital = input("Please input the amount of capital you have (USD):\n")
initial_capital = int(initial_capital)

# Set lot size
share_size = input("Please input the lot size you wish to trade:\n")
share_size = int(share_size)

# Set windows
short_window = input("Please choose a short moving average lookback window: 20, 50 days\n")
short_window = int(short_window)
long_window = input("Please choose a long moving average lookback window: 100, 200 days\n")
long_window = int(long_window)
bollinger_window = input("Please choose a 10, 20, 50 bollinger lookback window\n")
bollinger_window = int(bollinger_window)

Please enter the symbol name in caps:
 SPY
Please input the amount of capital you have (USD):
 1000
Please input the lot size you wish to trade:
 100
Please choose a short moving average lookback window: 20, 50 days
 20
Please choose a long moving average lookback window: 100, 200 days
 100
Please choose a 10, 20, 50 bollinger lookback window
 20



## Signal generation
---
### SMA
*Simple Moving Average for a given lookback window*

In [787]:
# Grab `ticker` from the dataset
SMA_df = ETF_df[f'{ticker}'].copy().to_frame()

In [788]:
# Generate the short and long moving averages (50 and 100 days, respectively)
SMA_df["SMA_short"] = SMA_df[{ticker}].rolling(window=short_window).mean()
SMA_df["SMA_long"] = SMA_df[{ticker}].rolling(window=long_window).mean()

In [789]:
# Initialize the new `Signal` column
SMA_df["signal"] = 0.0

# Generate the trading signal -1, 1,
# where -1 is when the SMA50 is under the SMA100, and
# where 1 is when the SMA50 is higher (or crosses over) the SMA100
SMA_df["signal"][short_window:] = np.where(
    SMA_df["SMA_short"][short_window:] < SMA_df["SMA_long"][short_window:], -1.0, 1.0
)

In [790]:
# Calculate the points in time at which a position should be taken, 1 or -1
SMA_df["entry/exit"] = SMA_df["signal"].diff()

In [791]:
# Tidy up
SMA_df.dropna(inplace=True)
SMA_df.tail()

Unnamed: 0,SPY,SMA_short,SMA_long,signal,entry/exit
2020-12-24,368.9899,367.568975,347.260294,1.0,0.0
2020-12-28,372.14,367.993475,347.661094,1.0,0.0
2020-12-29,371.48,368.467475,348.032794,1.0,0.0
2020-12-30,371.93,368.765475,348.406594,1.0,0.0
2020-12-31,373.85,369.11998,348.789594,1.0,0.0


---
### EMA
*Exponential-weighted Moving Average for a given lookback window*

In [792]:
# Grab `ticker` from the dataset
EMA_df = ETF_df[f'{ticker}'].copy().to_frame()

In [793]:
# Construct a `Fast` and `Slow` Exponential Moving Average from short and long windows, respectively
EMA_df['EMA_short'] = EMA_df[{ticker}].ewm(halflife=short_window).mean()
EMA_df['EMA_long'] = EMA_df[{ticker}].ewm(halflife=long_window).mean()

In [794]:
# Initialize signal column
EMA_df["signal"] = 0.0

# Construct a crossover trading signal
# where -1 is when the EMA50 is under the EMA200, and
# where 1 is when the EMA50 is higher (or crosses over) the EMA200
EMA_df["signal"][short_window:] = np.where(
    EMA_df["EMA_short"][short_window:] < EMA_df["EMA_long"][short_window:], -1.0, 1.0
)

In [795]:
# Calculate the points in time at which a position should be taken, 1 or -1
EMA_df["entry/exit"] = EMA_df["signal"].diff()

In [796]:
# Tidy up
EMA_df.dropna(inplace=True)
EMA_df.tail()

Unnamed: 0,SPY,EMA_short,EMA_long,signal,entry/exit
2020-12-24,368.9899,356.165678,324.284557,1.0,0.0
2020-12-28,372.14,356.709822,324.615119,1.0,0.0
2020-12-29,371.48,357.212948,324.938838,1.0,0.0
2020-12-30,371.93,357.714265,325.26343,1.0,0.0
2020-12-31,373.85,358.263908,325.599042,1.0,0.0


---
### BBD
*Bollinger Bands for a given lookback window*

In [797]:
# Grab `ticker` from the dataset
BBD_df = ETF_df[f'{ticker}'].copy().to_frame()

In [798]:
# Calculate rolling mean and standard deviation
BBD_df['BBD_mid'] = BBD_df[{ticker}].rolling(window=bollinger_window).mean()
BBD_df['BBD_std'] = BBD_df[{ticker}].rolling(window=bollinger_window).std()

In [799]:
# Calculate upper and lowers bands of bollinger band
BBD_df['BBD_upper']  = BBD_df['BBD_mid'] + (BBD_df['BBD_std'] * 1)
BBD_df['BBD_lower']  = BBD_df['BBD_mid'] - (BBD_df['BBD_std'] * 1)

In [800]:
# Construct a crossover trading signal
# where 1 is when the close is under the lower bound, and
# where -1 is when the close is higher the upper bound
BBD_df['long'] = np.where(BBD_df[f'{ticker}'] < BBD_df['BBD_lower'], 1.0, 0.0)
BBD_df['short'] = np.where(BBD_df[f'{ticker}'] > BBD_df['BBD_upper'], -1.0, 0.0)

In [801]:
# Calculate bollinger band trading signal
BBD_df['signal'] = BBD_df['long'] + BBD_df['short']
BBD_df["entry/exit"] = BBD_df["signal"].diff()

In [802]:
# Tidy up
BBD_df.dropna(inplace=True)
BBD_df.tail()

Unnamed: 0,SPY,BBD_mid,BBD_std,BBD_upper,BBD_lower,long,short,signal,entry/exit
2020-12-24,368.9899,367.568975,2.43963,370.008605,365.129345,0.0,0.0,0.0,0.0
2020-12-28,372.14,367.993475,2.460381,370.453856,365.533094,0.0,-1.0,-1.0,-1.0
2020-12-29,371.48,368.467475,2.136852,370.604327,366.330623,0.0,-1.0,-1.0,0.0
2020-12-30,371.93,368.765475,2.185264,370.950739,366.580211,0.0,-1.0,-1.0,0.0
2020-12-31,373.85,369.11998,2.406666,371.526646,366.713314,0.0,-1.0,-1.0,0.0


---
### RSI
*Relative Strength Index for a given lookback window*

In [803]:
########################### TO DO ########################### 

---
### MACD
*Moving Average Convergence/ Divergence for a given lookback window*

In [804]:
########################### TO DO ########################### 

## Backtesting
---
### Simulated portfolio
*Evaluate historic signal performance*

In [805]:
# Take a 500 share position where the dual moving average crossover is 1 (SMA50 is greater than SMA100)
SMA_df["position"] = share_size * SMA_df["signal"]
EMA_df["position"] = share_size * EMA_df["signal"]
BBD_df["position"] = share_size * BBD_df["signal"]

In [806]:
def signal_position(df, share_size, initial_capital, ticker):
    df['position'] = share_size * df['signal']
    df["entry/exit position"] = df["position"].diff()
    df["portfolio holdings"] = (df[f'{ticker}'] * df["entry/exit position"].cumsum())
    df["portfolio cash"] = (initial_capital - (df[f'{ticker}'] * df["entry/exit position"]).cumsum())
    df["portfolio total"] = (df["portfolio cash"] + df["portfolio holdings"])
    df["portfolio daily returns"] = df["portfolio total"].pct_change()
    df["portfolio cumulative returns"] = (1 + df["portfolio daily returns"]).cumprod() - 1
    df.dropna(inplace=True)
    return df
    
SMA_df = signal_position(SMA_df, share_size, initial_capital, ticker)
EMA_df = signal_position(EMA_df, share_size, initial_capital, ticker)
BBD_df = signal_position(BBD_df, share_size, initial_capital, ticker)

---
### Descriptive statistics
*Evaluate backtesting*

In [815]:
# Prepare DataFrame for metrics
metrics = [
    'Annual Return (%)',
    'Cumulative Returns (%)',
    'Annual Volatility (%)',
    'Sharpe Ratio',
    'Sortino Ratio',
    'Max Drawdown (%)']

columns = ['SMA', 'EMA', 'BBD', 'RSI', 'MACD']

In [816]:
# Initialize the DataFrame with index set to evaluation metrics and columns
eval_df = pd.DataFrame(index=metrics, columns=columns)
eval_df.index.name = f'{ticker}' 

In [817]:
def evalll_df(ticker):
    eval_df = pd.DataFrame(index=metrics, columns=columns)
    eval_df.index.name = f'{ticker}' 
        
    def evall_df(df, signal, ticker):
        eval_df.loc['Cumulative Returns (%)', signal] = round(df['portfolio cumulative returns'][-1],2)
        eval_df.loc['Annual Return (%)', signal] = round((df['portfolio daily returns'].mean() * 252),2)
        eval_df.loc['Annual Volatility (%)', signal] = round(df['portfolio daily returns'].std() * np.sqrt(252),2)
        eval_df.loc['Sharpe Ratio', signal] = round((df['portfolio daily returns'].mean() * 252) / (df['portfolio daily returns'].std() * np.sqrt(252)), 2)
        df.loc[:,'downside returns'] = 0
        df_mask = df['portfolio daily returns'] < 0
        df.loc[df_mask, 'downside returns'] = df['portfolio daily returns']**2
        df_sortino = (df['portfolio daily returns'].mean() * 252) / (np.sqrt(df['downside returns'].mean()) * np.sqrt(252))
        eval_df.loc['Sortino Ratio', signal] = round(df_sortino,2)
        eval_df.loc['Max Drawdown (%)', signal] = round(df['portfolio cumulative returns'].diff(short_window).min(),2)
        return eval_df
    eval_df.loc[['Cumulative Returns (%)', 'Annual Return (%)', 'Annual Volatility (%)', 'Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)'],
               ['SMA']] = evall_df(SMA_df, 'SMA', ticker)
    eval_df.loc[['Cumulative Returns (%)', 'Annual Return (%)', 'Annual Volatility (%)', 'Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)'],
               ['EMA']] = evall_df(EMA_df, 'EMA', ticker) 
    eval_df.loc[['Cumulative Returns (%)', 'Annual Return (%)', 'Annual Volatility (%)', 'Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)'],
               ['BBD']] = evall_df(BBD_df, 'BBD', ticker)
    return eval_df

evalll_df(ticker)

Unnamed: 0_level_0,SMA,EMA,BBD,RSI,MACD
SPY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Annual Return (%),0.67,-87.71,3.62,,
Cumulative Returns (%),28.38,-8.3,-4.31,,
Annual Volatility (%),0.99,259.44,7.34,,
Sharpe Ratio,0.68,-0.34,0.49,,
Sortino Ratio,1.47,-0.34,1.73,,
Max Drawdown (%),-11.56,-10.46,-8.94,,


## Deep learning
---
### Signals df

In [829]:
# Combine signals
signals_df = pd.concat([SMA_df['signal'],
                        EMA_df['signal'],
                        BBD_df['signal'], 
                       ], axis=1)
signals_df.columns = ['SMA', 'EMA', 'BBD']
signals_df.index.name = f'{ticker}'
signals_df.dropna(inplace=True)

In [830]:
# Shift signals data by 1 in order to prime for predictions
signals_df = signals_df.shift(1)

In [831]:
# Set dependant variable
signals_df['daily return'] = ETF_df[f'{ticker}'].pct_change(short_window)
signals_df['positive return'] = np.where(signals_df['daily return'] > 0, 1.0, 0.0)
signals_df['negative return'] = np.where(signals_df['daily return'] < 0, -1.0, 0.0)
signals_df['ACTUAL'] = signals_df['positive return'] + signals_df['negative return']
signals_df.drop(columns=['positive return', 'negative return', 'daily return'], inplace=True)
signals_df.dropna(inplace=True)
signals_df.tail()

Unnamed: 0_level_0,SMA,EMA,BBD,ACTUAL
SPY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-12-24,1.0,1.0,0.0,1.0
2020-12-28,1.0,1.0,0.0,1.0
2020-12-29,1.0,1.0,-1.0,1.0
2020-12-30,1.0,1.0,-1.0,1.0
2020-12-31,1.0,1.0,-1.0,1.0


In [832]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn import preprocessing
from sklearn import utils

In [833]:
# Construct training start and end dates
#creating the train and test sets
X = signals_df.copy()
X.drop('ACTUAL', axis=1, inplace=True)
y = signals_df['ACTUAL'].values.reshape(-1,1)

In [834]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)
# Create the StandardScaler instance
scaler = StandardScaler()
# Fit the Standard Scaler with the training data
X_scaler = scaler.fit(X_train)
# Scale the training data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
# Create the random forest classifier instance
rf_model = RandomForestClassifier(n_estimators=500, random_state=78)
# Fit the model
rf_model = rf_model.fit(X_train_scaled, y_train)
# Making predictions using the testing data
predictions = rf_model.predict(X_test_scaled)
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

  rf_model = rf_model.fit(X_train_scaled, y_train)


Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,157,77
Actual 1,40,392


Accuracy Score : 0.8243243243243243
Classification Report
              precision    recall  f1-score   support

        -1.0       0.80      0.67      0.73       234
         1.0       0.84      0.91      0.87       432

    accuracy                           0.82       666
   macro avg       0.82      0.79      0.80       666
weighted avg       0.82      0.82      0.82       666



In [835]:
# Get the feature importance array
importances = rf_model.feature_importances_
# List the top 10 most important features
importances_sorted = sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)
importances_sorted[:10]

[(0.9219788304741454, 'BBD'),
 (0.06982143218969893, 'SMA'),
 (0.008199737336155616, 'EMA')]

---
### Features df

In [836]:
# Combine signals
features_df = pd.concat([ETF_df[f'{ticker}'], 
                        SMA_df['SMA_long'], SMA_df['SMA_short'],
                        EMA_df['EMA_long'], EMA_df['EMA_short'],
                        BBD_df['BBD_mid'], BBD_df['BBD_std'], BBD_df['BBD_upper'], BBD_df['BBD_lower']
                       ], axis=1)

In [837]:
features_df.dropna(inplace=True)
features_df.tail()

Unnamed: 0,SPY,SMA_long,SMA_short,EMA_long,EMA_short,BBD_mid,BBD_std,BBD_upper,BBD_lower
2020-12-24,368.9899,347.260294,367.568975,324.284557,356.165678,367.568975,2.43963,370.008605,365.129345
2020-12-28,372.14,347.661094,367.993475,324.615119,356.709822,367.993475,2.460381,370.453856,365.533094
2020-12-29,371.48,348.032794,368.467475,324.938838,357.212948,368.467475,2.136852,370.604327,366.330623
2020-12-30,371.93,348.406594,368.765475,325.26343,357.714265,368.765475,2.185264,370.950739,366.580211
2020-12-31,373.85,348.789594,369.11998,325.599042,358.263908,369.11998,2.406666,371.526646,366.713314


In [838]:
# Construct training start and end dates
#creating the train and test sets
# X = features_df.copy()
# X.drop('BBD_upper', axis=1, inplace=True)
# y = features_df['BBD_upper'].values.reshape(-1,1)