# The Ultimate Algo Trader (Neurobot 1.0)

## Introduction:
This Jupyter notebook contains code for developing and testing a dynamic algorithmic trader based on Python and a machine-learning strategy.

## Steps:
1. **Data preparation**: Access historical market data via Alpaca API and preprocess it for analysis.
2. **Strategy creation**: Code algorithmic trading strategy based on "TBD".
3. **Backtesting-Optimization**: Backtest the strategy using historical data and fine-tune parameters for better performance.
4. **Risk/Reward**: Calculate both the risk and the reward based on the entry price, position size, stop-loss and target price
5. **Live trading (optional)**: Implement the strategy for paper trading on Alpaca.

**Tools and Libraries**
- Python, Pandas, Numpy, "TBD"

### Imports and Dependencies

In [1]:
import datetime
import os
import random

import alpaca_trade_api as tradeapi
import hvplot.pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_ta as ta
import seaborn as sns
import yfinance as yf
from alpaca_trade_api.rest import REST, TimeFrame
from dotenv import load_dotenv
from pandas.tseries.offsets import DateOffset
from scipy.interpolate import interp1d
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


# Initialize python files and import functions
import stock_data as data
import algo_strategy as strategy
import nn_models as model
import backtesting as backtest
import feature_list as feature
import utils as visual

import warnings
warnings.filterwarnings('ignore')


In [2]:
# Load variables from a local .env file so the os.getenv() lookups in the
# next cell can find the API credentials (the cell output shows True).
load_dotenv()

True

In [3]:
# Alpaca credentials are read from the environment; requests target the
# paper-trading endpoint.
base_url = 'https://paper-api.alpaca.markets'
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Ticker universe: the helper is meant to rank by monthly dollar volume
# (liquidity); the symbol is taken from position 1 of each returned entry.
tickers_list = data.get_top_tickers(sp500_url=os.getenv("SP500_URL"))
tickers = [entry[1] for entry in tickers_list]

# Fetch the price history, keeping the raw result in stock_data and a copy
# without NaN rows in stock_df.
stock_data = data.fetch_stock_data(tickers, alpaca_api_key, alpaca_secret_key, base_url)
stock_df = stock_data.dropna()

[*********************100%%**********************]  503 of 503 completed


In [4]:
# Show the symbols selected for this run
print(tickers)

['ZTS', 'CRL', 'CHD', 'CHTR', 'CI', 'CLX', 'IPG', 'IP', 'COF', 'COR', 'CTLT', 'CDW', 'CTVA', 'INCY', 'CZR', 'DAY', 'DE', 'DFS', 'DIS', 'DLR', 'CE', 'CDNS', 'HST', 'CMS', 'AMGN', 'AMT', 'AMZN', 'APD', 'KDP', 'K', 'AVY', 'AZO', 'BALL', 'CBRE', 'BAX', 'BDX', 'JBL', 'BIO', 'BKNG', 'ITW', 'IRM', 'CAT', 'ICE', 'DOC', 'DOV', 'GPC', 'FRT', 'FSLR', 'FTNT', 'FTV', 'GEHC', 'GEN', 'GNRC', 'HIG', 'GPN', 'DPZ', 'HES', 'HD', 'HAS', 'HBAN', 'HII', 'HLT', 'HOLX', 'HRL', 'FOXA', 'HPE', 'HPQ', 'FI', 'DUK', 'ECL', 'EFX', 'EG', 'EL', 'HWM', 'EMR']


#### Feature Engineering - Time Series Analysis

In [5]:
# Percentage change per the helper's name. The later stock_df display has a
# "Daily Returns" column, so the helper presumably also writes it onto
# stock_df — TODO confirm against feature_list.
daily_returns = feature.calculate_pct_change(stock_df)

In [6]:
# Presumably the source of the "Daily Returns Lagged" column in the later
# stock_df display — TODO confirm.
# NOTE(review): in that display each row's lagged value equals the NEXT row's
# "Daily Returns", so this looks like a forward shift rather than a lag.
daily_returns_lagged = feature.daily_returns_lagged(stock_df)

In [7]:
# Presumably the source of the "Cumulative Returns" column in the later
# stock_df display — TODO confirm against feature_list.
cumulative_returns = feature.cumulative_returns(stock_df)

In [8]:
# Simple moving average; presumably a 20-period window (per the variable name)
# and the source of the displayed "SMA" column — TODO confirm.
twenty_sma = feature.simple_moving_averge(stock_df)

In [9]:
# Long/short SMA and EMA features per the helper's name; the later display shows
# an "EMA Fast" column (others are elided there) — TODO confirm the full set.
sma_ema_long_short = feature.sma_ema_long_short(stock_df)

In [10]:
# Presumably Donchian-channel features: the later display has DCL / DCM / DCU
# columns — TODO confirm against feature_list.
don_chaian = feature.don_chaian(stock_df)

In [11]:
# Presumably the source of the "STD" column in the later stock_df display —
# TODO confirm the window used by the helper.
standard_deviation = feature.standard_deviation(stock_df)

In [12]:
# Presumably the source of the "Upper Bollinger Band" / "Lower Bollinger Band"
# columns in the later stock_df display — TODO confirm.
Upper_lower_bollinger = feature.calculate_bollinger_bands(stock_df)

In [13]:
# Presumably the source of the "WMA" column in the later stock_df display —
# TODO confirm.
weighted_moving_average = feature.weighted_moving_average(stock_df)

In [14]:
# Average-true-range helper called with n=14; presumably the source of the
# "ATR" column in the later stock_df display — TODO confirm.
atr = feature.atr(stock_df, n=14)

In [15]:
# Presumably the source of the "CCI" column in the later stock_df display.
# NOTE(review): the displayed CCI values are around 1.9e5-2.6e5; a commodity
# channel index normally stays within a few hundred — verify the helper.
commodity_channel_index = feature.commodity_channel_index(stock_df)

### Data Processing and Cleaning

In [28]:
# Drop rows that still contain NaN after feature engineering.
# The name is rebound to the filtered frame instead of using inplace=True, so
# any other variable still referencing the previous object is not mutated.
stock_df = stock_df.dropna()
stock_df


Unnamed: 0_level_0,symbol,open,high,low,close,volume,Daily Returns,Daily Returns Lagged,Cumulative Returns,SMA,...,EMA Fast,DCL,DCM,DCU,STD,Upper Bollinger Band,Lower Bollinger Band,WMA,ATR,CCI
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-09 05:00:00+00:00,AMGN,141.50,144.58,141.000,143.45,5013557,0.020343,0.016103,0.873683,144.9925,...,155.335153,84.77,126.730,168.69,2.055006,149.102513,140.882487,143.386941,3.145516,190064.350315
2016-12-12 05:00:00+00:00,AMGN,142.92,146.04,142.790,145.76,3951491,0.016103,0.017906,0.887752,144.8285,...,155.116549,84.77,126.730,168.69,1.833990,148.496480,141.160520,143.861553,3.159447,191900.373323
2016-12-13 05:00:00+00:00,AMGN,146.83,149.97,146.550,148.37,4651473,0.017906,0.002157,0.903648,144.9260,...,154.962988,84.77,126.730,168.69,1.969853,148.865706,140.986294,144.763242,3.299521,197626.015783
2016-12-14 05:00:00+00:00,AMGN,148.16,149.55,147.420,148.69,3436687,0.002157,0.011433,0.905597,145.0075,...,154.820627,84.77,126.680,168.59,2.092681,149.192862,140.822138,145.548594,3.143585,197879.474548
2016-12-15 05:00:00+00:00,AMGN,149.18,150.47,148.780,150.39,3298278,0.011433,-0.006782,0.915951,145.1655,...,154.720367,84.77,126.680,168.59,2.370204,149.905908,140.425092,146.516875,2.961774,199126.637899
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-22 05:00:00+00:00,ZTS,189.31,196.67,188.540,196.66,3339687,0.043954,0.002797,1.197759,190.9690,...,185.990248,182.07,191.995,201.92,4.384510,199.738019,182.199981,190.640260,4.820727,254866.892124
2024-02-23 05:00:00+00:00,ZTS,197.35,198.21,194.815,197.21,2391200,0.002797,-0.000862,1.201108,191.4635,...,186.212421,182.07,191.995,201.92,4.507297,200.478095,182.448905,191.954208,4.630630,261675.573923
2024-02-26 05:00:00+00:00,ZTS,196.90,197.99,195.460,197.04,2098518,-0.000862,0.009947,1.200073,191.7395,...,186.426829,182.07,191.995,201.92,4.676759,201.093019,182.385981,192.971366,4.350546,262145.577281
2024-02-27 05:00:00+00:00,ZTS,197.65,199.25,194.040,199.00,1984613,0.009947,0.004724,1.212010,192.0515,...,186.675803,182.07,191.995,201.92,4.948663,201.948826,182.154174,194.177093,4.465140,261053.875694


### Algorithm Buy/Sell: Best Signal Selection

In [None]:
# Run each candidate strategy and collect its result under a fixed name.
# NOTE(review): the strategies receive stock_data (the frame before dropna and
# feature engineering), not stock_df — confirm that is intended.
signals = dict(
    signal_one=strategy.simple_momentum(stock_data),
    signal_two=strategy.dmac_strategy(stock_data),
    signal_three=strategy.finta_strategy(stock_data),
)

def simple_winning(signals):
    """Return the greatest value stored in ``signals``.

    Args:
        signals: Mapping of strategy name to a comparable value.

    Returns:
        The value that compares greatest; on ties the first such entry in
        the mapping's iteration order wins.

    Raises:
        ValueError: If ``signals`` is empty (raised by ``max``).
    """
    _, winning_signal = max(signals.items(), key=lambda item: item[1])
    return winning_signal

def majority_vote(signals):
    """Combine the signals by majority vote.

    Args:
        signals: Mapping of strategy name to a signal value; values equal to
            1 count as a buy vote and values equal to -1 as a sell vote.

    Returns:
        1 when buy votes outnumber sell votes, -1 when sell votes outnumber
        buy votes, and 0 otherwise (hold).
    """
    buy_votes = 0
    sell_votes = 0
    for vote in signals.values():
        if vote == 1:
            buy_votes += 1
        if vote == -1:
            sell_votes += 1

    if buy_votes > sell_votes:
        return 1
    if sell_votes > buy_votes:
        return -1
    return 0

# Store the combined signals as new stock_df columns.
# simple_winning is the function defined in this cell; the earlier trailing
# underscore (simple_winning_) named nothing and raised NameError.
stock_df['Majority_vote'] = majority_vote(signals)
stock_df['Simple_winning'] = simple_winning(signals)

# Plot winning signal and hybrid


### PCA - dimension reduction

### Train Test Split (DateOffset)

In [None]:
# NOTE(review): X and y are not defined in any cell shown in this notebook —
# they must exist before this cell can run on a fresh kernel.

# Training window: from the first timestamp in X through 68 months later
# (about 5.7 years).
training_begin = X.index.min()
training_end = training_begin + DateOffset(months=68)

# Label-based .loc slices include both endpoints.
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Test window: everything from 29 months after the end of training onwards.
# NOTE(review): 68 + 29 = 97 months, while the stock_df displayed earlier spans
# 2016-12 to 2024-02 (about 86 months). If X shares that index, X_test and
# y_test come out empty — confirm the intended offset.
test_begin = training_end + DateOffset(months=29)
X_test = X.loc[test_begin:]
y_test = y.loc[test_begin:]

X_test


### Standardize The Data

In [None]:
# Fit the scaler on the training features only; fit() returns the scaler
# instance, which is kept under the X_scaler name.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-set statistics to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

### Machine Learning Best Model Selection

In [None]:
# Candidate models, built by the nn_models helpers and keyed by a fixed name.
# NOTE(review): select_best_model passes these to sklearn's cross_val_score,
# which needs scikit-learn-compatible estimators — confirm each helper
# (notably lstm_model) returns one.
models = {
    "model_one": model.lstm_model(),
    "model_two": model.lr_model(),
    "model_three": model.random_tree(),
}

def select_best_model(models, X_train, y_train, cv=3, mse_cv=5):
    """Cross-validate every candidate and pick the one with the lowest mean MSE.

    Args:
        models: Mapping of model name to an estimator accepted by
            ``cross_val_score``.
        X_train: Training features.
        y_train: Training targets.
        cv: Fold specification used for the classification metrics.
        mse_cv: Fold specification used for the mean-squared-error run.

    Returns:
        Tuple ``(best_model_name, best_model, all_scores, all_mses)``:
        ``all_scores[name][metric]`` holds the per-fold scores for
        'accuracy', 'precision', 'recall' and 'f1'; ``all_mses[name]`` holds
        the per-fold MSE values. ``best_model`` is the object taken from
        ``models``; cross_val_score fits clones, so it is returned unfitted.
        For an empty ``models`` the first two items are ``None``.
    """
    # Valid scikit-learn scorer names: the F1 scorer is registered as 'f1'
    # ('f1_score' is the metric function, not a scorer string).
    scoring = ('accuracy', 'precision', 'recall', 'f1')

    best_model_name = None
    best_model = None
    best_mse = float('inf')
    best_precision = 0.0  # tie-breaker when two models share the same MSE
    all_scores = {}
    all_mses = {}

    # 'estimator' rather than 'model' so the nn_models alias is not shadowed.
    for name, estimator in models.items():
        # Per-fold scores for each classification metric
        metric_scores = {
            metric: cross_val_score(estimator, X_train, y_train, cv=cv, scoring=metric)
            for metric in scoring
        }
        mean_precision = metric_scores['precision'].mean()

        # The scorer returns negated MSE; flip the sign to get plain MSE.
        mse_scores = -cross_val_score(
            estimator, X_train, y_train, cv=mse_cv, scoring='neg_mean_squared_error'
        )
        mean_mse = mse_scores.mean()

        all_scores[name] = metric_scores
        all_mses[name] = mse_scores

        # Lower MSE wins; on an exact MSE tie the higher mean precision wins.
        if mean_mse < best_mse or (mean_mse == best_mse and mean_precision > best_precision):
            best_precision = mean_precision
            best_mse = mean_mse
            best_model_name = name
            best_model = estimator

    return best_model_name, best_model, all_scores, all_mses

# Cross-validate on the standardized training features so model selection sees
# the same feature scale as the X_test_scaled data used for prediction.
best_model_name, best_model, all_scores, all_mses = select_best_model(models, X_train_scaled, y_train)


### Backtesting - Trained Model against Test Model 

In [None]:


# Report which candidate won the selection
print("Selected Best Model:", best_model_name)

# Mean of every cross-validated metric, grouped by model
for model_name, per_metric in all_scores.items():
    print("Model:", model_name)
    for metric_name, fold_scores in per_metric.items():
        print(f"Mean {metric_name.capitalize()} Score:", fold_scores.mean())

# Per-fold MSE values, grouped by model
for model_name, fold_mses in all_mses.items():
    print("Model:", model_name)
    print("Cross-Validation MSE Scores:", fold_mses)

# Fit the selected model on the standardized training data.
# cross_val_score only fits clones of the estimator, so best_model itself is
# still unfitted when it comes back from select_best_model.
best_model.fit(X_train_scaled, y_train)

# Predict the trading signals for the testing data with the selected model
# (svm_model was never defined in this notebook).
testing_signal_predictions = best_model.predict(X_test_scaled)

# Evaluate the model's ability to predict the trading signal for the testing data
testing_report = classification_report(y_test, testing_signal_predictions)

In [None]:
# Show the classification report text
print(testing_report)

# Side-by-side frame of predicted signal and realised return, indexed like X_test.
# NOTE(review): trading_df is not defined in any cell shown in this notebook —
# confirm where "actual_returns" is supposed to come from.
predictions_df = pd.DataFrame(index=X_test.index).assign(
    predicted_signal=testing_signal_predictions,
    actual_returns=trading_df["actual_returns"],
)

# Strategy return = realised return scaled by the predicted signal
predictions_df["trading_algorithm_returns"] = (
    predictions_df["predicted_signal"] * predictions_df["actual_returns"]
)

# Preview the first rows
predictions_df.head()

### Fundamental Analysis (Predictions and Plotting)

In [None]:
 Import required libraries
import pandas as pd
import numpy as np
import hvplot.pandas
from pathlib import Path

# Setting these options will allow for reviewing more of the DataFrames
pd.set_option("display.max_rows", 2000)
pd.set_option("display.max_columns", 2000)
pd.set_option("display.width", 1000)
# Read the META price history from the resources folder into a DataFrame.
# The result is bound to META, the name every later statement uses.
# NOTE(review): no index_col is passed, so parse_dates=True has no date index
# to parse and the frame keeps the default integer index. Pass
# index_col=<date column> if a DatetimeIndex is wanted — confirm the column name.
META = pd.read_csv(
    Path("../Resources/META.csv"),
    parse_dates=True,
)

# Review the DataFrame
META.head()

# Plot the closing price (hvplot has to be called to build the plot)
META["Close"].hvplot()
# Compare a 50-period SMA with a 50-period EMA of the closing price.
# Work on a copy that holds only the Close column.
signals_df = META[["Close"]].copy()

# Window length shared by both averages
short_window = 50

# Simple moving average over the window
signals_df["SMA50"] = signals_df["Close"].rolling(short_window).mean()

# Exponential moving average with the same span
signals_df["EMA50"] = signals_df["Close"].ewm(span=short_window).mean()

# Rows around the position where the 50-row SMA window first fills
signals_df.iloc[45:55]

# Close-price line
security_close = signals_df[["Close"]].hvplot(
    line_color='Blue', ylabel='Price in $', width=1000, height=400
)

# SMA and EMA lines
moving_avgs = signals_df[["SMA50", "EMA50"]].hvplot(
    ylabel='Price in $', width=1000, height=400
)

# Overlay the two plots
sma_ema_plot = security_close * moving_avgs
sma_ema_plot

# Build an EMA-crossover trading signal (50 vs 100 periods) on the close price.
# Work on a copy that holds only the Close column.
ema_signals_df = META.loc[:, ["Close"]].copy()

# Set the short window and long windows
short_window = 50
long_window = 100

# Short and long EMAs, computed from this frame's own Close column
ema_signals_df["EMA50"] = ema_signals_df["Close"].ewm(span=short_window).mean()
ema_signals_df["EMA100"] = ema_signals_df["Close"].ewm(span=long_window).mean()

# Review the DataFrame
ema_signals_df.head()

# Trading signal: 1.0 where EMA50 is above EMA100, otherwise 0.0
ema_signals_df["Signal"] = np.where(
    ema_signals_df["EMA50"] > ema_signals_df["EMA100"], 1.0, 0.0
)
# Hold the first short_window rows at 0.0 (warm-up). This is one positional
# write on the frame; a chained df["Signal"][n:] = ... may assign to a copy.
ema_signals_df.iloc[:short_window, ema_signals_df.columns.get_loc("Signal")] = 0.0

# Points at which a position should be taken: +1 entry, -1 exit
ema_signals_df["Entry/Exit"] = ema_signals_df["Signal"].diff()

# Review the DataFrame
ema_signals_df.head()
# Plot the EMA-crossover entries and exits on top of the close price.

# Entry markers: close price on rows where Entry/Exit is +1
entry = ema_signals_df.loc[ema_signals_df["Entry/Exit"] == 1.0, "Close"].hvplot.scatter(
    color='Blue', marker='^', size=200, legend=False,
    ylabel='Price in $', xlabel= 'date',
    width=1000, height=400
)

# Exit markers: close price on rows where Entry/Exit is -1
exit = ema_signals_df.loc[ema_signals_df["Entry/Exit"] == -1.0, "Close"].hvplot.scatter(
    color='Green', marker='v', size=200, legend=False,
    ylabel='Price in $',
    width=1000, height=400
)

# Close-price line
security_close = ema_signals_df[["Close"]].hvplot(
    line_color='Blue', ylabel='Price in $', width=1000, height=400
)

# EMA50 and EMA100 lines
moving_avgs = ema_signals_df[["EMA50", "EMA100"]].hvplot(
    ylabel='Price in $', width=1000, height=400
)

# Overlay all four layers
entry_exit_plot = security_close * moving_avgs * entry * exit
entry_exit_plot

# Incorporate the finta library into the DMAC trading algorithm.
# Import the finta library
from finta import TA

# Read the META price history into its own frame for the finta-based signals.
# One name (META_df) is used throughout; mixing NVDA_df / META_df / META left
# some of these statements pointing at undefined names or missing columns.
# NOTE(review): no index_col is passed, so the frame keeps the default integer
# index — confirm whether a date index is wanted.
META_df = pd.read_csv(
    Path("../Resources/META.csv"),
    parse_dates=True,
)

# Review the DataFrame
META_df.head()

# Set the short window and long windows
short_window = 50
long_window = 100

# Add the EMA technical indicators for the short and long windows
META_df["Short"] = TA.EMA(META_df, short_window)
META_df["Long"] = TA.EMA(META_df, long_window)

# Review the DataFrame
META_df.iloc[45:105, :]

# Trading signal: 1.0 where the Short EMA is above the Long EMA, otherwise 0.0
META_df["Signal"] = np.where(META_df["Short"] > META_df["Long"], 1.0, 0.0)
# Hold the first short_window rows at 0.0 (warm-up). This is one positional
# write on the frame; a chained df["Signal"][n:] = ... may assign to a copy.
META_df.iloc[:short_window, META_df.columns.get_loc("Signal")] = 0.0

# Points at which a position should be taken: +1 entry, -1 exit
META_df["Entry/Exit"] = META_df["Signal"].diff()

# Review the DataFrame
META_df.iloc[95:105, :]

# Visualize entry position relative to close price
entry = META_df[META_df["Entry/Exit"] == 1.0]["Close"].hvplot.scatter(
    color="purple",
    marker="^",
    size=200,
    legend=False,
    ylabel="Price in $",
    width=1000,
    height=400
)

# Visualize exit position relative to close price
exit = META_df[META_df["Entry/Exit"] == -1.0]["Close"].hvplot.scatter(
    color="orange",
    marker="v",
    size=200,
    legend=False,
    ylabel="Price in $",
    width=1000,
    height=400
)

# Visualize close price for the investment
security_close = META_df[["Close"]].hvplot(
    line_color="Yellow",
    ylabel="Price in $",
    width=1000,
    height=400
)

# Visualize moving averages
moving_avgs = META_df[["Short", "Long"]].hvplot(
    ylabel="Price in $",
    width=1000,
    height=400
)

# Overlay plots
entry_exit_plot = security_close * moving_avgs * entry * exit
entry_exit_plot




 










In [None]:
# Correlation matrix between the "Daily Returns Lagged" and "Daily Returns"
# columns, computed over every row of stock_df.
# NOTE(review): stock_df stacks several symbols (the display runs from AMGN to
# ZTS), so this pools all tickers — confirm that is the intended comparison.
stock_df[['Daily Returns Lagged', 'Daily Returns']].corr()

# TODO: the lagged-returns strategy may not be worth using
# TODO: consider DMAC instead?

In [None]:
# Line plot of the "Cumulative Returns" column.
# NOTE(review): stock_df stacks several symbols, so this draws them as a single
# series — consider plotting per symbol.
stock_df['Cumulative Returns'].plot()

### Risk management and Rewards

#### Notes: 
- This notebook is for educational and experimental purposes only.