# The Ultimate Algo Trader (Neurobot 1.0)

## Introduction:
This Jupyter notebook contains code for developing and testing a dynamic algorithmic trader built with Python and a machine-learning strategy.

## Steps:
1. **Data preparation**: Access historical market data via Alpaca API and preprocess it for analysis.
2. **Strategy creation**: Code algorithmic trading strategy based on "TBD".
3. **Backtesting-Optimization**: Backtest the strategy using historical data and fine-tune parameters for better performance.
4. **Risk/Reward**: Calculate both the risk and the reward based on the entry price, position size, stop-loss and target price
5. **Live Trading (Optional)**: Implement the strategy for paper trading on Alpaca.

**Tools and Libraries**
- Python, Pandas, Numpy, "TBD"

#### Notes: 
- This notebook is for educational and experimental purposes only.

### Imports and Dependencies

In [1]:
import os
import numpy as np
import random
import datetime
import seaborn as sns
import pandas as pd
import yfinance as yf
import hvplot.pandas
import matplotlib.pyplot as plt
import alpaca_trade_api as tradeapi
from dotenv import load_dotenv
from scipy.interpolate import interp1d
from alpaca_trade_api.rest import REST, TimeFrame
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import mean_squared_error


# Initialize python files and import functions
import stock_data as data
import algo_strategy as strategy
import nn_models as model
import backtesting as backtest

import warnings
warnings.filterwarnings('ignore')


In [2]:
# Read key=value pairs from a .env file into the process environment.
# NOTE(review): presumably supplies the Alpaca credentials and SP500_URL — confirm against the .env file.
load_dotenv()

True

### Data Processing and collection

In [3]:
# Fetch list of tickers
# Get select top picks based on monthly highest performing stocks in sp500
# (currently disabled: the call below uses a single hard-coded ticker instead)
#ticker_list = data.get_clusters_from_sp500(sp500_url = os.getenv("SP500_URL"))

# Fetch daily ('1Day') data for NVDA over a fixed date range via the project-local stock_data module
stock_data = data.fetch_stock_data('2018-01-12', '2024-03-14', tickers= ['NVDA'], timeframe='1Day')

In [4]:
# data cleaning and organization
# NOTE: plain assignment, not a copy — stock_df and stock_data are the same object here,
# so the 'Daily Returns' column added below also appears on stock_data.
stock_df = stock_data #.rename(columns={'NVDA': 'NVDA close'})
# Row-over-row percentage change of the NVDA close (first row is NaN: no previous value)
stock_df['Daily Returns'] = stock_df['NVDA']['close'].pct_change()
# Drop every row containing a NaN; dropna() returns a new frame, so from here on
# stock_df is no longer the same object as stock_data.
stock_df = stock_df.dropna()
stock_df

Unnamed: 0_level_0,NVDA,NVDA,NVDA,NVDA,Daily Returns
Unnamed: 0_level_1,open,close,high,volume,Unnamed: 5_level_1
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2018-01-16,224.050,220.11,227.51,17986928,-0.011892
2018-01-17,220.700,224.72,225.12,13942387,0.020944
2018-01-18,223.940,224.44,226.64,10756754,-0.001246
2018-01-19,228.090,230.11,231.09,15935244,0.025263
2018-01-22,230.430,233.69,233.76,14260753,0.015558
...,...,...,...,...,...
2024-03-08,951.379,875.28,974.00,113705642,-0.055477
2024-03-11,864.290,857.74,887.97,67836412,-0.020039
2024-03-12,880.490,919.13,919.60,66807515,0.071572
2024-03-13,910.550,908.88,915.04,63571289,-0.011152


#### Feature Engineering - Time series Analysis

In [5]:
# Compounded growth factor since the first row: running product of (1 + daily return)
stock_df['Cumulative Returns'] = (1 + stock_df['Daily Returns']).cumprod()

In [6]:
def df_to_windowed_df(stock_df, first_date_str, last_date_str, n=3,
                      ticker='NVDA', column='close'):
    """Build a table of sliding price windows for supervised learning.

    For every target date between ``first_date_str`` and ``last_date_str``
    (inclusive) the last ``n + 1`` prices at or before that date are taken:
    the first ``n`` become the feature columns ``Target-n`` ... ``Target-1``
    and the final one becomes ``Target``.

    Parameters
    ----------
    stock_df : pandas.DataFrame
        Frame indexed by sorted dates whose columns allow
        ``stock_df[ticker][column]`` (e.g. two-level columns).
    first_date_str, last_date_str : str
        First and last target dates, parsed with ``pd.to_datetime``.
    n : int, default 3
        Number of preceding prices used as features.
    ticker, column : str, default 'NVDA' / 'close'
        Which price series inside ``stock_df`` to window.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Target Date``, ``Target-n`` ... ``Target-1``, ``Target``.
        ``None`` is returned (after printing an error) when fewer than
        ``n + 1`` prices exist at or before a target date.

    Notes
    -----
    The walk ends when the next date would pass ``last_date_str`` or when no
    later row exists, so a ``last_date_str`` that is not present in the index
    (weekend, holiday, beyond the data) no longer causes an endless loop.
    """
    first_date = pd.to_datetime(first_date_str)
    last_date = pd.to_datetime(last_date_str)
    prices = stock_df[ticker][column]

    dates, X, Y = [], [], []
    target_date = first_date
    while target_date <= last_date:
        window = prices.loc[:target_date].tail(n + 1)
        if len(window) != n + 1:
            print(f'Error: Window of size {n} is too large for date {target_date}')
            return
        values = window.to_numpy()
        dates.append(target_date)
        X.append(values[:-1])
        Y.append(values[-1])

        # Look up to a week ahead and step to the second row of that slice
        # (the row after target_date when target_date itself is in the index).
        upcoming = prices.loc[target_date:target_date + pd.Timedelta(days=7)]
        if upcoming.empty:
            break  # target_date lies beyond the available data
        step = min(1, len(upcoming) - 1)
        next_date = pd.Timestamp(upcoming.index.values[step]).normalize()
        if next_date <= target_date:
            break  # no later row within reach: end of data
        target_date = next_date

    # Explicit shape keeps column slicing valid even when no rows were collected.
    X = np.array(X).reshape(len(dates), n)
    ret_df = pd.DataFrame({'Target Date': dates})
    for i in range(n):
        ret_df[f'Target-{n - i}'] = X[:, i]
    ret_df['Target'] = Y
    return ret_df

# Example usage
# stock_df must be date-indexed and expose the NVDA close via stock_df['NVDA']['close']
# Author's note: start day second time around: '2021-03-25'
windowed_df = df_to_windowed_df(stock_df,
                                '2019-01-12',
                                '2024-03-14',
                                n=3)
print(windowed_df)

     Target Date  Target-3  Target-2  Target-1  Target
0     2019-01-12    139.83    142.58    145.23  148.83
1     2019-01-15    145.23    148.83    150.44  149.87
2     2019-01-16    148.83    150.44    149.87  148.84
3     2019-01-17    150.44    149.87    148.84  151.72
4     2019-01-18    149.87    148.84    151.72  156.93
...          ...       ...       ...       ...     ...
1296  2024-03-08    859.64    887.00    926.69  875.28
1297  2024-03-11    887.00    926.69    875.28  857.74
1298  2024-03-12    926.69    875.28    857.74  919.13
1299  2024-03-13    875.28    857.74    919.13  908.88
1300  2024-03-14    857.74    919.13    908.88  879.44

[1301 rows x 5 columns]


In [7]:
# shift(-1) moves values up by one row: each row receives the NEXT row's
# 'Daily Returns' and the final row becomes NaN.
# NOTE(review): despite the "Lagged" name this is a forward-looking (lead) value —
# confirm it is only used as a prediction target, never as an input feature.
stock_df['Daily Returns Lagged'] = stock_df['Daily Returns'].shift(-1)
stock_df

Unnamed: 0_level_0,NVDA,NVDA,NVDA,NVDA,Daily Returns,Cumulative Returns,Daily Returns Lagged
Unnamed: 0_level_1,open,close,high,volume,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2018-01-16,224.050,220.11,227.51,17986928,-0.011892,0.988108,0.020944
2018-01-17,220.700,224.72,225.12,13942387,0.020944,1.008803,-0.001246
2018-01-18,223.940,224.44,226.64,10756754,-0.001246,1.007546,0.025263
2018-01-19,228.090,230.11,231.09,15935244,0.025263,1.033000,0.015558
2018-01-22,230.430,233.69,233.76,14260753,0.015558,1.049071,0.022337
...,...,...,...,...,...,...,...
2024-03-08,951.379,875.28,974.00,113705642,-0.055477,3.929269,-0.020039
2024-03-11,864.290,857.74,887.97,67836412,-0.020039,3.850529,0.071572
2024-03-12,880.490,919.13,919.60,66807515,0.071572,4.126118,-0.011152
2024-03-13,910.550,908.88,915.04,63571289,-0.011152,4.080105,-0.032392


In [None]:
# Look-back lengths (in rows) for the fast and slow simple moving averages
short_window, long_window = 5, 100

# Rolling means of the NVDA close over each look-back length
nvda_close = stock_df['NVDA']['close']
stock_df['SMA_Fast'] = nvda_close.rolling(short_window).mean()
stock_df['SMA_Slow'] = nvda_close.rolling(long_window).mean()

# Remove every row that still contains a NaN in any column
stock_df = stock_df.dropna()
stock_df

# later on -> trial different training windows using DateOffset()

### Algorithm Buy/Sell Best signal selection

In [None]:
# Initialize Signals
# Each entry holds whatever the matching function in the project-local
# algo_strategy module returns; note the functions receive stock_data, not stock_df.
# NOTE(review): majority_vote compares each value to 1 / -1, so these are presumably
# scalar buy/sell/hold codes — confirm against algo_strategy.
signals = {
    "signal_one": strategy.strategy_one(stock_data),
    "signal_two": strategy.strategy_two(stock_data),
    "signal_three": strategy.strategy_three(stock_data),
    "signal_four": strategy.strategy_four(stock_data),
    "signal_five": strategy.strategy_five(stock_data) }

# Function for simple winning trading strategy
def simple_winning(signals):
    """Return the largest value stored in ``signals``.

    Values are compared with each other; when several keys share the maximum,
    the value of the first such key (in mapping order) is returned. An empty
    mapping raises ``ValueError``.
    """
    _, top_value = max(signals.items(), key=lambda entry: entry[1])
    return top_value

# Function for strategy that combines signals using a majority vote
def majority_vote(signals):
    """Combine the values of ``signals`` into one vote.

    Counts the values equal to 1 ("Buy") and to -1 ("Sell"); any other value
    counts for neither side. Returns 1 when buys outnumber sells, -1 when
    sells outnumber buys, and 0 ("Hold") on a tie.
    """
    buy_votes = 0
    sell_votes = 0
    for vote in signals.values():
        if vote == 1:
            buy_votes += 1
        if vote == -1:
            sell_votes += 1

    if buy_votes > sell_votes:
        return 1
    if sell_votes > buy_votes:
        return -1
    return 0

# Add signals to stock_df (each result is assigned to the whole column)
stock_df['Majority_vote'] = majority_vote(signals)
# Call simple_winning: the previous name `simple_winning_` is not defined anywhere and raised NameError
stock_df['Simple_winning'] = simple_winning(signals)

# Plot winning signal and hybrid


### Machine Learning Best Model Selection

In [None]:
# Initialize Models
# Each entry holds whatever the matching factory in the project-local nn_models module returns.
# NOTE(review): select_best_model passes these to sklearn's cross_val_score, so they are
# presumably scikit-learn-compatible estimators — confirm against nn_models.
models = {
    "model_one": model.model_one(),
    "model_two": model.model_two(),
    "model_three": model.model_three(),
    "model_four": model.model_four(),
    "model_five": model.model_five() }

def select_best_model(models, X_train, y_train):
    """Cross-validate every candidate and pick the one with the lowest mean MSE.

    Parameters
    ----------
    models : dict
        Maps a model name to an estimator accepted by ``cross_val_score``.
    X_train, y_train
        Training features and labels, forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    tuple
        ``(best_model_name, best_model, all_scores, all_mses)`` where
        ``all_scores[name]`` maps ``'accuracy'``, ``'precision'``, ``'recall'``
        and ``'f1_score'`` to the per-fold score arrays and ``all_mses[name]``
        holds the per-fold mean squared errors. ``best_model_name`` and
        ``best_model`` are ``None`` when ``models`` is empty.

    Notes
    -----
    The winner is the model with the lowest mean MSE; an exact MSE tie is
    broken by the higher mean accuracy.
    """
    # Result key -> scikit-learn scorer name. The scorer for F1 is registered
    # as 'f1'; passing 'f1_score' makes cross_val_score raise. The result key
    # stays 'f1_score' so consumers of all_scores see the intended label.
    scoring = {
        'accuracy': 'accuracy',
        'precision': 'precision',
        'recall': 'recall',
        'f1_score': 'f1',
    }

    best_model_name = None
    best_model = None
    best_accuracy = 0.0
    best_mse = float('inf')
    all_scores = {}
    all_mses = {}

    # `candidate` (not `model`) so the nn_models module alias is not shadowed.
    for name, candidate in models.items():
        # Per-fold scores for each classification metric (5-fold CV).
        metric_scores = {
            label: cross_val_score(candidate, X_train, y_train, cv=5, scoring=scorer)
            for label, scorer in scoring.items()
        }
        mean_accuracy = metric_scores['accuracy'].mean()

        # The scorer returns negated MSE (higher is better); flip the sign back.
        mse_scores = -cross_val_score(candidate, X_train, y_train, cv=5,
                                      scoring='neg_mean_squared_error')
        mean_mse = mse_scores.mean()

        all_scores[name] = metric_scores
        all_mses[name] = mse_scores

        # Lower MSE wins; on an exact tie prefer the higher accuracy.
        if mean_mse < best_mse or (mean_mse == best_mse and mean_accuracy > best_accuracy):
            best_accuracy = mean_accuracy
            best_mse = mean_mse
            best_model_name = name
            best_model = candidate

    return best_model_name, best_model, all_scores, all_mses

# NOTE(review): X_train and y_train are not defined in any cell visible in this notebook —
# a feature/label split must be created before this cell can run.
best_model_name, best_model, all_scores, all_mses = select_best_model(models, X_train, y_train)


### Backtesting - Best Model and Best Strategy

### Fundamental Analysis (Predictions and Plotting)

In [None]:
# Correlation matrix between 'Daily Returns' and 'Daily Returns Lagged'
# (the latter is built from 'Daily Returns' with shift(-1) earlier in the notebook)
stock_df[['Daily Returns Lagged', 'Daily Returns']].corr()

# TODO: the lagged-return strategy may not be worth using
# TODO: consider DMAC instead?

In [None]:
# Line plot of the compounded growth factor over the rows remaining in stock_df
stock_df['Cumulative Returns'].plot()

In [None]:
# Report which candidate won the selection
print("Selected Best Model:", best_model_name)

# Mean cross-validation score for every model / metric pair
for model_name, per_metric in all_scores.items():
    print("Model:", model_name)
    for metric_name, fold_scores in per_metric.items():
        heading = f"Mean {metric_name.capitalize()} Score:"
        print(heading, fold_scores.mean())

# Per-fold cross-validation MSE values for every model
for model_name, fold_mses in all_mses.items():
    print("Model:", model_name)
    print("Cross-Validation MSE Scores:", fold_mses)

### Risk management and Rewards

### Logic to place trade (Optional)

In [None]:
# From Algo Trading 3 live 

# Submit order
api.submit_order(
    symbol="META", 
    qty=number_of_shares, 
    side=orderSide, 
    time_in_force="gtc", 
    type="limit", 
    limit_price=limit_amount
