In [50]:
# use alpaca as data api 
import os 
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt 
from io import StringIO

import QuantLib as ql 

import plotly.graph_objects as go

import redis

import sys 
sys.path.append('..')

from src.kata_alpaca_engine.engine_utilities import (
acquire_credentials,
download_data)

from src.kata_alpaca_engine.ingestion_engine import RedisTableUtility, Redis

In [51]:
# Path to the secrets file handed to download_data below.
# NOTE(review): relative path, so it resolves against the kernel's working
# directory - confirm it matches where this notebook is launched from.
SECRETS_PATH = "./src/.secrets"
# Second argument to download_data; presumably the first day of history
# requested (YYYY-MM-DD) - verify against the helper.
START_DATE = "2016-01-01"

In [52]:
# Fetch the price history through the project helper. Later cells read the
# 'timestamp' and 'price' columns of the returned frame.
df = download_data(SECRETS_PATH, START_DATE)

In [53]:
# Redis client that backs the RedisTableUtility created further down.
# NOTE(review): host 'red' looks like a container/service hostname - confirm
# it resolves in the environment this notebook runs in.
connection =  redis.Redis(host='red', port=6379)

In [54]:
# Echo the client repr to check the configured host, port and db.
connection

Redis<ConnectionPool<Connection<host=red,port=6379,db=0>>>

In [55]:
# Wrap the Redis client in the project's table utility; the rest of the
# notebook uses its get_tags / get / set methods.
red_table = RedisTableUtility(connection)


In [56]:
# List what is stored under the '_NVDA' key (see the output below for the
# tags present when this cell was last run).
red_table.get_tags('_NVDA')

['earnings:2018-08-16/present', 'ex-dividend:2016/present']

In [57]:
# Load the split table stored under '_NVDA_STOCK_SPLIT' / '2024-07-26' and
# display it. Per the output below it carries a `dates` column (YYYY-MM-DD)
# and a `split` ratio column. adjust_price_on_stock_split reads this
# variable as a notebook-level global, so this cell must run first.
stock_splits = red_table.get('_NVDA_STOCK_SPLIT', '2024-07-26')
stock_splits

Unnamed: 0,dates,split
0,2024-06-10,10.0
1,2021-07-20,4.0
2,2007-09-11,1.5
3,2006-04-07,2.0


In [58]:
# `stock_splits` is expected to exist already (loaded in an earlier cell via
# red_table.get('_NVDA_STOCK_SPLIT', '2024-07-26')).

# Re-download the history before adjusting: the adjustment below divides the
# 'price' column in place, so presumably this guards against applying the
# split ratios twice when the cell is re-run.
df = download_data(SECRETS_PATH, START_DATE)
def adjust_price_on_stock_split(df, splits=None):
    """
    Back-adjust the ``price`` column of ``df`` for stock splits, in place.

    Every row dated strictly before a split date has its price divided by
    that split's ratio. Rows that precede several splits are divided by each
    ratio in turn, which puts the whole series on the post-split scale.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a datetime-like ``timestamp`` column and a numeric
        ``price`` column. It is modified in place.
    splits : pd.DataFrame, optional
        Split table with a ``dates`` column (``'YYYY-MM-DD'`` strings, or any
        value ``pd.Timestamp`` accepts) and a ``split`` column holding the
        ratio. When omitted, the notebook-level ``stock_splits`` variable is
        used, which is how the function located its table before this
        parameter existed.

    Returns
    -------
    None
        The adjustment is applied to ``df`` directly.
    """
    if splits is None:
        # Backward-compatible fallback to the notebook global.
        splits = stock_splits

    # The calendar day of each row does not change between splits, so it is
    # computed once instead of once per split.
    trade_days = df['timestamp'].dt.date

    for split_date, ratio in zip(splits['dates'], splits['split']):
        effective_day = pd.Timestamp(split_date).date()
        # Strictly before: the bar on the split date already trades at the
        # post-split price.
        before_split = trade_days < effective_day
        df.loc[before_split, 'price'] = df.loc[before_split, 'price'] / ratio

# Put the freshly downloaded history on the post-split price scale.
adjust_price_on_stock_split(df)

# Quick visual check of the adjusted series.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df['timestamp'],
        y=df['price'],
        mode='markers',
    )
)
fig.show()

# Persist the adjusted history; the call's return value is the cell output.
red_table.set('_NVDA_HISTORICAL', '2024-07-29', df)

True

In [59]:
# Read the stored history back from Redis. This rebinds `df`, so the cells
# below work from the stored copy rather than the in-memory frame.
df = red_table.get('_NVDA_HISTORICAL', '2024-07-29')

In [60]:
from typing import List, Tuple, Callable, Union
import numpy as np 
import pandas as pd

from datetime import datetime 
from src.kata_models.GaussianProcessModel import RegressionModel

class BackTestData():
    """
    Bookkeeping for back-testing a regression model on a price history.

    Each call to ``create_backtest_data`` registers one dataset: a training
    window cut from the full history plus the business days to predict.
    ``fit_model``, ``predict`` and ``training_bands`` address a dataset by
    its list index; negative indices work, e.g. ``-1`` for the most recently
    created dataset.

    The per-dataset lists (``data``, ``prediction_data``,
    ``prediction_data_price``, ``model_fits``) are kept index-aligned.

    Attributes
    ----------
    data_full
        ``(timestamps, prices)`` arrays for the whole history.
    data
        Per dataset: ``(training dates, training prices)``.
    prediction_data
        Per dataset: array of projected business days as ``datetime``.
    prediction_data_price
        Per dataset: observed prices on the projected days, or an empty
        array when the dataset was created with ``store_prices=False``.
    model_fits
        Per dataset: the fitted model, or ``None`` while not fitted yet.
    calendar
        QuantLib NYSE calendar used to enumerate business days.
    df
        The frame passed to the constructor.
    """
    data_full: Tuple[np.ndarray, np.ndarray]
    data: List[Tuple[np.ndarray, np.ndarray]]
    prediction_data: List[np.ndarray]
    prediction_data_price: List[np.ndarray]
    model_fits: List[RegressionModel]
    calendar = ql.UnitedStates(ql.UnitedStates.NYSE)
    df: pd.DataFrame

    def __init__(self,
                 df: pd.DataFrame):
        """
        Parameters
        ----------
        df : pd.DataFrame
            Price history with a datetime ``timestamp`` column and a
            ``price`` column.
        """
        self.data_full = df['timestamp'].to_numpy(), df['price'].to_numpy()
        self.model_fits = []
        self.data = []
        self.df = df
        self.prediction_data_price = []
        self.prediction_data = []

        # Not written by any method of this class; kept because they are
        # part of the instance's public attributes.
        self.prediction_fit = []
        self.prediction_fit_sigma = []

    @property
    def dates_full(self):
        """All timestamps of the history as an array of ``datetime`` objects."""
        return pd.to_datetime(self.data_full[0]).to_pydatetime()

    @property
    def prices_full(self):
        """All prices of the history, aligned with ``dates_full``."""
        return self.data_full[1]

    @staticmethod
    def convert_ql_to_datetime(date: ql.Date) -> datetime:
        """Convert a QuantLib date to a midnight ``datetime``."""
        return datetime(date.year(), date.month(), date.dayOfMonth())

    @staticmethod
    def convert_datetime_to_ql(date: datetime) -> ql.Date:
        """Convert a ``datetime`` to a QuantLib date (time of day is dropped)."""
        return ql.Date(date.day, date.month, date.year)

    @staticmethod
    def _day_offsets(dates, anchor) -> np.ndarray:
        """
        Whole calendar days from ``anchor`` to each entry of ``dates``, as floats.

        Only the calendar date of each value is used. Subtracting full
        timestamps and reading ``timedelta.days`` floors a negative
        time-of-day remainder, so a midnight projection date measured
        against an anchor stamped later in the day (or a 04:00 bar against a
        05:00 anchor) would come out one day early.
        """
        anchor_day = pd.Timestamp(anchor).date()
        return np.array(
            [float((pd.Timestamp(day).date() - anchor_day).days) for day in dates]
        )

    def _fitted_model(self, dataset_index: Union[int, str]):
        """Return the model fitted for ``dataset_index``.

        Raises
        ------
        IndexError
            If no model has been fitted for that dataset.
        """
        model = self.model_fits[dataset_index]
        if model is None:
            raise IndexError(
                f"no model has been fitted for dataset {dataset_index!r}; call fit_model first"
            )
        return model

    def create_projections(self, start_date: ql.Date, end_date: ql.Date) -> List:
        """
        List the business days of ``calendar`` in ``[start_date, end_date]``.

        Parameters
        ----------
        start_date, end_date : ql.Date
            Inclusive bounds of the projection window.

        Returns
        -------
        list of datetime
            Business days in ascending order, each at midnight. Empty when
            ``start_date`` is after ``end_date``.
        """
        projected_datetimes = []
        current_date = start_date

        while current_date <= end_date:
            # Only matters for the first candidate: a Days period passed to
            # Calendar.advance steps by business days (QuantLib convention).
            if self.calendar.isBusinessDay(current_date):
                projected_datetimes.append(self.convert_ql_to_datetime(current_date))
            current_date = self.calendar.advance(current_date, ql.Period(1, ql.Days))

        return projected_datetimes

    def create_backtest_data(self, _from: datetime, _to: datetime, predict_to: ql.Date, store_prices=False):
        """
        Register a new dataset: training window plus projection dates.

        Parameters
        ----------
        _from, _to : datetime
            Inclusive bounds of the training window, compared against the
            full timestamps of the history.
        predict_to : ql.Date
            Last day of the projection window. The window starts at ``_to``.
        store_prices : bool, default False
            When true, the observed prices on the projected days are stored
            for comparison. Days with no row in the history are absent, so
            the stored array can be shorter than the projection.

        Appends one entry to each of ``data``, ``prediction_data`` and
        ``prediction_data_price`` so the three lists stay index-aligned.
        """
        dates = self.dates_full
        prices = self.prices_full

        date_projected = np.array(
            self.create_projections(start_date=self.convert_datetime_to_ql(_to),
                                    end_date=predict_to)
        )

        subset_key = (_from <= dates) & (dates <= _to)
        self.data.append((np.array(dates[subset_key]), prices[subset_key]))
        self.prediction_data.append(date_projected)

        if store_prices:
            date_set = {dt.date() for dt in date_projected}
            on_projected_day = self.df['timestamp'].dt.date.isin(date_set)
            matching_prices = self.df.loc[on_projected_day, 'price'].to_numpy()
        else:
            # Placeholder keeps prediction_data_price[i] paired with
            # prediction_data[i] for every dataset.
            matching_prices = np.empty(0)
        self.prediction_data_price.append(matching_prices)

    def fit_model(self, dataset_index: Union[int,str], model_function: Callable):
        """
        Build and fit a model on the training window of one dataset.

        Parameters
        ----------
        dataset_index : int
            Index into ``data`` (negative indices allowed).
        model_function : callable
            Called as ``model_function(day_offsets, prices)``; the returned
            object must expose ``fit()`` and ``predict(Xt=...)``.

        The fitted model is stored at the dataset's own position in
        ``model_fits`` and replaces any earlier fit for that dataset.
        """
        dates, prices = self.data[dataset_index]
        dates_array = self._day_offsets(dates, dates[0])

        model_base = model_function(dates_array, prices)
        model_base.fit()

        # Turn a possibly negative index into the dataset's absolute slot,
        # then pad so that slot exists.
        slot = range(len(self.data))[dataset_index]
        self.model_fits.extend([None] * (len(self.data) - len(self.model_fits)))
        self.model_fits[slot] = model_base

    def predict(self, dataset_index: Union[int,str]):
        """
        Predict over the projection dates of one dataset.

        Returns
        -------
        (yfit, ysigma)
            Second and third elements of the tuple returned by the model's
            ``predict``; consumed by ``create_bands`` as centre and spread.

        Raises
        ------
        IndexError
            If the dataset does not exist or has no fitted model.
        """
        dates = self.prediction_data[dataset_index]
        # Same anchor as in fit_model: the first training date.
        anchor = self.data[dataset_index][0][0]
        offsets = self._day_offsets(dates, anchor)

        _, yfit, ysigma = self._fitted_model(dataset_index).predict(Xt=offsets)
        return yfit, ysigma

    def training_bands(self, dataset_index: Union[int,str], band_factor=1.96):
        """
        Evaluate the fitted model on its own training dates.

        Returns
        -------
        (yfit, lower, upper)
            In-sample fit with the band from ``create_bands``.
        """
        dates, _ = self.data[dataset_index]
        dates_array = self._day_offsets(dates, dates[0])
        _, yfit, ysigma = self._fitted_model(dataset_index).predict(Xt=dates_array)
        return self.create_bands(yfit, ysigma, band_factor=band_factor)

    def create_bands(self, yfit, ysigma, band_factor = 1.96):
        """
        Return ``(yfit, yfit - band_factor * ysigma, yfit + band_factor * ysigma)``.
        """
        lower_bound = yfit - ysigma * band_factor
        upper_bound = yfit + ysigma * band_factor
        return yfit, lower_bound, upper_bound
    

In [61]:
# Back-test helper built on the history reloaded from Redis above.
btest = BackTestData(df)

In [62]:
# Dataset 0: train on 2022-01-01 .. 2022-06-01, project business days up to
# 2022-06-06 (ql.Date takes day, month, year) and keep the observed prices
# on those days for comparison.
btest.create_backtest_data(
    datetime(year=2022, month=1, day=1),
    datetime(year=2022, month=6, day=1), 
    ql.Date(6,6,2022),store_prices=True
)

In [63]:
# Fit a RegressionModel on dataset 0. The lambda only forwards
# (day offsets, prices) to the RegressionModel constructor.
btest.fit_model(0, lambda x,y: RegressionModel(x,y))


In [41]:
# Out-of-sample prediction for dataset 0, turned into fit +/- band_factor * sigma
# (band_factor defaults to 1.96).
yfit, ysigma = btest.predict(0) 
test_yfit , lower, upper = btest.create_bands(yfit, ysigma)
# In-sample fit and band over the training dates of the same dataset.
training_f, lower_f, upper_f = btest.training_bands(0)


In [42]:

# --- Out-of-sample side: projected dates of dataset 0 -----------------------
xt = btest.prediction_data[0]

# Observed prices on the projected dates.
prediction_markers = go.Scatter(
    x=xt,
    y=btest.prediction_data_price[0],
    mode='markers',
    marker={'color': 'red'},
    name='test_data',
)

# Predicted centre line, the two band edges, then the same edges again as an
# unlabelled pair so that 'tonexty' shades the area between them.
bands = [
    go.Scatter(x=xt, y=test_yfit, line={'color': 'black', 'width': 2}, name='test_fit'),
    *[
        go.Scatter(x=xt, y=edge, line={'color': 'black', 'width': 2}, name=label)
        for edge, label in ((upper, 'Upper Band'), (lower, 'Lower Band'))
    ],
    go.Scatter(x=xt, y=upper, fill=None, mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
    go.Scatter(x=xt, y=lower, fill='tonexty', mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
]

# --- In-sample side: training dates of dataset 0 ----------------------------
x = btest.data[0][0]

# Prices the model was trained on.
training_markers = go.Scatter(
    x=x,
    y=btest.data[0][1],
    mode='markers',
    marker={'color': 'darkgreen'},
    name='training_data',
)

training_fit_bands = [
    go.Scatter(x=x, y=training_f, name='training_fit'),
    *[
        go.Scatter(x=x, y=edge, line={'color': 'black', 'width': 2}, name=label)
        for edge, label in ((upper_f, 'Upper Band'), (lower_f, 'Lower Band'))
    ],
    go.Scatter(x=x, y=upper_f, fill=None, mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
    go.Scatter(x=x, y=lower_f, fill='tonexty', mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
]

# Same trace order as before: markers first, then test band, then training band.
fig = go.Figure(data=[prediction_markers, training_markers, *bands, *training_fit_bands])
fig.show()

In [64]:
# Always address the dataset registered by this cell (the newest one).
dataset_index = -1

# Train on 2024-01-01 .. 2024-07-31, project business days up to 2024-08-02
# and keep the observed prices on those days.
btest.create_backtest_data(
    datetime(year=2024, month=1, day=1),
    datetime(year=2024, month=7, day=31),
    ql.Date(2, 8, 2024),
    store_prices=True,
)
btest.fit_model(dataset_index, lambda x,y: RegressionModel(x,y))

# Out-of-sample prediction with its band, then the in-sample fit and band.
yfit, ysigma = btest.predict(dataset_index)
test_yfit, lower, upper = btest.create_bands(yfit, ysigma)
training_f, lower_f, upper_f = btest.training_bands(dataset_index)

# --- Out-of-sample side ------------------------------------------------------
xt = btest.prediction_data[dataset_index]

prediction_markers = go.Scatter(
    x=xt,
    y=btest.prediction_data_price[dataset_index],
    mode='markers',
    marker={'color': 'red'},
    name='test_data',
)

# Centre line, both band edges, then the edges again as an unlabelled pair so
# that 'tonexty' shades the area between them.
bands = [
    go.Scatter(x=xt, y=test_yfit, line={'color': 'black', 'width': 2}, name='test_fit'),
    *[
        go.Scatter(x=xt, y=edge, line={'color': 'black', 'width': 2}, name=label)
        for edge, label in ((upper, 'Upper Band'), (lower, 'Lower Band'))
    ],
    go.Scatter(x=xt, y=upper, fill=None, mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
    go.Scatter(x=xt, y=lower, fill='tonexty', mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
]

# --- In-sample side ----------------------------------------------------------
x = btest.data[dataset_index][0]

training_markers = go.Scatter(
    x=x,
    y=btest.data[dataset_index][1],
    mode='markers',
    marker={'color': 'darkgreen'},
    name='training_data',
)

training_fit_bands = [
    go.Scatter(x=x, y=training_f, name='training_fit'),
    *[
        go.Scatter(x=x, y=edge, line={'color': 'black', 'width': 2}, name=label)
        for edge, label in ((upper_f, 'Upper Band'), (lower_f, 'Lower Band'))
    ],
    go.Scatter(x=x, y=upper_f, fill=None, mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
    go.Scatter(x=x, y=lower_f, fill='tonexty', mode='lines',
               line={'color': 'rgba(0,100,80,0.2)'}, showlegend=False),
]

fig = go.Figure(data=[prediction_markers, training_markers, *bands, *training_fit_bands])
fig.show()



The optimal value found for dimension 0 of parameter k1__k1__k2__k1__constant_value is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.


The optimal value found for dimension 0 of parameter k1__k1__k2__k2__sigma_0 is close to the specified lower bound 1e-05. Decreasing the bound and calling fit again may find a better value.



In [44]:
# dump historical earnings report 

In [32]:
import pandas as pd

# Hand-entered NVDA earnings history, newest report first. All five lists
# have one entry per report and must stay the same length.
# NOTE(review): the source of these figures is not recorded here - add provenance.
# Creating the DataFrame
data = {
    # Human-readable dates; a later cell parses them with "%b %d, %Y".
    "Report Date": [
        "Aug 28, 2024", "May 22, 2024", "Feb 21, 2024", "Nov 21, 2023", 
        "Aug 23, 2023", "May 24, 2023", "Feb 22, 2023", "Nov 16, 2022", 
        "Aug 24, 2022", "May 25, 2022", "Feb 16, 2022", "Nov 17, 2021", 
        "Aug 18, 2021", "May 26, 2021", "Feb 24, 2021", "Nov 18, 2020", 
        "Aug 19, 2020", "May 21, 2020", "Feb 13, 2020", "Nov 14, 2019", 
        "Aug 15, 2019", "May 16, 2019", "Feb 14, 2019", "Nov 15, 2018", 
        "Aug 16, 2018"
    ],
    # Fiscal year and quarter label of each report.
    "Fiscal Quarter": [
        "2025 (Q2)", "2025 (Q1)", "2024 (Q4)", "2024 (Q3)", 
        "2024 (Q2)", "2024 (Q1)", "2023 (Q4)", "2023 (Q3)", 
        "2023 (Q2)", "2023 (Q1)", "2022 (Q4)", "2022 (Q3)", 
        "2022 (Q2)", "2022 (Q1)", "2021 (Q4)", "2021 (Q3)", 
        "2021 (Q2)", "2021 (Q1)", "2020 (Q4)", "2020 (Q3)", 
        "2020 (Q2)", "2020 (Q1)", "2019 (Q4)", "2019 (Q3)", 
        "2019 (Q2)"
    ],
    # Kept as "<forecast> / <EPS>" strings; "-" marks a missing side.
    # NOTE(review): values are on visibly different scales across rows
    # (e.g. 0.45 vs 0.05) - presumably a mix of split-adjusted and
    # unadjusted figures; confirm before using them numerically.
    "Forecast / EPS": [
        "0.64 / -", "0.56 / 0.61", "0.46 / 0.52", "0.34 / 0.40", 
        "0.21 / 0.27", "0.09 / 0.11", "0.08 / 0.09", "0.07 / 0.06", 
        "0.05 / 0.05", "0.13 / 0.14", "0.12 / 0.13", "0.11 / 0.12", 
        "0.10 / 0.10", "0.08 / 0.09", "0.07 / 0.08", "0.06 / 0.07", 
        "0.05 / 0.05", "- / 0.45", "- / 0.47", "- / 0.45", 
        "- / 0.31", "- / 0.22", "- / 0.23", "- / 0.49", 
        "- / 0.44"
    ],
    "Last Year's EPS": [
        0.27, 0.109, 0.088, 0.058, 
        0.051, 0.136, 0.132, 0.117, 
        0.104, 0.092, 0.078, 0.073, 
        0.054, 0.048, 0.047, 0.044, 
        0.031, 0.22, 0.23, 0.492, 
        0.44, 0.495, 0.392, 0.332, 
        0.23
    ],
    # Kept as display strings "<percent> (<absolute change>)"; "—" marks the
    # row with no reported EPS.
    "EPS YoY Change": [
        "—", "461.47% (+0.50)", "486.36% (+0.43)", "593.10% (+0.34)", 
        "429.41% (+0.22)", "-19.85% (-0.03)", "-33.33% (-0.04)", "-50.43% (-0.06)", 
        "-50.96% (-0.05)", "47.83% (+0.04)", "69.23% (+0.05)", "60.27% (+0.04)", 
        "92.59% (+0.05)", "74.19% (+0.03)", "65.96% (+0.03)", "65.91% (+0.03)", 
        "74.19% (+0.02)", "104.55% (+0.23)", "105.22% (+0.24)", "-9.55% (-0.05)", 
        "-29.55% (-0.13)", "-55.56% (-0.28)", "-41.33% (-0.16)", "48.19% (+0.16)", 
        "91.30% (+0.21)"
    ]
}

# Convert to DataFrame
nvda_earnings_report = pd.DataFrame(data)



In [10]:
# Parse the human-readable report dates (e.g. "Aug 28, 2024") into datetimes
# in one vectorized call. The explicit format keeps parsing strict, so a
# malformed entry raises instead of being guessed at.
earnings_datetime = pd.to_datetime(nvda_earnings_report['Report Date'], format="%b %d, %Y")

In [11]:
# Render the parsed report dates as 'YYYY-MM-DD HH:MM:SS' strings and attach
# them to the report under the 'timestamp' column.
timestamp = earnings_datetime.dt.strftime('%Y-%m-%d %H:%M:%S')
nvda_earnings_report['timestamp'] = timestamp

In [14]:
# Columns that get stored: the formatted timestamp replaces 'Report Date',
# and "Last Year's EPS" is left out.
report = nvda_earnings_report[['timestamp', 'Fiscal Quarter', 'Forecast / EPS', 'EPS YoY Change']]

In [15]:
# Store the earnings table under '_NVDA' with the tag the plotting cell
# reads back ('earnings:2018-08-16/present').
red_table.set('_NVDA', 'earnings:2018-08-16/present', report)

True

In [46]:
# Always address the dataset registered by this cell (the newest one).
dataset_index = -1

# Training window and projection horizon (ql.Date takes day, month, year).
from_ = datetime(year=2024, month=1, day=1)
to_ =  datetime(year=2024, month=6, day=10)
project_ = ql.Date(15,6,2024)

btest.create_backtest_data(
    from_,
    to_,
    project_, store_prices=True
)
btest.fit_model(dataset_index, lambda x,y: RegressionModel(x,y))

# Out-of-sample prediction with its band, then the in-sample fit and band.
yfit, ysigma = btest.predict(dataset_index)
test_yfit , lower, upper = btest.create_bands(yfit, ysigma)
training_f, lower_f, upper_f = btest.training_bands(dataset_index)

# --- Out-of-sample side ------------------------------------------------------
xt = btest.prediction_data[dataset_index]

prediction_markers = go.Scatter(x=xt,
                                y=btest.prediction_data_price[dataset_index],
                                mode='markers',
                                marker=dict(color='red'),
                                name='test_data')

# Centre line, both band edges, then the edges again as an unlabelled pair so
# that 'tonexty' shades the area between them.
bands = [
    go.Scatter(x=xt, y=test_yfit, line=dict(color='black', width=2), name='test_fit'),
    go.Scatter(x=xt, y=upper, line=dict(color='black', width=2), name='Upper Band'),
    go.Scatter(x=xt, y=lower, line=dict(color='black', width=2), name='Lower Band'),
    go.Scatter(x=xt, y=upper, fill=None, mode='lines', line=dict(color='rgba(0,100,80,0.2)'), showlegend=False),
    go.Scatter(x=xt, y=lower, fill='tonexty', mode='lines', line=dict(color='rgba(0,100,80,0.2)'), showlegend=False),
]

# --- In-sample side ----------------------------------------------------------
x = btest.data[dataset_index][0]

training_markers = go.Scatter(x=x,
                              y=btest.data[dataset_index][1],
                              mode='markers',
                              marker=dict(color='darkgreen'),
                              name='training_data')

training_fit_bands = [
    go.Scatter(x=x, y=training_f, name='training_fit'),
    go.Scatter(x=x, y=upper_f, line=dict(color='black', width=2), name='Upper Band'),
    go.Scatter(x=x, y=lower_f, line=dict(color='black', width=2), name='Lower Band'),
    go.Scatter(x=x, y=upper_f, fill=None, mode='lines', line=dict(color='rgba(0,100,80,0.2)'), showlegend=False),
    go.Scatter(x=x, y=lower_f, fill='tonexty', mode='lines', line=dict(color='rgba(0,100,80,0.2)'), showlegend=False),
]

fig = go.Figure([prediction_markers, training_markers, *bands, *training_fit_bands])

# --- Corporate events inside the training window ----------------------------
# Both tables are written to Redis with 'timestamp' formatted as
# 'YYYY-MM-DD HH:MM:SS' strings. Coerce to datetimes on read so the
# comparison against from_/to_ is datetime-vs-datetime whether or not
# red_table.get already parses the column.
ex_dividend_dates = pd.to_datetime(
    red_table.get('_NVDA', 'ex-dividend:2016/present')['timestamp']
).tolist()
earnings_dates = pd.to_datetime(
    red_table.get('_NVDA', 'earnings:2018-08-16/present')['timestamp']
)

# One dashed vertical line per event strictly inside (from_, to_).
for event_dates, line_colour, event_name in (
    (ex_dividend_dates, "grey", 'ex-dividends'),
    (earnings_dates, "black", 'earnings'),
):
    for date in event_dates:
        if from_ < date < to_:
            fig.add_vline(x=date, line_width=3, line_dash="dash",
                          line_color=line_colour, name=event_name)

fig.show()


In [9]:
from datetime import datetime

# Hand-entered NVDA ex-dividend dates and per-share amounts, newest first.
# NOTE(review): the list reaches back to Nov 2012 although this comment's
# original wording and the Redis tag both say "since 2016" - confirm which
# is intended.
data = {
    "Ex-Dividend Date": [
        "Jun 11, 2024", "Mar 5, 2024", "Dec 5, 2023", "Sep 6, 2023",
        "Jun 7, 2023", "Mar 7, 2023", "Nov 30, 2022", "Sep 7, 2022",
        "Jun 8, 2022", "Mar 2, 2022", "Dec 1, 2021", "Aug 31, 2021",
        "Jun 9, 2021", "Mar 9, 2021", "Dec 3, 2020", "Sep 1, 2020",
        "Jun 4, 2020", "Feb 27, 2020", "Nov 27, 2019", "Aug 28, 2019",
        "May 30, 2019", "Feb 28, 2019", "Nov 29, 2018", "Aug 29, 2018",
        "May 23, 2018", "Feb 22, 2018", "Nov 22, 2017", "Aug 22, 2017",
        "May 19, 2017", "Feb 22, 2017", "Nov 23, 2016", "Aug 23, 2016",
        "May 24, 2016", "Feb 29, 2016", "Nov 18, 2015", "Aug 18, 2015",
        "May 19, 2015", "Feb 24, 2015", "Nov 19, 2014", "Aug 19, 2014",
        "May 22, 2014", "Feb 20, 2014", "Nov 19, 2013", "Aug 20, 2013",
        "May 21, 2013", "Feb 21, 2013", "Nov 19, 2012"
    ],
    "Amount (USD)": [
        0.01, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04,
        0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04,
        0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.04, 0.038,
        0.038, 0.038, 0.038, 0.035, 0.035, 0.035, 0.035, 0.029,
        0.029, 0.029, 0.029, 0.024, 0.024, 0.021, 0.021, 0.021,
        0.021, 0.021, 0.021, 0.015, 0.015, 0.015, 0.015
    ]
}

# Re-render every date as 'YYYY-MM-DD HH:MM:SS' and write the result back
# into the dictionary in place of the human-readable strings.
formatted_dates = list(
    map(
        lambda raw: datetime.strptime(raw, "%b %d, %Y").strftime("%Y-%m-%d %H:%M:%S"),
        data["Ex-Dividend Date"],
    )
)
data["Ex-Dividend Date"] = formatted_dates

# Store under the column names the other tables in this notebook use.
# NOTE(review): this rebinds `df`, which earlier cells use for the price
# history - re-run the price-loading cells before back-testing again.
column_names = {'Ex-Dividend Date': 'timestamp', 'Amount (USD)': 'price'}
df = pd.DataFrame(data).rename(columns=column_names)

In [11]:
# Store the dividend table (bound to `df` by the cell above) under the tag
# the plotting cell reads back.
# NOTE(review): the tag says 2016/present but the table starts in Nov 2012.
red_table.set('_NVDA', 'ex-dividend:2016/present', df)

True

In [22]:
# Display whatever `df` is bound to. The saved output below shows the price
# history (timestamp, price), i.e. this ran before `df` was rebound to the
# dividend table.
df

Unnamed: 0,timestamp,price
0,2016-01-04 05:00:00,32.370
1,2016-01-05 05:00:00,32.890
2,2016-01-06 05:00:00,31.530
3,2016-01-07 05:00:00,30.280
4,2016-01-08 05:00:00,29.630
...,...,...
2151,2024-07-23 04:00:00,122.590
2152,2024-07-24 04:00:00,114.250
2153,2024-07-25 04:00:00,112.280
2154,2024-07-26 04:00:00,113.060
