In [1]:
import pandas as pd
import redis
import pickle
from pandas.tseries.offsets import CustomBusinessDay
from pandas.tseries.holiday import USFederalHolidayCalendar    # used to create a business day defined on the US federal holiday calendar that can be added or subtracted to a datetime
from tqdm import tqdm
import datetime

from google.cloud import bigquery

In [2]:
import os
# Tell the Google client libraries which credentials file to use.
# 'creds.json' is a relative path, so it is resolved against the kernel's working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'creds.json'

In [None]:
# One-business-day offset built on the US federal holiday calendar.
BUSINESS_DAY = CustomBusinessDay(calendar=USFederalHolidayCalendar())    # adding/subtracting it to a timestamp skips over holidays in that calendar

In [None]:
# BigQuery project and dataset interpolated into the table names of the queries below.
PROJECT_ID = 'eng-reactor-287421'
DATASET_NAME = 'yield_curves_v2'

In [3]:
def sqltodf(sql, limit=""):
    """Run a BigQuery SQL statement and return the result as a DataFrame.

    Parameters
    ----------
    sql : str
        The query text to execute.
    limit : int or str, optional
        When given, ``LIMIT <limit>`` is appended to the query. The default
        ``""`` (or ``None``) leaves the query unchanged.

    Returns
    -------
    The object produced by ``RowIterator.to_dataframe()`` for the query result.
    """
    bq_client = bigquery.Client()
    if limit not in ("", None):
        # A separating space is required: without it a query ending in
        # "... desc" would become "... descLIMIT 5", which is invalid SQL.
        sql = f"{sql} LIMIT {limit}"
    return bq_client.query(sql).result().to_dataframe()

In [4]:
# SQL for the three inputs of the Redis load: per-minute Nelson-Siegel coefficients,
# daily StandardScaler parameters, and shape parameters. All are ordered newest first.
# NOTE(review): the nelson date window is hard-coded. The result displayed further down
# contains only 2022-08-08 rows, so if `date` is a timestamp column the upper bound
# `<= '2022-08-09'` presumably stops at midnight of 08-09 — confirm whether 08-09 was
# meant to be included.
query_nelson = f'''SELECT *  FROM `{PROJECT_ID}.{DATASET_NAME}.nelson_siegel_coef_minute` where date >= '2022-08-08' and date <= '2022-08-09' order by date desc'''
query_scalar = f'''SELECT *  FROM `{PROJECT_ID}.{DATASET_NAME}.standardscaler_parameters_daily` order by date desc'''
query_shape = f'''SELECT * FROM `{PROJECT_ID}.{DATASET_NAME}.shape_parameters` order by date desc'''

In [5]:
# Per-minute Nelson-Siegel coefficients, indexed by their (parsed) timestamp.
nelson = (
    sqltodf(query_nelson)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date', drop=True)
)

In [6]:
# Daily StandardScaler parameters, indexed by their (parsed) date.
# Exact duplicate rows are removed BEFORE `date` becomes the index: drop_duplicates
# only compares columns, so de-duplicating after set_index would also throw away a
# date whose parameter values merely equal those of another date, and the later
# `scalar.loc[<date>]` lookup for it would raise KeyError. This matches the order
# used for `shape_parameter`.
scalar = (
    sqltodf(query_scalar)
    .drop_duplicates()
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date', drop=True)
)

In [7]:
# Shape parameters: drop exact duplicate rows, parse `Date`, then index by it.
shape_parameter = (
    sqltodf(query_shape)
    .drop_duplicates()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date', drop=True)
)

In [8]:
# Display the loaded coefficients (columns: const, exponential, laguerre).
nelson

Unnamed: 0_level_0,const,exponential,laguerre
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-08-08 15:59:00,226.211444,-71.887424,-22.013795
2022-08-08 15:58:00,226.227451,-72.145872,-22.356486
2022-08-08 15:57:00,226.360614,-72.699996,-23.184311
2022-08-08 15:56:00,226.607705,-71.469778,-21.919394
2022-08-08 15:55:00,226.373342,-72.697008,-23.157554
...,...,...,...
2022-08-08 09:34:00,223.360622,-92.484317,-43.829926
2022-08-08 09:33:00,227.805140,-70.457915,-21.188488
2022-08-08 09:32:00,227.783545,-70.557751,-21.331671
2022-08-08 09:31:00,227.872530,-70.734984,-21.882909


In [9]:
# Write one Redis entry per minute of `nelson`.
#   key   : the minute formatted as 'YYYY-MM-DD:HH:MM'
#   value : pickled dict with that minute's coefficients ('nelson_values'), the scaler
#           parameters of the previous business day ('scalar_values') and that day's
#           shape parameter ('shape_parameter').

# Dates for which the previous-business-day lookup steps back one more business day.
# Same dates as the original chained comparison, with its repeated entries removed.
# Kept as ISO strings so the check does not depend on whether the name `datetime` is
# currently bound to the module or to the class.
EXTRA_SKIP_DATES = frozenset({
    '2022-09-05', '2022-07-04', '2022-06-20', '2022-04-15', '2022-02-21',
    '2021-12-24', '2021-11-29', '2021-11-25', '2021-11-11', '2021-10-11',
    '2021-09-06',
})

# Created once: building a new client for every row is loop-invariant work.
redis_client = redis.Redis(host='10.227.69.60', port=6379, db=0)

for index, row in tqdm(nelson.iterrows(), total=nelson.shape[0]):
    temp_nelson_df = row.to_frame().T          # single-row frame for this minute
    string_date = index.strftime('%Y-%m-%d:%H:%M')

    day_before = index - BUSINESS_DAY
    if day_before.strftime('%Y-%m-%d') in EXTRA_SKIP_DATES:
        day_before = day_before - BUSINESS_DAY
    day_before = day_before.strftime('%Y-%m-%d')

    # .loc may return a Series (single match) or a DataFrame (several rows);
    # normalise to a one-row DataFrame, keeping the first row.
    temp_scalar = scalar.loc[day_before]
    if isinstance(temp_scalar, pd.Series):
        temp_scalar = temp_scalar.to_frame().T
    temp_scalar = temp_scalar.iloc[:1]

    # Same Series/DataFrame ambiguity as above: take the first column of the first row.
    shape_rows = shape_parameter.loc[day_before]
    if isinstance(shape_rows, pd.Series):
        temp_shape_param = shape_rows.values[0]
    else:
        temp_shape_param = shape_rows.values[0][0]

    temp_dict = {'nelson_values': temp_nelson_df, 'scalar_values': temp_scalar, 'shape_parameter': temp_shape_param}
    value = pickle.dumps(temp_dict, protocol=pickle.HIGHEST_PROTOCOL)
    redis_client.set(string_date, value)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 379/379 [00:29<00:00, 12.95it/s]


In [3]:
# Client for the Redis instance (same host/port/db the load loop writes to), used by the spot checks below.
redis_client = redis.Redis(host='10.227.69.60', port=6379, db=0)

In [4]:
# NOTE(review): this rebinds `datetime` from the module (imported in the first cell) to the
# class, so any cell that uses it module-style (`datetime.<name>`) fails if re-run after this one.
from datetime import datetime
test_date = datetime(2022,9,7,13,30)    # not referenced by any other cell shown in this notebook

In [7]:
# Spot check: fetch one minute's entry and unpickle it.
# NOTE(review): `get` returns None for a missing key, which makes pickle.loads raise TypeError.
# Unpickling executes arbitrary code, so only do this against a trusted Redis instance.
pickle.loads(redis_client.get("2022-09-09:02:08"))

{'nelson_values':                           const  exponential   laguerre
 date                                                   
 2022-09-09 02:08:00  323.671243   -67.004532 -19.899704,
 'scalar_values':             exponential_mean  exponential_std  laguerre_mean  laguerre_std
 date                                                                      
 2022-09-08           0.46864         0.214801       0.237851       0.04782,
 'shape_parameter': 5.0}

In [56]:
# Load the stored payload for a single minute so its parts can be inspected in the next cells.
# NOTE(review): fails with TypeError if the key is absent (`get` returns None).
t = pickle.loads(redis_client.get("2022-09-08:12:50"))

In [57]:
# The stored single-row coefficient frame for that minute.
t['nelson_values']

Unnamed: 0_level_0,const,exponential,laguerre
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-09-08 12:50:00,326.416137,-65.823655,-18.798284


In [53]:
# Same coefficients as a raw 2-D array (one row, one value per column).
# NOTE(review): the saved output below does not match the frame displayed in the previous cell,
# and this cell's execution count is lower — it appears to have been run against an earlier `t`.
t['nelson_values'].values

array([[297.46221138, -58.18540721, -17.43028047]])

In [24]:
# Check whether a date-label lookup on `scalar` yields a DataFrame (rather than a Series) for this date.
type(scalar.loc['2022-08-31']) == pd.DataFrame

True

In [None]:
# Audit of the Redis contents: every stored per-minute payload was checked and none had a
# malformed 'nelson_values' frame, so value errors from bad entries should no longer occur.
# Re-run this cell whenever you want to check Redis again.
#
# The scan walks minute by minute from CHECK_START to CHECK_END, jumps to 09:30 of the next
# day once the time passes 15:59, and prints any entry whose 'nelson_values' frame has more
# than three columns.

# Imported here because no other cell brings `time` / `timedelta` into scope.
from datetime import datetime, time, timedelta

# Scan window. NOTE(review): the original cell relied on a `start_date` defined outside the
# notebook; 2022-08-08 09:30 is the first day loaded by `query_nelson` — adjust as needed.
CHECK_START = datetime(2022, 8, 8, 9, 30)
CHECK_END = datetime(2022, 9, 12)
LAST_MINUTE_OF_DAY = time(15, 59)

start_date = CHECK_START    # reset on every run so the cell is re-runnable
while start_date < CHECK_END:
    if start_date.time() > LAST_MINUTE_OF_DAY:
        start_date = start_date + timedelta(days=1)
        start_date = start_date.replace(hour=9, minute=30)

    # A single GET replaces EXISTS + GET: it returns None when the key is absent.
    raw_value = redis_client.get(start_date.strftime('%Y-%m-%d:%H:%M'))
    if raw_value is not None:
        # Unpickling executes arbitrary code — only run this against a trusted Redis instance.
        temp_data = pickle.loads(raw_value)['nelson_values']
        try:
            if temp_data.values.shape[1] > 3:
                print(f"{start_date} {temp_data}")
        except Exception:
            # Report which minute is malformed, then re-raise with the original traceback.
            print(start_date)
            raise

    start_date = start_date + timedelta(minutes=1)