In [1]:
#--------------------------------------------------------------------------
# Imports
#--------------------------------------------------------------------------

import datetime
import os
from urllib.parse import quote

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sqlalchemy import create_engine, sql

from baselinemodel import Baseline
from windowgenerator import WindowGenerator

In [2]:
# matplotlib defaults for every figure in this notebook:
# a compact (5, 3) figure size and no grid lines on the axes
mpl.rcParams.update({
    'figure.figsize': (5, 3),
    'axes.grid': False,
})

In [3]:
#--------------------------------------------------------------------------
# Database connection setup
#--------------------------------------------------------------------------

# Postgres connection settings are read from the environment so that no
# credentials are stored in the notebook itself.
PG_HOST = os.getenv('PG_HOST')
PG_PORT = os.getenv('PG_PORT')
PG_DB_NAME = os.getenv('PG_DB_NAME')
PG_USERNAME = os.getenv('PG_USERNAME')
PG_PASSWORD = os.getenv('PG_PASSWORD')

# Fail fast when a setting is missing: continuing would build a connection
# URL containing the literal text "None" and fail later with a far less
# helpful error.
missing_env_vars = [
    name
    for name, value in (
        ('PG_HOST', PG_HOST),
        ('PG_PORT', PG_PORT),
        ('PG_DB_NAME', PG_DB_NAME),
        ('PG_USERNAME', PG_USERNAME),
        ('PG_PASSWORD', PG_PASSWORD),
    )
    if value is None
]
if missing_env_vars:
    raise RuntimeError(
        '[ ERROR ] Environment variables not found: ' + ', '.join(missing_env_vars)
    )

# hard-code table name
PG_TABLE = 'bars_minute_eastern'

# Build the connection URL. The username and password are percent-encoded so
# that characters such as '@', ':' or '/' cannot be mistaken for URL
# delimiters.
conn_string = "postgresql://{}:{}@{}:{}/{}".format(
    quote(PG_USERNAME, safe=''),
    quote(PG_PASSWORD, safe=''),
    PG_HOST,
    PG_PORT,
    PG_DB_NAME,
)

# create the engine and open a connection used by the queries below
db = create_engine(conn_string)
conn = db.connect()

In [4]:
#------------------------------------------------------------------------------
# Pull data from db (one dataframe of NVDA minute bars per calendar date)
#------------------------------------------------------------------------------

# NOTE: db contains minute bars for NVDA,INTC from 2022/06/01 to 2022/07/01 (inclusive)
start_date = datetime.date(2022, 6, 1)
end_date = datetime.date(2022, 7, 1)

# The query text is built once. The table name is a notebook constant and
# cannot be a bound parameter; the symbol and date are bound parameters so the
# driver handles quoting instead of string interpolation.
day_bars_query = sql.text(
    f'select * from { PG_TABLE } where SYMBOL = :symbol and date(TIMESTAMP) = :bar_date'
)

# Collect one record per date that has data and build the dataframe once at
# the end, rather than growing it row by row inside the loop.
day_records = []

# loop over each date (end_date inclusive) and get minute_bars from db
current_date = start_date
delta = datetime.timedelta(days=1)
while current_date <= end_date:
    # pull minute_bars for current_date
    day_minute_bars_df = pd.read_sql_query(
        sql=day_bars_query,
        con=conn,
        params={'symbol': 'NVDA', 'bar_date': current_date},
    )

    # keep only dates for which the query returned at least one bar
    if not day_minute_bars_df.empty:
        day_records.append({'date': current_date, 'minute_bars': day_minute_bars_df})

    # go to next day
    current_date += delta

# one row per date; the 'minute_bars' cell holds that date's dataframe
days_df = pd.DataFrame(day_records, columns=['date', 'minute_bars'])

days_df.head()


Unnamed: 0,date,minute_bars
0,2022-06-01,timestamp symbol open c...
1,2022-06-02,timestamp symbol open clo...
2,2022-06-03,timestamp symbol open c...
3,2022-06-06,timestamp symbol open c...
4,2022-06-07,timestamp symbol open c...
