## Imports

In [None]:
%load_ext autoreload
%autoreload 2

import datetime
import logging

import pandas as pd
from pyarrow import parquet
import s3fs

import helpers.dbg as dbg
import helpers.env as env
import helpers.printing as prnt
import helpers.sql as sql

In [None]:
# Notebook-wide setup provided by `helpers.printing`.
prnt.config_notebook()

# Initialize the logger at INFO verbosity. Swap in the commented-out DEBUG line
# below for more detailed output.
# dbg.init_logger(verbosity=logging.DEBUG)
dbg.init_logger(verbosity=logging.INFO)
# dbg.test_logger()
# Logger used by the cells of this notebook.
_LOG = logging.getLogger(__name__)

In [None]:
def print_columns(df: pd.DataFrame) -> None:
    """
    Print the number of columns of `df` and a comma-separated list of their labels.

    Labels are converted with `str()` before joining, so that non-string labels
    (e.g., integers, or tuples coming from a MultiIndex) do not make
    `str.join()` raise `TypeError`.

    :param df: dataframe whose columns are reported
    """
    print("# Columns")
    print("num_cols=%s" % len(df.columns))
    print(", ".join(map(str, df.columns)))

## Analyze AAPL

In [None]:
# Show the `start_time` column.
# NOTE(review): `df` is not assigned in any cell above this one; in the visible
# notebook it is first assigned in the "Real-time node" section, so one of those
# cells has to be run before this one.
display(df["start_time"])

In [None]:
# Distinct calendar dates present in `start_time`. The column must be
# datetime-like, since it is accessed through `.dt`.
set(df["start_time"].dt.date)

In [None]:
# Count the rows whose `start_time` falls on a given calendar date.
df_tmp = df["start_time"].dt
print(df_tmp.date)
# `.dt.date` yields `datetime.date` objects, so compare against a `datetime.date`
# rather than a `pd.Timestamp`: pandas >= 2.0 considers a Timestamp and a date
# never equal, which would make this count always 0.
(df_tmp.date == pd.Timestamp("2021-07-16").date()).sum()

In [None]:
# Select the `start_time` values falling on a single calendar date and report how
# many there are and the range they span.
df_tmp = df["start_time"]
#date = pd.Timestamp("2021-07-16").date()
date = pd.Timestamp("2021-07-15").date()
# Convert to `datetime.date` once instead of once per comparison.
dates = df_tmp.dt.date
mask = (dates >= date) & (dates < (date + datetime.timedelta(days=1)))
print(mask.sum())
df_tmp = sorted(df_tmp[mask])
# `min()` / `max()` raise `ValueError` on an empty sequence, so guard against a
# date with no rows. The list is sorted, so its ends are the min and the max.
if df_tmp:
    print(df_tmp[0], df_tmp[-1])
else:
    print("No rows for date=%s" % date)

In [None]:
# Express 13:00 UTC on 2021-07-15 in New York local time.
# Alternative timestamp to inspect: "2021-07-15 20:38:00".
ts = pd.Timestamp("2021-07-15 13:00:00", tz="UTC")
ts.tz_convert("America/New_York")

## Real-time node

In [None]:
import time

In [None]:
import dataflow_amp.returns.pipeline as darp
import core.dataflow as cdataf
import core_lime.dataflow.nodes.sources as cldns
import core.config as cconfig

# Start from the config template exposed by the returns pipeline builder.
dag_builder = darp.ReturnsPipeline()
config = dag_builder.get_config_template()

# Configure the source node: `cldns.load_single_instrument_data` called with a
# fixed date range.
load_prices_kwargs = {
    "start_date": datetime.date(2010, 6, 29),
    "end_date": datetime.date(2010, 7, 13),
}
source_config = cconfig.get_config_from_nested_dict(
    {
        "func": cldns.load_single_instrument_data,
        "func_kwargs": load_prices_kwargs,
    }
)
config["load_prices"] = source_config

# Override the volume column(s) used by resampling and VWAP, and the VWAP rule.
# NOTE(review): "15T" looks like a pandas offset alias (15 minutes) -- confirm
# against the `compute_vwap` node.
config["resample_prices_to_1min", "func_kwargs", "volume_cols"] = ["volume"]
config["compute_vwap", "func_kwargs", "rule"] = "15T"
config["compute_vwap", "func_kwargs", "volume_col"] = "volume"

print(config)

In [None]:
#config = config.copy()
#dag_runner = cdataf.PredictionDagRunner(
#    config, config["meta"]["dag_builder"]
#)

In [None]:
# Build the DAG from the config assembled in the previous cells.
dag = dag_builder.get_dag(config)
# Run the DAG with the "fit" method for the node `compute_ret_0`.
# NOTE(review): `run_leq_node` presumably runs every node up to and including
# `nid` -- confirm against `core.dataflow`.
nid = "compute_ret_0"
dict_ = dag.run_leq_node(nid, "fit")

In [None]:
import helpers.printing as hprint
# Load the example data and report the covered `end_time` range and the shape.
df = cldns.load_db_example_data()
# Use the vectorized `Series.min()` / `Series.max()`: the builtins iterate element
# by element in Python and raise `ValueError` when the frame has no rows.
print("end_time=[%s, %s]" % (df["end_time"].min(), df["end_time"].max()))
print(df.shape)

# Repeat with the data returned by `cldns.get_db_data()` for a fixed UTC timestamp.
datetime_ = pd.Timestamp("2021-07-22 20:01:00-00:00")
print(datetime_)
df = cldns.get_db_data(datetime_)
print("end_time=[%s, %s]" % (df["end_time"].min(), df["end_time"].max()))
print(df.shape)

df.head()

In [None]:
# Show the three time columns of `df` side by side.
df[["start_time", "end_time", "timestamp_db"]]

In [None]:
# Iterate over the times yielded by `cldns.get_now_time()` and, whenever
# `cldns.is_dag_to_execute()` says so, pull the data from the DB for that time.
for now in cldns.get_now_time():
    print("now=", now)
    execute = cldns.is_dag_to_execute(now)
    if not execute:
        continue
    print("Time to execute the DAG")
    # Get the data from the DB.
    df = cldns.get_db_data(now)
    # `Series.min()` / `Series.max()` do not raise on an empty frame, unlike the
    # builtins, so a query returning no rows does not abort the loop.
    end_times = df["end_time"]
    print("end_time=[%s, %s]" % (end_times.min(), end_times.max()))
    display(df.head(3))
    print(df.shape)