In [8]:
from pprint import pprint

import pandas as pd

from aika import putki
from aika.putki import CalendarChecker
from aika.putki.context import Defaults, GraphContext
from aika.putki.graph import Graph, TaskModule
from aika.putki.runners import LocalRunner
from aika.putki.interface import Dependency
from aika.time.calendars import TimeOfDayCalendar
from aika.time.time_of_day import TimeOfDay
from aika.time.time_range import TimeRange#
from aika.time.timestamp import Timestamp
from aika.utilities.fin.macd import macd

from aika.datagraph.persistence.hash_backed import HashBackedPersistanceEngine
from aika.datagraph.persistence.mongo_backed import MongoBackedPersistanceEngine
from pandas_datareader import data
import typing as t
from pandas.tseries.offsets import BDay, CDay
import pymongo
#this is new code
import yfinance as yf
yf.pdr_override()

## Create your first function
This uses the pandas-datareader project to pull some stock data from Yahoo Finance.

In [13]:

# Time of day parsed from "16:30:00 [America/New_York]"; used below to turn
# each daily index entry into a timestamp and as the calendar's time of day.
tod = TimeOfDay.from_str("16:30:00 [America/New_York]")

def pull_google_finance_data(
    tickers : t.List,
    time_range,
):
    """Download adjusted close prices for ``tickers`` over ``time_range``.

    Despite the name, the data is fetched through pandas-datareader's
    ``get_data_yahoo``, one request per ticker.

    Parameters
    ----------
    tickers
        Ticker symbols to download, in the order they should be fetched.
    time_range
        Object exposing ``start`` and ``end``, passed straight through to
        ``get_data_yahoo``.

    Returns
    -------
    The "Adj Close" column of the per-ticker frames concatenated row-wise
    (each ticker's rows follow the previous ticker's), indexed by the
    timestamps produced by ``tod.make_timestamp``.
    """
    per_ticker_frames = [
        data.get_data_yahoo(ticker, start=time_range.start, end=time_range.end)
        for ticker in tickers
    ]
    prices = pd.concat(per_ticker_frames)
    prices.index.name = None
    # Mapping through the module-level ``tod`` ensures the index has a timezone.
    prices.index = prices.index.map(tod.make_timestamp)

    return prices["Adj Close"]

In [14]:
# Fetch adjusted closes for both tickers over the 2018-2020 range and show them.
df = pull_google_finance_data(
    tickers=["AAPL", "GOOGL"],
    time_range=TimeRange("2018", "2020"),
)
df

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


2018-01-02 16:30:00-05:00    40.888069
2018-01-03 16:30:00-05:00    40.880936
2018-01-04 16:30:00-05:00    41.070835
2018-01-05 16:30:00-05:00    41.538445
2018-01-08 16:30:00-05:00    41.384151
                               ...    
2019-12-24 16:30:00-05:00    67.221497
2019-12-26 16:30:00-05:00    68.123497
2019-12-27 16:30:00-05:00    67.732002
2019-12-30 16:30:00-05:00    66.985497
2019-12-31 16:30:00-05:00    66.969498
Name: Adj Close, Length: 1006, dtype: float64

Note the missing days; these make up our holiday calendar.

In [15]:
# Business days in the range for which the download returned no row.
holidays = (
    pd.bdate_range(start="2018", end="2020")
    .difference(df.index.date)
)
holidays

DatetimeIndex(['2018-01-01', '2018-01-15', '2018-02-19', '2018-03-30',
               '2018-05-28', '2018-07-04', '2018-09-03', '2018-11-22',
               '2018-12-05', '2018-12-25', '2019-01-01', '2019-01-21',
               '2019-02-18', '2019-04-19', '2019-05-27', '2019-07-04',
               '2019-09-02', '2019-11-28', '2019-12-25', '2020-01-01'],
              dtype='datetime64[ns]', freq=None)

In [16]:
# Two graph contexts that differ only in version label and time range; each
# gets its own HashBackedPersistanceEngine instance.

# Range given as "2018" to "2020", stored under the "research" version.
context_ends_trading_day = GraphContext(
    defaults=Defaults(
        version="research", 
        persistence_engine=HashBackedPersistanceEngine(), 
        time_range= TimeRange("2018", "2020")
    )
)

# Range ending at "2019-12-26", stored under the "research2" version.
# NOTE(review): the notebook text says tasks in this context expect an entry
# on Christmas Day, which suggests the end bound is exclusive - confirm
# against TimeRange.
context_ends_holiday = GraphContext(
    defaults=Defaults(
        version="research2", 
        persistence_engine=HashBackedPersistanceEngine(), 
        time_range= TimeRange("2018", "2019-12-26")
    )
)

In [17]:
# Register the download as a time-series task whose completeness is judged
# against a plain business-day calendar. BDay() carries no holiday
# information, so every Monday-Friday is expected to have an entry.
close_prices_broken = context_ends_holiday.time_series_task(
    "close_prices",
    pull_google_finance_data,
    tickers=("AAPL", "GOOGL"),
    completion_checker=CalendarChecker(
        TimeOfDayCalendar(time_of_day=tod, freq=BDay())
    )
)

# The failure is the point of this cell: the completion checker rejects the
# run. Catch and display the error instead of letting it propagate, so that
# "Restart Kernel -> Run All" still reaches the cells below.
try:
    close_prices_broken.run()
except ValueError as error:
    print(f"{type(error).__name__}: {error}")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


ValueError: The task appeared to run successfully and wrote its output, but according to its completion checker it is not complete.

Note the above error message: the task did write its output, but because it did not know that the last expected day was a holiday, it appears to be missing a day. It was expecting an entry on Christmas Day, but if we read the data back we see:

In [18]:
# Show the last rows of what the task wrote, despite the completion error.
close_prices_broken.read().tail()

2019-12-18 16:30:00-05:00    67.595497
2019-12-19 16:30:00-05:00    67.821999
2019-12-20 16:30:00-05:00    67.560997
2019-12-23 16:30:00-05:00    67.531502
2019-12-24 16:30:00-05:00    67.221497
Name: Adj Close, dtype: float64

To fix this, we need only add a holiday calendar:

In [19]:
# Same download as before, registered under a new task name, but checked
# against a custom business-day calendar: weekmask "1111100" keeps
# Monday-Friday as business days and `holidays` (computed above from the
# gaps in the downloaded data) removes the days with no prices.
close_prices_fixed = context_ends_holiday.time_series_task(
    "close_prices_fixed",
    pull_google_finance_data,
    tickers=("AAPL", "GOOGL"),
    completion_checker=CalendarChecker(
        TimeOfDayCalendar(time_of_day=tod, freq=CDay(weekmask="1111100", holidays=holidays))
    )
)
# Run the task, then display the last rows it wrote.
close_prices_fixed.run()
close_prices_fixed.read().tail()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


2019-12-18 16:30:00-05:00    67.595497
2019-12-19 16:30:00-05:00    67.821999
2019-12-20 16:30:00-05:00    67.560997
2019-12-23 16:30:00-05:00    67.531502
2019-12-24 16:30:00-05:00    67.221497
Name: Adj Close, dtype: float64

Note that both of these tasks wrote the same data; only the second one correctly knows not to expect data on the day in question.