# This is an example of building a MACD signal

In [None]:
from pprint import pprint

import pandas as pd

from aika import putki
from aika.putki import CalendarChecker
from aika.putki.context import Defaults, GraphContext
from aika.putki.graph import Graph, TaskModule
from aika.putki.runners import LocalRunner
from aika.putki.interface import Dependency
from aika.time.calendars import TimeOfDayCalendar
from aika.time.time_of_day import TimeOfDay
from aika.time.time_range import TimeRange#
from aika.time.timestamp import Timestamp
from aika.utilities.fin.macd import macd

from aika.datagraph.persistence.hash_backed import HashBackedPersistanceEngine
from aika.datagraph.persistence.mongo_backed import MongoBackedPersistanceEngine
from pandas_datareader import data
import typing as t
from pandas.tseries.offsets import BDay
import pymongo

## Set up

### Create an engine
We support two kinds of engine at the moment: one purely in memory, backed by a hash map, and one that stores the data permanently in MongoDB. You can use either here.

In [None]:
# Use the purely in-memory engine (backed by a hash map) for this example.
engine = HashBackedPersistanceEngine()
# Alternative: uncomment to use the MongoDB-backed engine, which stores the
# data permanently instead of in memory.
# engine = MongoBackedPersistanceEngine(
#     pymongo.MongoClient(),
#     database_name="research_foo3"
# )


### Create a context
A context is the user interface for creating tasks. It mainly just functions as a placeholder for information that is common to all or nearly all tasks: in this case, the code version, the storage engine, and the time_range.

In [None]:
# Defaults shared by the tasks created from this context: the code version,
# the storage engine created above, and the time range to cover.
context = GraphContext(
    defaults=Defaults(
        version="research", 
        persistence_engine=engine, 
        time_range= TimeRange("2018", "2020")
    )
)

## Create your first function. 
This just uses the pandas-datareader project to pull some stock data from Yahoo.

In [None]:
def pull_google_finance_data(
    tickers: t.List,
    time_range,
):
    """Download adjusted close prices for ``tickers`` over ``time_range``.

    Despite the function name, the data is requested from the "yahoo"
    source of pandas-datareader.

    Parameters
    ----------
    tickers
        Ticker symbols to download; converted to a ``list`` before the
        request is made.
    time_range
        Object exposing ``start`` and ``end`` attributes, which are used as
        the bounds of the request.

    Returns
    -------
    The "Adj Close" selection of the downloaded frame, with an unnamed index
    whose entries have been mapped through ``Timestamp``.
    """
    symbols = list(tickers)
    raw = data.DataReader(
        symbols,
        "yahoo",
        start=time_range.start,
        end=time_range.end,
    )
    # Drop the index name the reader assigned.
    raw.index.name = None
    # Mapping through Timestamp ensures every index entry has a timezone.
    raw.index = raw.index.map(Timestamp)
    adjusted_close = raw["Adj Close"]
    return adjusted_close

In [None]:
# Call the function directly, outside of any task, to see the data it returns.
pull_google_finance_data(["AAPL", "GOOGL"], TimeRange("2018", "2020"))

Now instead we can create a task to do that. The task needs a name, the function to run, and the parameters. Finally, because this is a "source" node of the graph, we must specify a completion checker. A completion checker specifies the expected index for the data; in this case, we are saying that we expect it to have a value every business day at midnight.

In [None]:
# Source task named "close_prices" that runs pull_google_finance_data with the
# given tickers. No time_range is passed here; presumably it is taken from the
# context defaults (confirm against GraphContext).
# As a source node it needs an explicit completion checker, built here from a
# calendar with a time of day of 00:00 UTC.
close_prices = context.time_series_task(
    "close_prices",
    pull_google_finance_data,
    tickers=("AAPL", "GOOGL"),
    completion_checker=CalendarChecker(
        TimeOfDayCalendar(time_of_day=TimeOfDay.from_str("00:00 [UTC]"))
    ),
)

Before we run the task it will evaluate as "not complete", and after we run it, it will evaluate as "complete". We then pull the data from the engine and display it with the read command.

In [None]:
# Completion status before the task has been run.
display(close_prices.complete())
close_prices.run()
# Completion status after the run.
display(close_prices.complete())
# Pull the stored data back from the engine; as the last expression of the
# cell it is displayed.
close_prices.read()

## Let's do some MACD using the library functions

MACD calculations are index preserving, as are most time series operations, so here we do not need to specify a completion checker; it is inferred from the parent task, in this case close prices. Note as well that we can store two different datasets in the same "node" of the data graph; all that is required is that their parameters are different.

In [None]:
# Two tasks that share the node name "macd" and the same function and parent
# (close_prices) but differ in their span parameters. No completion checker
# is passed to either; it is inferred from the parent task.
macd_one = context.time_series_task(
    "macd",
    macd,
    prices=close_prices,
    fast_span=10,
    slow_span=20,
    vol_span=30
)

# Same node name, with every span doubled relative to macd_one.
macd_two = context.time_series_task(
    "macd",
    macd,
    prices=close_prices,
    fast_span=20,
    slow_span=40,
    vol_span=60
)
# Show the completion checker that macd_one ended up with.
macd_one.completion_checker

In [None]:
# Completion status of macd_one before it has been run.
display(macd_one.complete())
macd_one.run()
# Read the result back; as the last expression of the cell it is displayed.
macd_one.read()

In [None]:
# Completion status of macd_two before and after running it.
display(macd_two.complete())
macd_two.run()
display(macd_two.complete())
# Read the result back; as the last expression of the cell it is displayed.
macd_two.read()

## Branching Engines

Sometimes we will have one engine that already contains the data that we need, and want to run some experiments that run in a different engine.

In [None]:
# A second, separate in-memory engine for the experiment.
engine_two = HashBackedPersistanceEngine()
# Same name, function, parent and span parameters as macd_two, but with an
# explicit persistence_engine argument pointing at engine_two. The parent
# close_prices was created without that argument.
macd_three = context.time_series_task(
    "macd",
    macd,
    prices=close_prices,
    fast_span=20,
    slow_span=40,
    vol_span=60,
    persistence_engine=engine_two
)

In [None]:
# Run the task configured with engine_two, then show its completion status.
macd_three.run()
display(macd_three.complete())

Now the new data set is stored in the new engine, which we can check below.

In [None]:
# Ask each engine in turn whether it holds the output of macd_three.
display(engine.exists(macd_three.output))
display(engine_two.exists(macd_three.output))

In [None]:
def describe(data):
    """Return the result of calling ``describe()`` on ``data``."""
    summary = data.describe()
    return summary

In [None]:
# Static task named "macd.describe" that runs describe with macd_two as its
# data argument.
describe_two = context.static_task(
    "macd.describe",
    describe,
    data=macd_two,
)
describe_two.run()
# Read the result back; as the last expression of the cell it is displayed.
describe_two.read()

In [None]:
# Same static task as describe_two, but fed from macd_three.
# NOTE(review): no persistence_engine is passed here, so presumably the
# context default engine is used for this task's output; confirm.
describe_three = context.static_task(
    "macd.describe",
    describe,
    data=macd_three,
)
describe_three.run()