In [None]:
from typing import Callable, Iterator, Sequence, Tuple, TypeVar

import numpy as np
from rl.markov_process import TransitionStep

# Generic state type; parameterizes the TransitionStep annotation used by TDC below.
S = TypeVar('S')

def TDC(steps: Iterator[TransitionStep[S]],
        features: Sequence[Callable[[S], float]],
        alpha: float,
        beta: float,
        gamma: float
       ) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """Run linear gradient-TD with correction (TDC) over a stream of steps.

    The value estimate is linear in the features, ``V(s) = w @ x(s)``, and a
    secondary weight vector ``theta`` tracks the expected TD error given the
    features. For every transition the updates are::

        delta  = reward + gamma * w @ x' - w @ x
        w     += alpha * (delta * x - gamma * (theta @ x) * x')
        theta += beta  * (delta - theta @ x) * x

    where ``x`` / ``x'`` are the feature vectors of ``step.state`` and
    ``step.next_state``. Both updates use the ``theta`` from before the step.

    Args:
        steps: Transitions exposing ``state``, ``next_state`` and ``reward``.
        features: Sized collection of feature functions; each is called on a
            state and must return a number.
        alpha: Step size for the primary weights ``w``.
        beta: Step size for the correction weights ``theta``.
        gamma: Discount factor.

    Yields:
        ``(w, theta)`` after each processed transition. Independent copies are
        yielded, so results collected by the caller are not overwritten by
        later updates.
    """
    n_features = len(features)
    # Both weight vectors start from independent U[0, 1) draws.
    w = np.random.uniform(0, 1, n_features)
    theta = np.random.uniform(0, 1, n_features)

    for step in steps:
        # NOTE(review): the feature functions receive step.state and
        # step.next_state exactly as stored on the step; if the rl library
        # wraps states (or marks terminal ones), confirm the feature functions
        # handle that and return 0 where a terminal value is expected.
        # Build real arrays: scalar * list arithmetic on plain lists fails.
        x = np.array([f(step.state) for f in features], dtype=float)
        x_next = np.array([f(step.next_state) for f in features], dtype=float)

        delta = step.reward + gamma * (w @ x_next) - w @ x
        # Computed once so both updates see the pre-update theta.
        correction = theta @ x

        w += alpha * (delta * x - gamma * correction * x_next)
        theta += beta * (delta - correction) * x

        # Yield snapshots; w and theta themselves keep being mutated in place.
        yield w.copy(), theta.copy()
    
    