In [629]:
import numpy as np
import plotly.graph_objects as go

import matplotlib.pyplot as plt
from functools import reduce
from math import ceil

class Node:
    """One node of the decision tree owned by a model.

    The node reads ``model.X`` (states; indexed and measured with ``len`` in
    ``get_system``) and ``model.U`` (actions). Attributes:

    * ``t``       -- time index, used as the exponent of ``gamma``.
    * ``Q``       -- for the edge into this node, ``Q[u][x]`` is the
                     distribution handed to the parent's ``one_step``.
    * ``cost``    -- ``cost[x]`` on terminal nodes, ``cost[u][x]`` otherwise.
    * ``w``       -- cached values per state (empty means "not computed yet").
    * ``policy``  -- chosen action per state, filled by ``calc_policy``.
    * ``one_step`` / ``one_step_kernel_calculator`` -- callables taking the
                     list ``[(child.Q[u][x], child.get_w()), ...]``; the kernel
                     calculator must correspond to ``one_step``.
    * ``kernel``  -- ``{state: [{child_state: prob}, ...]}``, one dict per child.
    * ``new_beg`` -- marks the node as a "new beginning" whose values are the
                     unknowns of the linear system built by ``get_system``.
    """

    def __init__(self, id, model, parent = None, Q = None, cost = None, t=0):
        # time
        self.t = t
        self.id = id
        self.parent = parent
        # Fresh dicts per instance: a shared ``{}`` default would be aliased
        # by every node constructed without explicit Q / cost.
        self.Q = {} if Q is None else Q
        self.cost = {} if cost is None else cost
        self.w = {}
        self.children = []
        self.policy = {}
        self.terminal = True
        self.model = model
        self.name = str(id)
        self.one_step = None
        # Takes the same input as one_step and must describe the same risk map.
        self.one_step_kernel_calculator = None
        # kernel is of the form {state: [{state: prob}, ...]}
        self.kernel = {}
        self.new_beg = False

    def add_child(self, node):
        """Attach ``node`` as a child; this node stops being terminal."""
        self.children.append(node)
        node.parent = self
        self.terminal = False

    def reset_w(self):
        """Drop the cached values so the next ``get_w`` recomputes them."""
        self.w = {}

    # for value iteration
    def change_w(self, new_w, gamma):
        """Overwrite cached values with ``new_w`` scaled by ``gamma ** t``."""
        for key in new_w.keys():
            self.w[key] = new_w[key] * (gamma ** self.t)

    # a node is considered to be its own descendant
    def hasDescendant(self, nodes):
        """Return True if this node or any node below it is in ``nodes``."""
        if self in nodes:
            return True
        for child in self.children:
            if child.hasDescendant(nodes):
                return True
        return False

    def get_w(self):
        """Return the cached values, computing them first if the cache is empty.

        Terminal nodes take their values directly from ``cost``; other nodes
        run ``calc_policy``.
        """
        if self.w:
            return self.w
        if self.terminal:
            for x in self.model.X:
                self.w[x] = self.cost[x]
            return self.w
        self.calc_policy()
        return self.w

    def calc_policy(self):
        """
            Calculate the optimal policy and value for the maximal subtree rooted here.

            Stores the result in ``self.policy`` and ``self.w``; returns None.
            Raises RuntimeError when called on a terminal node.
        """
        if self.terminal:
            raise RuntimeError("calc_policy ran on terminal nodes!")

        def net_one_step(x, u):
            return self.cost[u][x] + self.one_step(
                [(child.Q[u][x], child.get_w()) for child in self.children]
            )

        policy = {}
        values = {}
        for x in self.model.X:
            # Evaluate every action once and reuse the value of the winner.
            action_values = {u: net_one_step(x, u) for u in self.model.U}
            best_u = min(self.model.U, key=action_values.__getitem__)
            policy[x] = best_u
            values[x] = action_values[best_u]
        self.policy = policy
        self.w = values

    def calc_kernel(self):
        """Fill ``self.kernel`` for every state under the current policy."""
        for x in self.model.X:
            self.kernel[x] = self.one_step_kernel_calculator(
                [(child.Q[self.policy[x]][x], child.get_w()) for child in self.children]
            )

    # Output has the form x: [c_1, c_2, ..., c_n, constant]; it is used to
    # build the system of linear equations.
    def get_system(self, gamma):
        """Express this node's value per state as an affine row.

        Each row has ``len(model.X) + 1`` entries: coefficients on the values
        of the ``new_beg`` node followed by a constant term.
        Requires ``calc_kernel`` to have been run on non-terminal nodes.
        """
        states = self.model.X
        n_states = len(states)

        if self.new_beg:
            # Unknown itself, scaled by gamma ** t; no constant term.
            system = {}
            for i in range(n_states):
                row = [0] * (n_states + 1)
                row[i] = gamma ** self.t
                system[states[i]] = row
            return system

        if self.terminal:
            # Pure constant: the terminal cost.
            return {x: [0] * n_states + [self.cost[x]] for x in states}

        # The children's systems do not depend on the state being processed,
        # so compute each one once instead of inside the innermost loop.
        child_systems = [child.get_system(gamma) for child in self.children]

        output = {}
        for self_state in states:
            coefficients = [0] * (n_states + 1)
            for child_index, child_system in enumerate(child_systems):
                child_kernel = self.kernel[self_state][child_index]
                for child_state, state_prob in child_kernel.items():
                    child_row = child_system[child_state]
                    for i in range(n_states + 1):
                        coefficients[i] = coefficients[i] + (child_row[i] * state_prob)
            # Add this node's own stage cost under the current policy.
            coefficients[n_states] = (
                coefficients[n_states] + self.cost[self.policy[self_state]][self_state]
            )
            output[self_state] = coefficients
        return output

    def print_tree(self, level = 0):
        """Print the subtree rooted here, indented by TAB_SIZE spaces per level."""
        print(" " * TAB_SIZE * level + self.name)
        for child in self.children:
            child.print_tree(level+1)


In [630]:
# Number of spaces of indentation per tree level in Node.print_tree.
TAB_SIZE = 4
# Absolute tolerance passed to np.isclose when checking whether the remaining
# alpha mass in AVAR / AVARKernel is zero.
EPS = 1e-9

In [631]:
# AVAR returns the risk value; AVARKernel (below) returns the corresponding kernel.
def AVAR(q, w, alpha):
    """Average of the largest outcomes of ``w`` carrying total mass ``alpha``.

    ``q`` maps each state to its probability and ``w`` maps each state to its
    value. Outcomes are visited from the largest value downwards, consuming
    probability mass until ``alpha`` is used up; the weighted sum is then
    divided by the original ``alpha``.
    """
    ranked = [(w[state], q[state]) for state in q]
    ranked.sort(reverse=True)

    level = alpha        # original level, kept for the final normalisation
    remaining = alpha    # mass still to be allocated
    total = 0
    for value, prob in ranked:
        if np.isclose(remaining, 0, atol=EPS):
            break
        if remaining >= prob:
            # The whole atom fits into the tail.
            total += value * prob
            remaining -= prob
            continue
        # Only part of this atom fits; nothing is left afterwards.
        total += value * remaining
        break
    return total / level

def AVARKernel(q, w, alpha):
    """Return the reweighted distribution that ``AVAR`` averages over.

    States are visited from the largest ``w`` downwards (ties broken by
    probability, then by state). Each visited state receives its consumed
    mass divided by ``alpha``; states that are never reached are absent from
    the returned dict.
    """
    ranked = sorted(((w[state], q[state], state) for state in q), reverse=True)

    level = alpha
    remaining = alpha
    weights = {}
    for _value, prob, state in ranked:
        if np.isclose(remaining, 0, atol=EPS):
            break
        if remaining >= prob:
            weights[state] = prob / level
            remaining -= prob
            continue
        # Partial atom: it takes whatever mass is left, then we are done.
        weights[state] = remaining / level
        break
    return weights

def add_qw(qw1, qw2):
    """Combine two ``(q, w)`` pairs into the pair for their independent sum.

    Each argument is a tuple ``(q, w)`` of dicts keyed by state. The result is
    keyed by the concatenated state tuple: probabilities are multiplied and
    values are added. Integer states (Python ``int`` or any NumPy integer) are
    wrapped into 1-tuples; states that are already tuples are concatenated
    as-is, which is what makes the function usable with ``functools.reduce``.
    """
    def _as_tuple(state):
        # np.integer covers int32, int64, ... so platform defaults also work.
        if isinstance(state, (int, np.integer)):
            return (state,)
        return state

    q1_map, w1_map = qw1[0], qw1[1]
    q2_map, w2_map = qw2[0], qw2[1]

    Q_sum = {}
    w_sum = {}
    for x1, q1 in q1_map.items():
        key1 = _as_tuple(x1)
        for x2, q2 in q2_map.items():
            # Look values up with the ORIGINAL keys; only the combined key is
            # tuple-ised (rebinding x1 here broke the lookup on later passes).
            xs = key1 + _as_tuple(x2)
            Q_sum[xs] = q1 * q2
            w_sum[xs] = w1_map[x1] + w2_map[x2]
    return (Q_sum, w_sum)

def AVAR_of_sum(list_qw, alpha):
    """Apply ``AVAR`` at level ``alpha`` to the combination of all ``(q, w)`` pairs.

    The pairs in ``list_qw`` are folded left-to-right with ``add_qw`` and the
    resulting pair is passed to ``AVAR``.
    """
    combined = reduce(add_qw, list_qw)
    return AVAR(*combined, alpha)
# The kernel for AVAR_of_sum is slightly more complicated, since it requires marginalizing; it is not implemented yet.


def sum_of_AVAR(list_qw, alpha):
    """Add up ``AVAR(q, w, alpha)`` over every ``(q, w)`` pair in ``list_qw``."""
    per_pair = [AVAR(*qw, alpha) for qw in list_qw]
    total = 0
    for risk in per_pair:
        total = total + risk
    return total
def sum_of_AVAR_kernel(list_qw,alpha):
    """Return one ``AVARKernel(q, w, alpha)`` per ``(q, w)`` pair, in input order."""
    kernels = []
    for qw in list_qw:
        kernels.append(AVARKernel(*qw, alpha))
    return kernels



In [632]:
class Model:
    """Base class holding the state/action spaces and the node list of a tree.

    Subclasses must implement ``construct_graph`` and ``construct_risks``;
    both are invoked from ``__init__``.
    """

    def __init__(self, lo, hi, U, alpha):
        """
            State space X = [lo, hi] of interval size = 1
            Action space U
            VaR calculation alpha
            Assume that 0 is root node
        """
        self.X = range(lo, hi + 1)
        self.lo, self.hi = lo, hi
        self.U = U
        self.alpha = alpha
        root = Node(0, self)
        self.nodes = [root]
        self.root = root
        self.construct_graph()
        self.construct_risks()

    def construct_graph(self):
        """Hook for subclasses: build the tree structure."""
        raise NotImplementedError("construct_graph has not been properly implemented!")

    def construct_risks(self):
        """Hook for subclasses: set the per-node risk data."""
        raise NotImplementedError("construct_risks has not been properly implemented!")

    def bound(self, q):
        """Clamp the support of ``q`` into ``[lo, hi]``.

        Mass on keys outside the interval is added to the nearest endpoint.
        The result has an entry for every state in ``X``.
        """
        clipped = dict.fromkeys(self.X, 0)
        for state, mass in q.items():
            target = max(self.lo, min(state, self.hi))
            clipped[target] += mass
        return clipped

    def draw_edge(self, parent_i, child_i):
        """Make node ``child_i`` a child of node ``parent_i``.

        Missing nodes up to the larger index are created first.
        """
        highest = max(parent_i, child_i)
        # add nodes appropriately
        while len(self.nodes) <= highest:
            self.nodes.append(Node(len(self.nodes), self))
        parent = self.nodes[parent_i]
        parent.add_child(self.nodes[child_i])





In [633]:
def valueIteration(root, need_to_reset, begs, gamma,count):
    """Run ``count`` sweeps of value iteration and return the root's values.

    Each sweep reads the root's current values, clears the cached values of
    every node in ``need_to_reset``, and feeds the values just read into each
    node of ``begs`` via ``change_w`` (which applies the ``gamma`` scaling).
    The values returned are recomputed once more after the last sweep.
    """
    for _ in range(0, count):
        current_values = root.get_w()

        for stale in need_to_reset:
            stale.reset_w()

        for restart in begs:
            restart.change_w(current_values, gamma)

    return root.get_w()

In [634]:


# NOTE: the costs in the model need to be discounted (updated by gamma).
#implements newton method in the finite case
def policyIteration(root, model, need_to_reset,begs,gamma,max_count):
    """Policy iteration: alternate exact policy evaluation and improvement.

    Parameters
    ----------
    root : node exposing ``calc_policy()``, ``calc_kernel()``,
        ``get_system(gamma)`` and a ``policy`` dict.
    model : object whose ``X`` is the sequence of states.
    need_to_reset : nodes whose cached values are cleared after each evaluation.
    begs : nodes that receive the freshly solved values via ``change_w``.
    gamma : discount factor forwarded to ``get_system`` and ``change_w``.
    max_count : iteration cap; once the iteration counter (starting at 0)
        exceeds it without the policy having stabilised, "sad" is printed and
        the loop stops.

    Returns
    -------
    dict mapping each state to the value solved for in the last evaluation
    (empty if no evaluation was performed).

    Only the root's policy is compared between iterations.
    """
    n_states = len(model.X)
    new_w = {}
    old_policy = {}

    # calc_policy() stores its result on the node and returns None, so the
    # policy has to be read back from root.policy (a snapshot copy, because
    # the node replaces/mutates its own dict on later calls).
    root.calc_policy()
    new_policy = dict(root.policy)

    count = -1
    while new_policy != old_policy:
        count = count + 1
        old_policy = new_policy

        # --- policy evaluation: solve (A - I) w = -c for the fixed policy ---
        root.calc_kernel()
        pre_sys = root.get_system(gamma)
        LHS = []
        RHS = []
        for i, state in enumerate(model.X):
            row = list(pre_sys[state][0:n_states])
            row[i] = -1 + row[i]
            LHS.append(row)
            RHS.append(-pre_sys[state][n_states])

        solution = np.linalg.solve(np.array(LHS), np.array(RHS))
        new_w = {state: solution[i] for i, state in enumerate(model.X)}

        # --- policy improvement against the newly solved values ---
        for node in need_to_reset:
            node.reset_w()
        for node in begs:
            node.change_w(new_w, gamma)
        root.calc_policy()
        new_policy = dict(root.policy)

        # Safety cap: stop instead of looping forever if the policy cycles.
        if count > max_count and new_policy != old_policy:
            print("sad")
            break
    return new_w

        
        

    

In [635]:


class infinite_RDModel(Model):
    """Three-node model: root 0 with children 1 and 2, where node 2 restarts the problem.

    Node 2 is flagged ``new_beg``; its values are the ones overwritten by
    value iteration and solved for by policy iteration. ``gamma`` is the
    discount factor and ``investment_cost`` the stage cost of any action
    other than 0 at a non-terminal node.
    """

    def __init__ (self, lo, hi, U, alpha, investment_cost = 1,gamma=.9):
        """
            q0(x, u), q1(x, u)
            c0(x, u), c1(x)
        """
        # These must exist before Model.__init__ runs, because it calls
        # construct_graph / construct_risks.
        self.gamma = gamma
        self.n = 2
        self.investment_cost = investment_cost
        self.new_begs=[]
        self.non_new_begs = []
        self.need_to_reset = []
        super().__init__(lo, hi, U, alpha)

    def construct_graph(self):
        """
            customize graph structure here
        """
        for i in range(1):
            self.draw_edge(2*i, 2*i + 1)
            self.draw_edge(2*i, 2*i + 2)
        #self.draw_edge(2*1, 2*1 + 1)
        self.new_begs=[self.nodes[2]]
        self.non_new_begs = [x for x in self.nodes if x not in self.new_begs]
        # Nodes whose cached values depend on a new-beginning node.
        self.need_to_reset =  [x for x in self.non_new_begs if x.hasDescendant(self.new_begs)]

    def construct_risks(self):
        """
            set Q, c, and one_step for each node
        """
        def q0(x, u, t):
            # Transition distribution used for non-terminal nodes.
            if u == 0:
                return {x-2: 0.2, x-1: 0.2, x: 0.2, x+1: 0.2, x+2: 0.2}
            # u == 1
            return {x+1: 0.4, x+2: 0.2, x+3: 0.4}

        def q1(x, u, t):
            # Transition distribution used for terminal nodes.
            if u == 0:
                return {x-1: 0.6, x: 0.2, x+1: 0.2}
            # u == 1
            return {x-1: 0.2, x: 0.4, x+1: 0.4}

        def c0(x, u, t):
            # Stage cost at non-terminal nodes: free for u == 0.
            if u == 0:
                return 0
            return self.investment_cost

        def c1(x, u, t):
            # x = state
            # u = action (unused here)
            # Terminal cost, discounted by gamma ** t.
            return np.exp(-x/20)*(self.gamma**t)

        # Flag the new-beginning nodes chosen in construct_graph rather than
        # hard-coding a node index here.
        for node in self.new_begs:
            node.new_beg = True

        for node in self.nodes:
            node.t = ceil(float(node.id)/2)
            if node.terminal:
                node.cost = {x : c1(x, None, node.t) for x in self.X}
                node.Q = {u : {x : self.bound(q1(x, u, node.t)) for x in self.X} for u in self.U}
            else:
                node.cost = {u : {x : c0(x, u, node.t) for x in self.X} for u in self.U}
                node.Q = {u : {x : self.bound(q0(x, u, node.t)) for x in self.X} for u in self.U}
            #!customize one step here
            node.one_step = lambda list_qw : sum_of_AVAR(list_qw, self.alpha)
            node.one_step_kernel_calculator = lambda list_qw : sum_of_AVAR_kernel(list_qw, self.alpha)

    # customized functions for this particular model
    def policy_change(self, policy):
        """Return the last key of ``policy`` (in iteration order) mapped to 1, or ``lo`` if none."""
        res = self.lo
        for k, v in policy.items():
            if v == 1:
                res = k
        return res

    def modelValueIteration(self,count):
        """Run ``valueIteration`` on this model for ``count`` sweeps."""
        return valueIteration(self.root,self.need_to_reset, self.new_begs,self.gamma,count)

    def modelPolicyIteration(self,max_count):
        """Run ``policyIteration`` on this model with the given iteration cap."""
        return policyIteration(self.root, self, self.need_to_reset,self.new_begs,self.gamma,max_count)

In [636]:
# Build the model on states -20..20 with actions [0, 1] and alpha = 0.1,
# then solve it by policy iteration.
# Debugging aids (uncomment as needed):
#   myModel.root.print_tree()
#   myModel.modelValueIteration(100)
#   print(myModel.nodes[2].get_system(.9))
myModel = infinite_RDModel(lo=-20, hi=20, U=[0, 1], alpha=0.1)
pol_val = myModel.modelPolicyIteration(max_count=10)
print(pol_val)



{-20: 24.464536456131366, -19: 24.46453645613137, -18: 24.345221503902494, -17: 24.124342153553503, -16: 23.817590104931543, -15: 23.43881793008161, -14: 23.000236152024854, -13: 22.5125899735458, -12: 21.985317722303726, -11: 21.426692870424812, -10: 20.843951299462987, -9: 20.243405313146805, -8: 19.630545748773276, -7: 19.010133401773093, -6: 18.386280855329716, -5: 17.762525696615146, -4: 17.141896001972597, -3: 16.52696888411949, -2: 15.919922814162998, -1: 15.322584359014781, 0: 14.736469909851726, 1: 14.162822918866555, 2: 13.602647109030542, 3: 13.056736074359852, 4: 12.52569964570642, 5: 12.009987358905962, 6: 11.50990932777963, 7: 11.025654793615212, 8: 10.557308595000533, 9: 10.104865776932554, 10: 9.668244535698895, 11: 9.247297675870374, 12: 8.841822737625776, 13: 8.451570936347823, 14: 8.076255041797955, 15: 7.715556311030428, 16: 7.369130577394299, 17: 7.036613587360368, 18: 6.717625667378186, 19: 6.411775794406906, 20: 6.118665136075267}


In [637]:
# Cross-check: 100 sweeps of value iteration on the same model.
val_val = myModel.modelValueIteration(count=100)
print(val_val)

{-20: 24.464536456131384, -19: 24.464536456131384, -18: 24.345221503902504, -17: 24.12434215355351, -16: 23.817590104931547, -15: 23.438817930081612, -14: 23.000236152024858, -13: 22.512589973545804, -12: 21.985317722303733, -11: 21.42669287042482, -10: 20.843951299462994, -9: 20.243405313146813, -8: 19.630545748773283, -7: 19.010133401773096, -6: 18.38628085532972, -5: 17.76252569661515, -4: 17.141896001972604, -3: 16.526968884119498, -2: 15.919922814163003, -1: 15.322584359014787, 0: 14.73646990985173, 1: 14.162822918866556, 2: 13.602647109030544, 3: 13.056736074359852, 4: 12.525699645706418, 5: 12.00998735890596, 6: 11.50990932777963, 7: 11.025654793615212, 8: 10.557308595000533, 9: 10.104865776932554, 10: 9.668244535698895, 11: 9.247297675870374, 12: 8.841822737625776, 13: 8.451570936347823, 14: 8.076255041797955, 15: 7.715556311030428, 16: 7.369130577394299, 17: 7.036613587360368, 18: 6.717625667378186, 19: 6.411775794406906, 20: 6.118665136075266}
