In [1]:
import numpy as np
import plotly.graph_objects as go

class Node:
    """One node of the decision tree used for the backward recursion.

    A node stores its time index, a transition function ``Q(x, u)`` and a
    stage-cost function ``cost(x, u, t)``.  Its value function ``w`` (a dict
    keyed by state) and its greedy ``policy`` are computed lazily from the
    values of its children the first time ``get_w`` is called.

    Parameters
    ----------
    t : time index of the node.
    id : identifier of the node (only used for printing here).
    model : object providing ``X`` (iterable of states), ``U`` (iterable of
        actions), ``alpha`` and ``AVAR(q, w, alpha)``.
    parent : parent node, overwritten by ``new_child`` of the parent.
    Q : callable ``Q(x, u)`` whose result is passed as ``q`` to ``model.AVAR``.
    cost : callable ``cost(x, u, t)`` returning the stage cost.
    """

    def __init__(self, t, id, model, parent = None, Q = None, cost = None):
        self.t = t # Time
        self.id = id
        self.parent = parent
        self.Q = Q
        self.cost = cost
        self.w = dict() # Value per state, filled lazily by get_w
        self.childs = []
        self.policy = dict() # One action for each of the possible states
        self.terminal = True # A node is terminal until it receives a child
        self.model = model

    def new_child(self, node):
        """Attach ``node`` as a child; this node stops being terminal."""
        self.childs.append(node)
        node.parent = self
        self.terminal = False

    def get_w(self):
        """Return the value dict ``w``, computing (and caching) it on first use.

        Terminal nodes use ``cost(x, 1, t)`` as their value; every other node
        delegates to ``calc_policy``, which recurses into the children.
        """
        if not self.w:
            if self.terminal:
                for x in self.model.X:
                    # This w is w_{t, s}: terminal cost evaluated with action 1
                    self.w[x] = self.cost(x, 1, self.t)
            else:
                self.calc_policy()

        return self.w

    def _action_value(self, x, u):
        """Stage cost of (x, u) plus the summed AVAR of every child's value."""
        return self.cost(x, u, self.t) + sum(
            [self.model.AVAR(self.Q(x, u), child.get_w(), self.model.alpha) for child in self.childs]
        )

    def calc_policy(self):
        """Fill ``policy`` (minimising action per state) and ``w`` (its value).

        Each (state, action) pair is evaluated exactly once and the minimum is
        reused as the value, instead of re-evaluating the chosen action.  Ties
        are resolved in favour of the action that comes first in ``model.U``,
        exactly as ``min`` does.
        """
        policy = {}
        values = {}
        for x in self.model.X:
            scored = [(u, self._action_value(x, u)) for u in self.model.U]
            best_u, best_value = min(scored, key=lambda pair: pair[1])
            policy[x] = best_u
            values[x] = best_value
        self.policy = policy
        self.w = values

    def print_tree(self, level = 0):
        """Print this node and its descendants, indented 4 spaces per level."""
        print(" " * 4 * level + f'{self.id}, x_{self.t, self.id % 2}')
        for child in self.childs:
            child.print_tree(level = level+1)
            

In [2]:
# The model is wrapped in a class so that parameters such as the horizon T
# or the range of the states can be changed easily.

class model1:
    """Risk-averse control model solved on a tree of ``Node`` objects.

    The state space is the integer range ``down..up``.  Two kinds of nodes are
    used: nodes with transition ``Q0`` / cost ``cost0`` and nodes with
    transition ``Q1`` / cost ``cost1``.  Constructing the model builds the
    tree, solves it from the root (``root.get_w()``) and stores, per time
    step, the state found by ``change_s`` in ``self.change``.

    Parameters
    ----------
    down, up : inclusive bounds of the integer state space.
    T : number of (Q1, Q0) node pairs created below the root.
    U : iterable of actions; copied so the default list is never shared.
    alpha : level passed to ``AVAR``; expected to be in (0, 1].
    cost_v : cost returned by ``cost0`` for action 1.
    """

    def __init__(self, down, up, T, U = [1, 0], alpha = 0.3, cost_v = 10):
        # State space
        self.X = np.arange(down, up+1)
        self.down = down
        self.up = up
        self.Q_0, self.Q_1 = self.precal_Q(self.X)
        self.T = T
        # Copy so that neither the caller's list nor the shared default is aliased
        self.U = list(U)
        self.alpha = alpha
        self.cost_v = cost_v
        self.nodes, self.ad_list = self.tree_def()
        self.root = self.create_tree(self.nodes, self.ad_list)
        # Solve the whole tree once; results are cached in every node
        self.root.get_w()
        self.change = self.change_s()

    def AVAR(self, q, w, alpha):
        """Average of ``w`` over the worst (largest) ``alpha`` mass of ``q``.

        Parameters
        ----------
        q : dict mapping next state -> probability.
        w : dict mapping state -> value (this is w_{t+1}).
        alpha : amount of probability mass to average over.

        Next states outside ``[down, up]`` are clamped to the nearest bound
        before looking up ``w``.  Outcomes are consumed from the largest value
        downwards until ``alpha`` mass has been used; the accumulated sum is
        divided by ``alpha``.  If ``alpha`` equals the total mass of ``q`` up
        to floating-point rounding, or exceeds it, the loop simply stops after
        the last outcome instead of indexing past the end.
        """
        # Pairs (value, probability), e.g. [(w(1), 0.2), (w(2), 0.6), ...]
        outcomes = sorted(
            [(w[max(self.down, min(k, self.up))], p) for k, p in q.items()],
            reverse=True,
        )
        remaining = alpha
        total = 0
        for value, prob in outcomes:
            if remaining <= 0:
                break
            # Take the whole outcome, or only what is left of the alpha budget
            mass = min(prob, remaining)
            total += value * mass
            remaining -= mass
        return total / alpha

    def precal_Q(self, X):
        """Precompute both transition tables for every state in ``X``.

        Returns ``(Q0, Q1)`` where ``Qk[x][u]`` is a dict mapping next state
        to probability.  Next states may fall outside ``X``; ``AVAR`` clamps
        them.
        """
        Q0 = dict()
        Q1 = dict()
        for x in X:
            Q0[x] = dict()
            Q0[x][0] = {x-2: 0.2, x-1: 0.2, x: 0.2, x+1: 0.2, x+2: 0.2}
            Q0[x][1] = {x+1: 0.4, x+2: 0.2, x+3: 0.4}

            Q1[x] = dict()
            Q1[x][0] = {x-1: 0.6, x: 0.2, x+1: 0.2}
            Q1[x][1] = {x-1: 0.2, x: 0.4, x+1: 0.4}
        return Q0, Q1

    def Q0(self, x, u):
        """Transition distribution of the first node type for state x, action u."""
        return self.Q_0[x][u]

    def Q1(self, x, u):
        """Transition distribution of the second node type for state x, action u."""
        return self.Q_1[x][u]

    def cost0(self, x, u, t):
        """Stage cost of the first node type: 0 for action 0, ``cost_v`` for action 1.

        x and t are accepted for interface compatibility but not used.  Any
        other action falls through and yields ``None``.
        """
        if u == 0:
            return 0
        if u == 1:
            return self.cost_v

    def cost1(self, x, u, t):
        """Stage cost of the second node type: ``exp(-x/20) - t`` (u is unused)."""
        return np.exp(-x/20) - t

    # Create tree

    # ad_list is similar to: [(0, 1), (0, 2), (1, 3), ...]
    # With the relation that if we have (0, 1), then 0 is the parent of 1
    def create_tree(self, nodes, ad_list):
        """Link ``nodes`` according to the (parent, child) index pairs; return nodes[0]."""
        root = nodes[0]
        for parent_idx, child_idx in ad_list:
            nodes[parent_idx].new_child(nodes[child_idx])
        return root

    def tree_def(self):
        """Create the (still unlinked) nodes and the adjacency list.

        Layout, with list index equal to node id:
          * node 0 is the root (t = 0, Q0 / cost0);
          * for i in 0..T-1, node 2i+1 (Q1 / cost1) and node 2i+2 (Q0 / cost0)
            both live at time i+1 and are children of node 2i;
          * a final node 2T+1 (Q1 / cost1) at time T+1 is the child of node 2T.

        Returns ``(nodes, ad_list)``.
        """
        # Adjacency list of (parent index, child index)
        ad_list = []

        # The root
        root = Node(0, id=0, model=self)
        root.Q = self.Q0
        root.cost = self.cost0

        # List of nodes
        nodes = [root]

        for i in range(0, self.T):
            new1 = Node(i+1, id=2*i+1, model=self, Q=self.Q1, cost=self.cost1)
            new2 = Node(i+1, id=2*i+2, model=self, Q=self.Q0, cost=self.cost0)
            nodes.append(new1)
            nodes.append(new2)
            ad_list.append((2*i, 2*i+1))
            ad_list.append((2*i, 2*i+2))

        # Add the last node, hanging from the last Q0 node (index 2T)
        new = Node(self.T+1, id = 2*self.T+1, model=self, Q=self.Q1, cost=self.cost1)
        nodes.append(new)

        last_parent = 2 * self.T  # same value as len(ad_list) at this point
        ad_list.append((last_parent, last_parent + 1))

        return nodes, ad_list

    def graph_show(self):
        """Plot, for every node, the chosen action as a function of the state."""
        # Create the figure
        fig = go.Figure()

        for node in self.nodes:
            # policy maps state -> action; terminal nodes have an empty policy
            x = list(node.policy.keys())
            y = list(node.policy.values())
            fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name=f'Time {node.t}'))

        fig.update_layout(
            title='Policies by Node',
            xaxis_title='State',
            yaxis_title='Action',
            legend_title='Node ID',
        )

        fig.show()

    def change_s(self):
        """Return ``{node.t: x}`` with the last state at which the policy switches.

        For every non-terminal node the states are scanned in the order of
        ``self.X``; the first state is compared against action 1.  Each time
        the action differs from the previous one the entry for ``node.t`` is
        overwritten, so only the last switch is kept.  Nodes whose policy is
        constantly 1 get no entry.
        """
        change = {}
        for node in self.nodes:
            if node.terminal:
                continue
            prev = 1
            for x in self.X:
                if node.policy[x] != prev:
                    change[node.t] = x
                prev = node.policy[x]
        return change
    

In [3]:
# Sweep the cost of action 1 (cost_v) while alpha stays fixed at 0.3
cost_vars = range(1, 23, 2)
alphas = [0.1, 0.2, 0.3, 0.4, 0.5]  # defined here but not used in this cell

# Horizons T solved for every cost value; each one becomes a trace in a frame
horizons = range(50, 105, 5)

# Build one animation frame per value of cost_var
frames = []
for cost_var in cost_vars:
    models = [
        model1(down=-50, up=50, T=horizon, U=[1, 0], alpha=0.3, cost_v=cost_var)
        for horizon in horizons
    ]
    changes = [solved.change for solved in models]

    # One marker trace per horizon: x = node time, y = state stored in `change`
    frame_data = [
        go.Scatter(
            x=list(switch.keys()),
            y=list(switch.values()),
            mode='markers',
            name=f'T = {horizon}',
        )
        for horizon, switch in zip(horizons, changes)
    ]

    frames.append(go.Frame(data=frame_data, name=f'Cost {cost_var}'))


In [4]:
# The figure starts out showing the traces of the first frame
initial_frame = frames[0].data

# Play / Pause buttons driving the animation
play_button = dict(
    label="Play",
    method="animate",
    args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True, mode="immediate")],
)
pause_button = dict(
    label="Pause",
    method="animate",
    args=[[None], dict(frame=dict(duration=0, redraw=False), mode="immediate")],
)

# One slider step per frame; selecting a step animates to the frame of that name
slider_steps = [
    dict(
        args=[[frame.name], dict(frame=dict(duration=300, redraw=True), mode='immediate')],
        label=str(frame.name),
        method='animate',
    )
    for frame in frames
]

# Create the figure with the frames and the buttons
fig = go.Figure(
    data=initial_frame,
    layout=go.Layout(
        title='Changes by Time with Cost Slider',
        xaxis=dict(title='Time'),
        yaxis=dict(title='Value of Change'),
        yaxis_range=[-50, 60],
        updatemenus=[dict(type="buttons", showactive=False, buttons=[play_button, pause_button])],
    ),
    frames=frames,
)

# Draw a black frame around the plotting area
axis_border = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_xaxes(**axis_border)
fig.update_yaxes(**axis_border)

# Tick spacing and the slider itself
fig.update_layout(
    yaxis=dict(dtick=10),
    xaxis=dict(dtick=10),
    sliders=[dict(steps=slider_steps, transition=dict(duration=300))],
)
fig.show()

In [14]:
# Sweep the risk level alpha while the cost of action 1 stays fixed at 5
cost_vars = range(1, 23, 2)  # defined here but not used in this cell
alphas = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6]

# Horizons T solved for every alpha; each one becomes a trace in a frame
horizons = range(50, 105, 5)

# Build one animation frame per value of alpha
frames = []
for alpha in alphas:
    models = [
        model1(down=-50, up=50, T=horizon, U=[1, 0], alpha=alpha, cost_v=5)
        for horizon in horizons
    ]
    changes = [solved.change for solved in models]

    # One marker trace per horizon: x = node time, y = state stored in `change`
    frame_data = [
        go.Scatter(
            x=list(switch.keys()),
            y=list(switch.values()),
            mode='markers',
            name=f'T = {horizon}',
        )
        for horizon, switch in zip(horizons, changes)
    ]

    frames.append(go.Frame(data=frame_data, name=f'Alpha {alpha}'))


In [16]:
# The figure starts out showing the traces of the first frame
initial_frame = frames[0].data

# Play / Pause buttons driving the animation
play_button = dict(
    label="Play",
    method="animate",
    args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True, mode="immediate")],
)
pause_button = dict(
    label="Pause",
    method="animate",
    args=[[None], dict(frame=dict(duration=0, redraw=False), mode="immediate")],
)

# One slider step per frame; selecting a step animates to the frame of that name
slider_steps = [
    dict(
        args=[[frame.name], dict(frame=dict(duration=300, redraw=True), mode='immediate')],
        label=str(frame.name),
        method='animate',
    )
    for frame in frames
]

# Create the figure with the frames and the buttons
fig = go.Figure(
    data=initial_frame,
    layout=go.Layout(
        title='Changes by Time with Alpha Slider and cost = 5',
        xaxis=dict(title='Time'),
        yaxis=dict(title='Value of Change'),
        yaxis_range=[-50, 60],
        updatemenus=[dict(type="buttons", showactive=False, buttons=[play_button, pause_button])],
    ),
    frames=frames,
)

# Draw a black frame around the plotting area
axis_border = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_xaxes(**axis_border)
fig.update_yaxes(**axis_border)

# Tick spacing and the slider itself
fig.update_layout(
    yaxis=dict(dtick=10),
    xaxis=dict(dtick=10),
    sliders=[dict(steps=slider_steps, transition=dict(duration=300))],
)
fig.show()

In [7]:
import plotly.io as pio
# Export the current `fig` to figure.html (relative to the working directory).
# NOTE(review): `fig` is whichever figure cell ran last; when the notebook is
# run top to bottom that is the alpha-slider figure.
pio.write_html(fig, file='figure.html')

In [8]:
# TODO: also create a slider for alpha
# TODO: do the part about AVaRs that Hung explained to me