In [239]:
import sys

import numpy as np
import pandas as pd
import mdptoolbox as mdp

import itertools as it
import pprint as pp
from scipy.stats import binom

In [240]:
np.set_printoptions(threshold=sys.maxsize)

In [241]:
# NOTATION

# rplant - renewable plant 
# fplant - fossil fuel plant
# RES - referring to renewable plant
# FF - referring to fossil fuel plant

In [242]:
# VARIABLES

N_YEARS = 5
N_TECHSTAGES = 3
N_PLANTS = 5

# Starting price of carbon per ton.
C_CO2_INIT = 40
# Initial construction costs of a renewable plant per kW (average of solar PV and onshore wind).
C_CAP_RES = [1284, 746, 456]
# Annual operation & maintenance costs of a fossil fuel plant per kW (average of coal and natural gas).
C_OM_FF = 68.8
# Annual emissions of a fossil fuel plant in kg CO2 per kWh (average of coal and natural gas).
FF_EMIT = 2.03

# Probability that tech stage advances to the next given the current stage is not the highest. 
# Assume it is only possible to advance by 1 at a time.
P_ADV_TECHSTAGE = 0.25

# Probability that a renewable plant "fails" at the end of the year.
# A plant that fails is replaced in the next year for the same cost as building a new plant.
RPLANT_LIFE = 25
P_RPLANT_FAIL = 1 / RPLANT_LIFE

# Discount rate (average of solar PV and onshore wind in North America).
DISC_RATE = 0.06

In [243]:
# COST FUNCTION

def calc_cost(t, v, r, a):
    carbontax = C_CO2_INIT * (1.05 ** t)
    cost_fplants = (N_PLANTS - a) * (C_OM_FF + FF_EMIT * carbontax)
    # Assume renewable plants cost nothing after construction.
    cost_rplants = a * C_CAP_RES[v]
    total = cost_rplants + cost_fplants
    return round(total)

In [244]:
# STATE SPACE

# State space includes:
    # T = Time 
        # Range 2020 to 2050 years
    # (Carbon tax) 
        # Deterministic given time
        # Starting at $40/ton increases 5% per year
    # V = Tech "stage" 
        # Represents how advanced current energy technologies are
    # N_r = Number of renewable power plants 
        # Out of N total plants
        # Number of fossil fuel plants is N - N_r
        
S = (N_YEARS+1) * (N_TECHSTAGES) * (N_PLANTS+1)

# Create mapping between state and unique integer ID.
def enumerate_states(n_years, n_techstages, n_plants):
    state_to_id = {}
    idx = 0
    iter_states = it.product(np.arange(n_years+1), np.arange(n_techstages), np.arange(n_plants+1))
    for state in iter_states:
        (t, v, r) = state
        state_to_id[state] = idx
        idx += 1
    return state_to_id

In [245]:
state_to_id = enumerate_states(N_YEARS, N_TECHSTAGES, N_PLANTS)
id_to_state = {v: k for k, v in state_to_id.items()}
#pp.pprint(state_to_id)

In [246]:
# ACTION SPACE

# Possible actions:
    # 0 -- Do nothing
    # 1...N -- Convert 1...N fossil fuel plants to renewable plants
# An invalid action attempts to convert more fossil fuel plants than remain. 
    
A = N_PLANTS + 1

In [302]:
# TRANSITION PROBABILITIES

transitions = np.zeros([A, S, S])

def fill_trans_donothing(transitions, a):
    iter_states = it.product(np.arange(N_YEARS), np.arange(N_TECHSTAGES), np.arange(N_PLANTS))
    for state in iter_states:
        (t, v, r) = state
        state_curr = state
        idx_curr = state_to_id[state_curr]
        #assert np.sum(transitions[a][idx_curr]) == 0, np.sum(transitions[a][idx_curr])
        transitions[a][idx_curr].fill(0.0)
        rplants_existing = np.arange(r+1)
        # FAILURE LOOP
        # Any number of existing renewable plants may fail (at end of year).
        for e in rplants_existing:
            prob_fail = binom.pmf(e, r, P_RPLANT_FAIL)
            state_next = (t+1, v, r-e)
            idx_next = state_to_id[state_next]
            if v < N_TECHSTAGES - 1:
                state_next_v = (t+1, v+1, r-e)
                idx_next_v = state_to_id[state_next_v]
                # Tech stage may remain the same.
                transitions[a][idx_curr][idx_next] = (1.0-P_ADV_TECHSTAGE) * prob_fail
                # Tech stage may advance (assume only possible to advance by 1).
                transitions[a][idx_curr][idx_next_v] = P_ADV_TECHSTAGE * prob_fail
            else:
                # Tech stage must remain the same.
                transitions[a][idx_curr][idx_next] = prob_fail
        print("DO NOTHING", state, np.sum(transitions[a][idx_curr]))
#         print(state, np.sum(transitions[a][idx_curr]), ":", transitions[a][idx_curr])
        assert np.isclose(np.sum(transitions[a][idx_curr]), 1.0), np.sum(transitions[a][idx_curr])

def fill_trans_other(transitions):
    iter_states = it.product(np.arange(N_YEARS), np.arange(N_TECHSTAGES), np.arange(N_PLANTS))
    for state in iter_states:
        (t, v, r) = state
        state_curr = state
        idx_curr = state_to_id[state_curr]
        rplants_existing = np.arange(r+1)
        # ACTION LOOP
        # From 1 up to number of fossil fuel plants remaining may be converted.
        for a in np.arange(1, A):
            if a > N_PLANTS-r:
                print("invalid: ", a, state)
                print(transitions[a][idx_curr])
                # Dummy transitions for invalid actions. 
                fill_trans_donothing(transitions, a)
                continue
            else:
                # FAILURE LOOP
                # Any number of existing renewable plants may fail (at end of year).
                for e in rplants_existing:
                    prob_fail = binom.pmf(e, r, P_RPLANT_FAIL)
                    state_next_r = (t+1, v, r+a-e)
                    idx_next_r = state_to_id[state_next_r]
                    if v < N_TECHSTAGES - 1:
                        state_next_r_v = (t+1, v+1, r+a-e)
                        idx_next_r_v = state_to_id[state_next_r_v]
                        # Tech stage may remain the same.
                        transitions[a][idx_curr][idx_next_r] = (1.0-P_ADV_TECHSTAGE) * prob_fail
                        # Tech stage may advance.
                        transitions[a][idx_curr][idx_next_r_v] = P_ADV_TECHSTAGE * prob_fail
                    else:
                        # Tech stage must remain the same.
                        transitions[a][idx_curr][idx_next_r] = prob_fail
            print("OTHER", a, state, np.sum(transitions[a][idx_curr]))
#             print(state, "--", np.sum(transitions[a][idx_curr]), ":", transitions[a][idx_curr])
            assert np.isclose(np.sum(transitions[a][idx_curr]), 1.0), np.sum(transitions[a][idx_curr])

In [303]:
fill_trans_donothing(transitions, 0)
#print(transitions[0]) 18 22 23 28

DO NOTHING (0, 0, 0) 1.0
DO NOTHING (0, 0, 1) 1.0
DO NOTHING (0, 0, 2) 1.0
DO NOTHING (0, 0, 3) 1.0
DO NOTHING (0, 0, 4) 1.0
DO NOTHING (0, 1, 0) 1.0
DO NOTHING (0, 1, 1) 1.0
DO NOTHING (0, 1, 2) 1.0
DO NOTHING (0, 1, 3) 0.9999999999999999
DO NOTHING (0, 1, 4) 1.0
DO NOTHING (0, 2, 0) 1.0
DO NOTHING (0, 2, 1) 1.0
DO NOTHING (0, 2, 2) 1.0
DO NOTHING (0, 2, 3) 1.0
DO NOTHING (0, 2, 4) 1.0
DO NOTHING (1, 0, 0) 1.0
DO NOTHING (1, 0, 1) 1.0
DO NOTHING (1, 0, 2) 1.0
DO NOTHING (1, 0, 3) 0.9999999999999999
DO NOTHING (1, 0, 4) 1.0
DO NOTHING (1, 1, 0) 1.0
DO NOTHING (1, 1, 1) 1.0
DO NOTHING (1, 1, 2) 1.0
DO NOTHING (1, 1, 3) 1.0
DO NOTHING (1, 1, 4) 1.0
DO NOTHING (1, 2, 0) 1.0
DO NOTHING (1, 2, 1) 1.0
DO NOTHING (1, 2, 2) 1.0
DO NOTHING (1, 2, 3) 1.0
DO NOTHING (1, 2, 4) 1.0
DO NOTHING (2, 0, 0) 1.0
DO NOTHING (2, 0, 1) 1.0
DO NOTHING (2, 0, 2) 1.0
DO NOTHING (2, 0, 3) 1.0
DO NOTHING (2, 0, 4) 1.0
DO NOTHING (2, 1, 0) 1.0
DO NOTHING (2, 1, 1) 1.0
DO NOTHING (2, 1, 2) 1.0
DO NOTHING (2, 1, 3)

In [304]:
fill_trans_other(transitions)
# print(transitions[1][5])

OTHER 1 (0, 0, 0) 1.0
OTHER 2 (0, 0, 0) 1.0
OTHER 3 (0, 0, 0) 1.0
OTHER 4 (0, 0, 0) 1.0
OTHER 5 (0, 0, 0) 1.0
OTHER 1 (0, 0, 1) 1.0
OTHER 2 (0, 0, 1) 1.0
OTHER 3 (0, 0, 1) 1.0
OTHER 4 (0, 0, 1) 1.0
invalid:  5 (0, 0, 1)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
DO NOTHING (0, 0, 0) 1.0
DO NOTHING (0, 0, 1) 1.0
DO NOTHING (0, 0, 2) 1.0
DO NOTHING (0, 0, 3) 1.0
DO NOTHING (0, 0, 4) 1.0
DO NOTHING (0, 1, 0) 1.0
DO NOTHING (0, 1, 1) 1.0
DO NOTHING (0, 1, 2) 1.0
DO NOTHING (0, 1, 3) 0.9999999999999999
DO NOTHING (0, 1, 4) 1.0
DO NOTHING (0, 2, 0) 1.0
DO NOTHING (0, 2, 1) 1.0
DO NOTHING (0, 2, 2) 1.0
DO NOTHING (0, 2, 3) 1.0
DO NOTHING (0, 2, 4) 1.0
DO NOTHING (1, 0, 0) 1.0
DO NOTHING (1, 0, 1) 1.0
DO NOTHING

DO NOTHING (3, 2, 4) 1.0
DO NOTHING (4, 0, 0) 1.0
DO NOTHING (4, 0, 1) 1.0
DO NOTHING (4, 0, 2) 1.0
DO NOTHING (4, 0, 3) 1.0
DO NOTHING (4, 0, 4) 1.0
DO NOTHING (4, 1, 0) 1.0
DO NOTHING (4, 1, 1) 1.0
DO NOTHING (4, 1, 2) 1.0
DO NOTHING (4, 1, 3) 0.9999999999999998
DO NOTHING (4, 1, 4) 0.9999999999999999
DO NOTHING (4, 2, 0) 1.0
DO NOTHING (4, 2, 1) 1.0
DO NOTHING (4, 2, 2) 1.0
DO NOTHING (4, 2, 3) 1.0
DO NOTHING (4, 2, 4) 1.0
invalid:  4 (0, 0, 3)
[0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 4.80000e-05 3.45600e-03 8.29440e-02 6.63552e-01 0.00000e+00 0.00000e+00
 1.60000e-05 1.15200e-03 2.76480e-02 2.21184e-01 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00
 0.00000e+00 0.00000e+00 0.00000e+00 

DO NOTHING (0, 1, 0) 1.0
DO NOTHING (0, 1, 1) 1.0
DO NOTHING (0, 1, 2) 1.0
DO NOTHING (0, 1, 3) 0.9999999999999999
DO NOTHING (0, 1, 4) 1.0
DO NOTHING (0, 2, 0) 1.0
DO NOTHING (0, 2, 1) 1.0
DO NOTHING (0, 2, 2) 1.0
DO NOTHING (0, 2, 3) 1.0
DO NOTHING (0, 2, 4) 1.0
DO NOTHING (1, 0, 0) 1.0
DO NOTHING (1, 0, 1) 1.0
DO NOTHING (1, 0, 2) 1.0
DO NOTHING (1, 0, 3) 0.9999999999999999
DO NOTHING (1, 0, 4) 1.0
DO NOTHING (1, 1, 0) 1.0
DO NOTHING (1, 1, 1) 1.0
DO NOTHING (1, 1, 2) 1.0
DO NOTHING (1, 1, 3) 1.0
DO NOTHING (1, 1, 4) 1.0
DO NOTHING (1, 2, 0) 1.0
DO NOTHING (1, 2, 1) 1.0
DO NOTHING (1, 2, 2) 1.0
DO NOTHING (1, 2, 3) 1.0
DO NOTHING (1, 2, 4) 1.0
DO NOTHING (2, 0, 0) 1.0
DO NOTHING (2, 0, 1) 1.0
DO NOTHING (2, 0, 2) 1.0
DO NOTHING (2, 0, 3) 1.0
DO NOTHING (2, 0, 4) 1.0
DO NOTHING (2, 1, 0) 1.0
DO NOTHING (2, 1, 1) 1.0
DO NOTHING (2, 1, 2) 1.0
DO NOTHING (2, 1, 3) 0.9999999999999999
DO NOTHING (2, 1, 4) 1.0
DO NOTHING (2, 2, 0) 1.0
DO NOTHING (2, 2, 1) 1.0
DO NOTHING (2, 2, 2) 1.0
DO NO

AssertionError: 2.0

In [None]:
# REWARD MATRIX

rewards = np.zeros([A, S, S])

def fill_rewards(rewards, S):
    for a in np.arange(A):
        for s in np.arange(S):
            state = id_to_state[s]
            idx = state_to_id[state]
            # Sanity check for integer id.
            assert(idx == s)
            (t, v, r) = state
            if a > N_PLANTS-r:
                # For invalid actions, assume cost for highest action possible.
                cost = calc_cost(t, v, r, N_PLANTS-r)
            else:
                cost = calc_cost(t, v, r, a)
            rewards[a][idx].fill(cost)     

In [None]:
fill_rewards(rewards, S)
#print(rewards) 

In [305]:
# UNIT TESTS

state_curr = (0, 0, 0)
idx_curr = state_to_id[state_curr]
state_next = (1, 0, 1)
idx_next = state_to_id[state_next]
prob_A1 = (1.0-P_ADV_TECHSTAGE)
print(state_curr, " to ", state_next, " with action 1: ", transitions[1][idx_curr][idx_next], ", ", prob_A1)
assert(transitions[1][idx_curr][idx_next] == prob_A1)

(0, 0, 0)  to  (1, 0, 1)  with action 1:  0.75 ,  0.75


In [291]:
mdp_v0 = mdp.mdp.MDP(transitions, rewards, DISC_RATE, 0.001, 100)

StochasticError: 'PyMDPToolbox - The transition probability matrix is not stochastic.'