In [1]:
import numpy as np
np.set_printoptions(suppress=True)
import time
from numba import njit,guvectorize,float64
import scipy.optimize as opt
from matplotlib import pyplot as plt

#Set
t = np.arange(1, 101)
NT = len(t)

In [2]:
import tensorflow as tf
tf1 = tf.constant("hellow")
sess = tf.Session()
print(sess.run(tf1))

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


b'hellow'


In [3]:
#Parameters
fosslim = 6000 # Maximum cumulative extraction fossil fuels (GtC); denoted by CCum
tstep  = 5 # Years per Period
ifopt  = 0 # Indicator where optimized is 1 and base is 0

In [4]:
#Preferences

elasmu = 1.45 #  Elasticity of marginal utility of consumption
prstp = 0.015 #   Initial rate of social time preference per year 

#** Population and technology
gama  = 0.300 #   Capital elasticity in production function         /.300 /
pop0  = 7403   # Initial world population 2015 (millions)          /7403 /
popadj = 0.134 #  Growth rate to calibrate to 2050 pop projection  /0.134/
popasym = 11500 # Asymptotic population (millions)                 /11500/
dk  = 0.100 #     Depreciation rate on capital (per year)           /.100 /
q0  = 105.5 #     Initial world gross output 2015 (trill 2010 USD) /105.5/
k0  = 223 #     Initial capital value 2015 (trill 2010 USD)        /223  /
a0  = 5.115 #     Initial level of total factor productivity       /5.115/
ga0  = 0.076 #    Initial growth rate for TFP per 5 years          /0.076/
dela  = 0.005 #   Decline rate of TFP per 5 years                  /0.005/

#** Emissions parameters
gsigma1  = -0.0152 # Initial growth of sigma (per year)            /-0.0152/
dsig  = -0.001 #   Decline rate of decarbonization (per period)    /-0.001 /
eland0 = 2.6 #  Carbon emissions from land 2015 (GtCO2 per year)   / 2.6   /
deland = 0.115 # Decline rate of land emissions (per period)        / .115  /
e0 = 35.85 #    Industrial emissions 2015 (GtCO2 per year)       /35.85  /
miu0  = 0.03 #   Initial emissions control rate for base case 2015  /.03    /

#** Carbon cycle
#* Initial Conditions
mat0 = 851 #  Initial Concentration in atmosphere 2015 (GtC)       /851  /
mu0  = 460 #  Initial Concentration in upper strata 2015 (GtC)     /460  /
ml0  = 1740 #  Initial Concentration in lower strata 2015 (GtC)    /1740 /
mateq = 588 # mateq Equilibrium concentration atmosphere  (GtC)    /588  /
mueq  = 360 # mueq Equilibrium concentration in upper strata (GtC) /360  /
mleq = 1720 # mleq Equilibrium concentration in lower strata (GtC) /1720 /

#* Flow paramaters, denoted by Phi_ij in the model
b12  = 0.12 #    Carbon cycle transition matrix                     /.12  /
b23  = 0.007 #   Carbon cycle transition matrix                    /0.007/
#* These are for declaration and are defined later
b11  = None   # Carbon cycle transition matrix
b21  = None  # Carbon cycle transition matrix
b22  = None  # Carbon cycle transition matrix
b32  = None  # Carbon cycle transition matrix
b33  = None  # Carbon cycle transition matrix
sig0  = None  # Carbon intensity 2010 (kgCO2 per output 2005 USD 2010)

#** Climate model parameters
t2xco2  = 3.1 # Equilibrium temp impact (oC per doubling CO2)    / 3.1 /
fex0  = 0.5 #   2015 forcings of non-CO2 GHG (Wm-2)              / 0.5 /
fex1  = 1.0 #   2100 forcings of non-CO2 GHG (Wm-2)              / 1.0 /
tocean0  = 0.0068 # Initial lower stratum temp change (C from 1900) /.0068/
tatm0  = 0.85 #  Initial atmospheric temp change (C from 1900)    /0.85/
c1  = 0.1005 #     Climate equation coefficient for upper level  /0.1005/
c3  = 0.088 #     Transfer coefficient upper to lower stratum    /0.088/
c4  = 0.025 #     Transfer coefficient for lower level           /0.025/
fco22x  = 3.6813 # eta in the model; Eq.22 : Forcings of equilibrium CO2 doubling (Wm-2)   /3.6813 /

#** Climate damage parameters
a10  = 0 #     Initial damage intercept                         /0   /
a20  = None #     Initial damage quadratic term
a1  = 0 #      Damage intercept                                 /0   /
a2  = 0.00236 #      Damage quadratic term                     /0.00236/
a3  = 2.00 #      Damage exponent                              /2.00   /

#** Abatement cost
expcost2 = 2.6 # Theta2 in the model, Eq. 10 Exponent of control cost function             / 2.6  /
pback  = 550 #   Cost of backstop 2010$ per tCO2 2015          / 550  /
gback  = 0.025 #   Initial cost decline backstop cost per period / .025/
limmiu  = 1.2 #  Upper limit on control rate after 2150        / 1.2 /
tnopol  = 45 #  Period before which no emissions controls base  / 45   /
cprice0  = 2 # Initial base carbon price (2010$ per tCO2)      / 2    /
gcprice  = 0.02 # Growth rate of base carbon price per year     /.02  /

#** Scaling and inessential parameters
#* Note that these are unnecessary for the calculations
#* They ensure that MU of first period's consumption =1 and PV cons = PV utilty
scale1  = 0.0302455265681763 #    Multiplicative scaling coefficient           /0.0302455265681763 /
scale2  = -10993.704 #    Additive scaling coefficient       /-10993.704/;

In [5]:
#* Parameters for long-run consistency of carbon cycle 
#(Question)
b11 = 1 - b12
b21 = b12*mateq/mueq
b22 = 1 - b21 - b23
b32 = b23*mueq/mleq
b33 = 1 - b32

#* Further definitions of parameters
a20 = a2
sig0 = e0/(q0*(1-miu0)) #From Eq. 14
lam = fco22x/ t2xco2 #From Eq. 25

l = np.zeros(NT)
l[0] = pop0 #Labor force
al = np.zeros(NT) 
al[0] = a0
gsig = np.zeros(NT) 
gsig[0] = gsigma1
sigma = np.zeros(NT)
sigma[0]= sig0
ga = ga0 * np.exp(-dela*5*(t-1)) #TFP growth rate dynamics, Eq. 7
pbacktime = pback * (1-gback)**(t-1) #Backstop price
etree = eland0*(1-deland)**(t-1) #Emissions from deforestration
rr = 1/((1+prstp)**(tstep*(t-1))) #Eq. 3
#The following three equations define the exogenous radiative forcing; used in Eq. 23  
forcoth = np.full(NT,fex0)
forcoth[0:18] = forcoth[0:18] + (1/17)*(fex1-fex0)*(t[0:18]-1)
forcoth[18:NT] = forcoth[18:NT] + (fex1-fex0)
optlrsav = (dk + .004)/(dk + .004*elasmu + prstp)*gama #Optimal long-run savings rate used for transversality (Question)
cost1 = np.zeros(NT)
cumetree = np.zeros(NT)
cumetree[0] = 100
cpricebase = cprice0*(1+gcprice)**(5*(t-1)) 

In [6]:
@njit('(float64[:], int32)')
def InitializeLabor(il,iNT):
    for i in range(1,iNT):
        il[i] = il[i-1]*(popasym / il[i-1])**popadj

@njit('(float64[:], int32)')        
def InitializeTFP(ial,iNT):
    for i in range(1,iNT):
        ial[i] = ial[i-1]/(1-ga[i-1])
        
@njit('(float64[:], int32)')        
def InitializeGrowthSigma(igsig,iNT):
    for i in range(1,iNT):
        igsig[i] = igsig[i-1]*((1+dsig)**tstep)
        
@njit('(float64[:], float64[:],float64[:],int32)')        
def InitializeSigma(isigma,igsig,icost1,iNT):
    for i in range(1,iNT):
        isigma[i] =  isigma[i-1] * np.exp(igsig[i-1] * tstep)
        icost1[i] = pbacktime[i] * isigma[i]  / expcost2 /1000
        
@njit('(float64[:], int32)')        
def InitializeCarbonTree(icumetree,iNT):
    for i in range(1,iNT):
        icumetree[i] = icumetree[i-1] + etree[i-1]*(5/3.666)


In [7]:
"""
Functions of the model
"""

"""
First: Functions related to emissions of carbon and weather damages
"""

# Retuns the total carbon emissions; Eq. 18
@njit('float64(float64[:],int32)') 
def fE(iEIND,index):
    return iEIND[index] + etree[index]

#Eq.14: Determines the emission of carbon by industry EIND
@njit('float64(float64[:],float64[:],float64[:],int32)') 
def fEIND(iYGROSS, iMIU, isigma,index):
    return isigma[index] * iYGROSS[index] * (1 - iMIU[index])

#Cumulative industrial emission of carbon
@njit('float64(float64[:],float64[:],int32)') 
def fCCA(iCCA,iEIND,index):
    return iCCA[index-1] + iEIND[index-1] * 5 / 3.666

#Cumulative total carbon emission
@njit('float64(float64[:],float64[:],int32)')
def fCCATOT(iCCA,icumetree,index):
    return iCCA[index] + icumetree[index]

#Eq. 22: the dynamics of the radiative forcing
@njit('float64(float64[:],int32)')
def fFORC(iMAT,index):
    return fco22x * np.log(iMAT[index]/588.000)/np.log(2) + forcoth[index]

# Dynamics of Omega; Eq.9
@njit('float64(float64[:],int32)')
def fDAMFRAC(iTATM,index):
    return a1*iTATM[index] + a2*iTATM[index]**a3

#Calculate damages as a function of Gross industrial production; Eq.8 
@njit('float64(float64[:],float64[:],int32)')
def fDAMAGES(iYGROSS,iDAMFRAC,index):
    return iYGROSS[index] * iDAMFRAC[index]

#Dynamics of Lambda; Eq. 10 - cost of the reudction of carbon emission (Abatement cost)
@njit('float64(float64[:],float64[:],float64[:],int32)') 
def fABATECOST(iYGROSS,iMIU,icost1,index):
    return iYGROSS[index] * icost1[index] * iMIU[index]**expcost2

#Marginal Abatement cost
@njit('float64(float64[:],int32)')
def fMCABATE(iMIU,index):
    return pbacktime[index] * iMIU[index]**(expcost2-1)

#Price of carbon reduction
@njit('float64(float64[:],int32)')
def fCPRICE(iMIU,index):
    return pbacktime[index] * (iMIU[index])**(expcost2-1)

#Eq. 19: Dynamics of the carbon concentration in the atmosphere
@njit('float64(float64[:],float64[:],float64[:],int32)') 
def fMAT(iMAT,iMU,iE,index):
    if(index == 0):
        return mat0
    else:
        return iMAT[index-1]*b11 + iMU[index-1]*b21 + iE[index-1] * 5 / 3.666

#Eq. 21: Dynamics of the carbon concentration in the ocean LOW level
@njit('float64(float64[:],float64[:],int32)') 
def fML(iML,iMU,index):
    if(index == 0):
        return ml0
    else:
        return iML[index-1] * b33  + iMU[index-1] * b23

#Eq. 20: Dynamics of the carbon concentration in the ocean UP level
@njit('float64(float64[:],float64[:],float64[:],int32)') 
def fMU(iMAT,iMU,iML,index):
    if(index == 0):
        return mu0
    else:
        return iMAT[index-1]*b12 + iMU[index-1]*b22 + iML[index-1]*b32

#Eq. 23: Dynamics of the atmospheric temperature
@njit('float64(float64[:],float64[:],float64[:],int32)') 
def fTATM(iTATM,iFORC,iTOCEAN,index):
    if(index == 0):
        return tatm0
    else:
        return iTATM[index-1] + c1 * (iFORC[index] - (fco22x/t2xco2) * iTATM[index-1] - c3 * (iTATM[index-1] - iTOCEAN[index-1]))

#Eq. 24: Dynamics of the ocean temperature
@njit('float64(float64[:],float64[:],int32)')
def fTOCEAN(iTATM,iTOCEAN,index):
    if(index == 0):
        return tocean0
    else:
        return iTOCEAN[index-1] + c4 * (iTATM[index-1] - iTOCEAN[index-1])

"""
Second: Function related to economic variables
"""

#The total production without climate losses denoted previously by YGROSS
@njit('float64(float64[:],float64[:],float64[:],int32)')
def fYGROSS(ial,il,iK,index):
    return ial[index] * ((il[index]/1000)**(1-gama)) * iK[index]**gama

#The production under the climate damages cost
@njit('float64(float64[:],float64[:],int32)')
def fYNET(iYGROSS, iDAMFRAC, index):
    return iYGROSS[index] * (1 - iDAMFRAC[index])

#Production after abatement cost
@njit('float64(float64[:],float64[:],int32)')
def fY(iYNET,iABATECOST,index):
    return iYNET[index] - iABATECOST[index]

#Consumption Eq. 11
@njit('float64(float64[:],float64[:],int32)')
def fC(iY,iI,index):
    return iY[index] - iI[index]

#Per capita consumption, Eq. 12
@njit('float64(float64[:],float64[:],int32)')
def fCPC(iC,il,index):
    return 1000 * iC[index] / il[index]

#Saving policy: investment
@njit('float64(float64[:],float64[:],int32)')
def fI(iS,iY,index):
    return iS[index] * iY[index] 

#Capital dynamics Eq. 13
@njit('float64(float64[:],float64[:],int32)')
def fK(iK,iI,index):
    if(index == 0):
        return k0
    else:
        return (1-dk)**tstep * iK[index-1] + tstep * iI[index-1]

#Interest rate equation; Eq. 26 added in personal notes
@njit('float64(float64[:],int32)')
def fRI(iCPC,index):
    return (1 + prstp) * (iCPC[index+1]/iCPC[index])**(elasmu/tstep) - 1

#Periodic utility: A form of Eq. 2
@njit('float64(float64[:],float64[:],int32)')
def fCEMUTOTPER(iPERIODU,il,index):
    return iPERIODU[index] * il[index] * rr[index]

#The term between brackets in Eq. 2
@njit('float64(float64[:],float64[:],int32)')
def fPERIODU(iC,il,index):
    return ((iC[index]*1000/il[index])**(1-elasmu) - 1) / (1 - elasmu) - 1

#utility function
@guvectorize([(float64[:], float64[:])], '(n), (m)')
def fUTILITY(iCEMUTOTPER, resUtility):
    resUtility[0] = tstep * scale1 * np.sum(iCEMUTOTPER) + scale2


In [8]:

"""
In this part we implement the objective function
"""

# * Control rate limits
MIU_lo = np.full(NT,0.01)
MIU_up = np.full(NT,limmiu)
MIU_up[0:29] = 1
MIU_lo[0] = miu0
MIU_up[0] = miu0
MIU_lo[MIU_lo==MIU_up] = 0.99999*MIU_lo[MIU_lo==MIU_up]
bnds1=[]
for i in range(NT):
    bnds1.append((MIU_lo[i],MIU_up[i]))
# * Control variables
lag10 = t > NT - 10
S_lo = np.full(NT,1e-1)
S_lo[lag10] = optlrsav
S_up = np.full(NT,0.9)
S_up[lag10] = optlrsav
S_lo[S_lo==S_up] = 0.99999*S_lo[S_lo==S_up]
bnds2=[]
for i in range(NT):
    bnds2.append((S_lo[i],S_up[i]))
    
# Arbitrary starting values for the control variables:
S_start = np.full(NT,0.2)
S_start[S_start < S_lo] = S_lo[S_start < S_lo]
S_start[S_start > S_up] = S_lo[S_start > S_up]
MIU_start = 0.99*MIU_up
MIU_start[MIU_start < MIU_lo] = MIU_lo[MIU_start < MIU_lo]
MIU_start[MIU_start > MIU_up] = MIU_up[MIU_start > MIU_up]

K = np.zeros(NT)
YGROSS = np.zeros(NT)
EIND = np.zeros(NT)
E = np.zeros(NT)
CCA = np.zeros(NT)
CCATOT = np.zeros(NT)
MAT = np.zeros(NT)
ML = np.zeros(NT)
MU = np.zeros(NT)
FORC = np.zeros(NT)
TATM = np.zeros(NT)
TOCEAN = np.zeros(NT)
DAMFRAC = np.zeros(NT)
DAMAGES = np.zeros(NT)
ABATECOST = np.zeros(NT)
MCABATE = np.zeros(NT)
CPRICE = np.zeros(NT)
YNET = np.zeros(NT)
Y = np.zeros(NT)
I = np.zeros(NT)
C = np.zeros(NT)
CPC = np.zeros(NT)
RI = np.zeros(NT)
PERIODU = np.zeros(NT)
CEMUTOTPER = np.zeros(NT)

In [9]:
#The objective function
#It returns the utility as scalar
def fOBJ(x,sign,iI,iK,ial,il,iYGROSS,isigma,iEIND,iE,iCCA,iCCATOT,icumetree,iMAT,iMU,iML,iFORC,iTATM,iTOCEAN,iDAMFRAC,iDAMAGES,iABATECOST,icost1,iMCABATE,
         iCPRICE,iYNET,iY,iC,iCPC,iPERIODU,iCEMUTOTPER,iRI,iNT):
    
    iMIU = x[0:iNT]
    iS = x[iNT:(2*iNT)]
    
    for i in range(iNT):
        iK[i] = fK(iK,iI,i)
        iYGROSS[i] = fYGROSS(ial,il,iK,i)
        iEIND[i] = fEIND(iYGROSS, iMIU, isigma,i)
        iE[i] = fE(iEIND,i)
        iCCA[i] = fCCA(iCCA,iEIND,i)
        iCCATOT[i] = fCCATOT(iCCA,icumetree,i)
        iMAT[i] = fMAT(iMAT,iMU,iE,i)
        iML[i] = fML(iML,iMU,i)
        iMU[i] = fMU(iMAT,iMU,iML,i)
        iFORC[i] = fFORC(iMAT,i)
        iTATM[i] = fTATM(iTATM,iFORC,iTOCEAN,i)
        iTOCEAN[i] = fTOCEAN(iTATM,iTOCEAN,i)
        iDAMFRAC[i] = fDAMFRAC(iTATM,i)
        iDAMAGES[i] = fDAMAGES(iYGROSS,iDAMFRAC,i)
        iABATECOST[i] = fABATECOST(iYGROSS,iMIU,icost1,i)
        iMCABATE[i] = fMCABATE(iMIU,i)
        iCPRICE[i] = fCPRICE(iMIU,i)
        iYNET[i] = fYNET(iYGROSS, iDAMFRAC, i)
        iY[i] = fY(iYNET,iABATECOST,i)
        iI[i] = fI(iS,iY,i)
        iC[i] = fC(iY,iI,i)
        iCPC[i] = fCPC(iC,il,i)
        iPERIODU[i] = fPERIODU(iC,il,i)
        iCEMUTOTPER[i] = fCEMUTOTPER(iPERIODU,il,i)
        iRI = fRI(iCPC,i)
        
    resUtility = np.zeros(1)
    fUTILITY(iCEMUTOTPER, resUtility)
    
    return sign*resUtility[0]

In [10]:
InitializeLabor(l,NT)
InitializeTFP(al,NT)
InitializeGrowthSigma(gsig,NT)
InitializeSigma(sigma,gsig,cost1,NT)
InitializeCarbonTree(cumetree,NT)

In [11]:
class DQN:
    def __init__(self, learning_rate, gamma, n_features, n_actions, epsilon, parameter_changing_pointer, memory_size):

        self.learning_rate = learning_rate
        self.gamma = gamma
        self.n_features = n_features
        self.n_actions = n_actions
        self.epsilon = epsilon
        self.batch_size = 100
        self.experience_counter = 0
        self.experience_limit = memory_size
        self.replace_target_pointer = parameter_changing_pointer
        self.learning_counter = 0
        self.memory = np.zeros([self.experience_limit, self.n_features * 2 + 2])

        self.build_networks()
        p_params = tf.get_collection('primary_network_parameters')
        t_params = tf.get_collection('target_network_parameters')
        self.replacing_target_parameters = [tf.assign(t, p) for t, p in zip(t_params, p_params)]

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
        
    def build_networks(self):
        # primary network
        hidden_units = 300
        self.s = tf.placeholder(tf.float32, [None, self.n_features])
        self.qtarget = tf.placeholder(tf.float32, [None, self.n_actions])

        with tf.variable_scope('primary_network'):
            c = ['primary_network_parameters', tf.GraphKeys.GLOBAL_VARIABLES]
            with tf.variable_scope('layer1'):
                w1 = tf.get_variable('w1', [self.n_features, hidden_units],
                                     initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                b1 = tf.get_variable('b1', [1, hidden_units], initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)

            with tf.variable_scope('layer2'):
                w2 = tf.get_variable('w2', [hidden_units, self.n_actions],
                                     initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                self.qeval = tf.matmul(l1, w2) + b2

        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.qtarget, self.qeval))

        with tf.variable_scope('optimiser'):
            self.train = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)

        # target network
        self.st = tf.placeholder(tf.float32, [None, self.n_features])

        with tf.variable_scope('target_network'):
            c = ['target_network_parameters', tf.GraphKeys.GLOBAL_VARIABLES]
            with tf.variable_scope('layer1'):
                w1 = tf.get_variable('w1', [self.n_features, hidden_units],
                                     initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                b1 = tf.get_variable('b1', [1, hidden_units], initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                l1 = tf.nn.relu(tf.matmul(self.st, w1) + b1)

            with tf.variable_scope('layer2'):
                w2 = tf.get_variable('w2', [hidden_units, self.n_actions],
                                     initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                b2 = tf.get_variable('b2', [1, self.n_actions], initializer=tf.contrib.layers.xavier_initializer(),
                                     dtype=tf.float32, collections=c)
                self.qt = tf.matmul(l1, w2) + b2
                
        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(tf.squared_difference(self.qtarget, self.qeval))
    
    def target_params_replaced(self):
        self.sess.run(self.replacing_target_parameters)
        
    def store_experience(self, obs, a, r, obs_):
        index = self.experience_counter % self.experience_limit
        self.memory[index, :] = np.hstack((obs, [a, r], obs_))
        self.experience_counter += 1
        
    def epsilon_greedy(self, obs):
        if np.random.uniform(low=0, high=1) < self.epsilon:
            return np.argmax(self.sess.run(self.qeval, feed_dict={self.s: obs[np.newaxis, :]}))
        else:
            return np.random.choice(self.n_actions)

    def fit(self):
        if self.experience_counter < self.experience_limit:
            indices = np.random.choice(self.experience_counter, size=self.batch_size)
        else:
            indices = np.random.choice(self.experience_limit, size=self.batch_size)
        
        batch = self.memory[indices, :]
        qt, qeval = self.sess.run([self.qt, self.qeval],
                                  feed_dict={self.st: batch[:, -self.n_features:], self.s: batch[:, :self.n_features]})

        qtarget = qeval.copy()
        batch_indices = np.arange(self.batch_size, dtype=np.int32)
        actions = self.memory[indices, self.n_features].astype(int)
        rewards = self.memory[indices, self.n_features + 1]
        qtarget[batch_indices, actions] = rewards + self.gamma * np.max(qt, axis=1)
        _ = self.sess.run(self.train, feed_dict={self.s: batch[:, :self.n_features], 
                                                 self.qtarget: qtarget})

        if self.epsilon < 0.95:
            self.epsilon += 0.002

        if self.learning_counter % self.replace_target_pointer == 0:
            self.target_params_replaced()

        self.learning_counter += 1

In [12]:
class OPTIM:
    def __init__(self, n_features):
        self.n_features = n_features
        
    def step(self, action):
        global RE, REX
        obs_ = obs
        if action==0:
            obs_[steps%199] -= 0.051
        elif action==2:
            obs_[steps%199] += 0.051
        if steps%199<100:
            if obs_[steps%199] > MIU_up[steps%199]:
                obs_[steps%199] = MIU_up[steps%199]
            elif obs_[steps%199] < MIU_lo[steps%199]:
                obs_[steps%199] = MIU_lo[steps%199]
        else:
            if obs_[steps%199] > S_up[steps%199-100]:
                obs_[steps%199] = S_up[steps%199-100]
            elif obs_[steps%199] < S_lo[steps%199-100]:
                obs_[steps%199] = S_lo[steps%199-100] 
        reward = fOBJ(obs_,1,I,K,al,l,YGROSS,sigma,EIND,E,CCA,CCATOT,cumetree,MAT,MU,ML,FORC,TATM,TOCEAN,DAMFRAC,DAMAGES,ABATECOST,cost1,MCABATE,
         CPRICE,YNET,Y,C,CPC,PERIODU,CEMUTOTPER,RI,NT)
        done = bool(steps == 199)
        #if int(len(x)/2)==100:
            #print(x)
        if RE<reward:
            RE = reward
            OBS = obs_
        return obs_, reward, done
              
    def reset(self):
        return np.concatenate([MIU_start,S_start])
    
        


In [13]:
n_features = 200
dqn = DQN(learning_rate=0.001, gamma=0.9, n_features=n_features, 
          n_actions=3, epsilon=0.0, 
          parameter_changing_pointer=500, memory_size=50000)

MAX_EPISODES = 1000
total_steps = 0
env = OPTIM(n_features=n_features)
RE = 0
REX = np.full(200,0)

for episode in range(MAX_EPISODES):
    steps = 0
    obs = env.reset()
    
    while True:
        # greedy action
        action = dqn.epsilon_greedy(obs)

        obs_, R, done = env.step(action)
        
        #store in replay buffer
        dqn.store_experience(obs, action, R, obs_)
        
        if total_steps > 3000:
            dqn.fit()
            
        obs = obs_
        
        if done:
            break
        
        total_steps += 1
        steps += 1
        
    print("Episode {} with Reward : {} at epsilon {} in steps {}".format(episode + 1, R, dqn.epsilon, steps))
    


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Episode 1 with Reward : 4323.239269255351 at epsilon 0.0 in steps 199
Episode 2 with Reward : 4326.487446614721 at epsilon 0.0 in steps 199
Episode 3 with Reward : 4362.293345155747 at epsilon 0.0 in steps 199
Episode 4 with Reward : 4327.102443710146 at epsilon 0.0 in steps 199
Episode 5 with Reward : 4349.186528058499 at epsilon 0.0 in steps 199
Episode 6 with Reward : 4330.900488380905 at epsilon 0.0 in steps 199
Episode 7 with Reward : 4332.152128276304 at epsilon 0.0 in steps 199
Episode 8 with Reward : 4325.386555794967 at epsilon 0.0 in steps 199
Episode 9 with Reward : 4315.612275540287 at epsilon 0.0 in steps 199
Episode 10 with Reward : 4336.910852323532 at epsilon 0.0 in s

Episode 92 with Reward : 4210.813025832562 at epsilon 0.9500000000000007 in steps 199
Episode 93 with Reward : 4217.8519284604845 at epsilon 0.9500000000000007 in steps 199
Episode 94 with Reward : 4228.603972233692 at epsilon 0.9500000000000007 in steps 199
Episode 95 with Reward : 4210.531038042085 at epsilon 0.9500000000000007 in steps 199
Episode 96 with Reward : 4206.986519598113 at epsilon 0.9500000000000007 in steps 199
Episode 97 with Reward : 4221.379580379238 at epsilon 0.9500000000000007 in steps 199
Episode 98 with Reward : 4214.7906149417595 at epsilon 0.9500000000000007 in steps 199
Episode 99 with Reward : 4221.693547223711 at epsilon 0.9500000000000007 in steps 199
Episode 100 with Reward : 4239.343971471017 at epsilon 0.9500000000000007 in steps 199
Episode 101 with Reward : 4211.8500555755145 at epsilon 0.9500000000000007 in steps 199
Episode 102 with Reward : 4231.740100198835 at epsilon 0.9500000000000007 in steps 199
Episode 103 with Reward : 4210.221792661065 at e

Episode 187 with Reward : 4210.905824829648 at epsilon 0.9500000000000007 in steps 199
Episode 188 with Reward : 4215.026622344285 at epsilon 0.9500000000000007 in steps 199
Episode 189 with Reward : 4214.136153921087 at epsilon 0.9500000000000007 in steps 199
Episode 190 with Reward : 4212.516739195997 at epsilon 0.9500000000000007 in steps 199
Episode 191 with Reward : 4210.8701237009245 at epsilon 0.9500000000000007 in steps 199
Episode 192 with Reward : 4211.193226919746 at epsilon 0.9500000000000007 in steps 199
Episode 193 with Reward : 4216.253768393406 at epsilon 0.9500000000000007 in steps 199
Episode 194 with Reward : 4211.128958542331 at epsilon 0.9500000000000007 in steps 199
Episode 195 with Reward : 4211.960702926182 at epsilon 0.9500000000000007 in steps 199
Episode 196 with Reward : 4211.796566762587 at epsilon 0.9500000000000007 in steps 199
Episode 197 with Reward : 4210.9092511646395 at epsilon 0.9500000000000007 in steps 199
Episode 198 with Reward : 4230.3229090732

Episode 282 with Reward : 4344.339518938103 at epsilon 0.9500000000000007 in steps 199
Episode 283 with Reward : 4322.581145249949 at epsilon 0.9500000000000007 in steps 199
Episode 284 with Reward : 4311.910923632982 at epsilon 0.9500000000000007 in steps 199
Episode 285 with Reward : 4290.40239969643 at epsilon 0.9500000000000007 in steps 199
Episode 286 with Reward : 4361.1796493999955 at epsilon 0.9500000000000007 in steps 199
Episode 287 with Reward : 4331.686620772409 at epsilon 0.9500000000000007 in steps 199
Episode 288 with Reward : 4271.118306631084 at epsilon 0.9500000000000007 in steps 199
Episode 289 with Reward : 4323.509978088927 at epsilon 0.9500000000000007 in steps 199
Episode 290 with Reward : 4278.005112047707 at epsilon 0.9500000000000007 in steps 199
Episode 291 with Reward : 4238.15670731317 at epsilon 0.9500000000000007 in steps 199
Episode 292 with Reward : 4302.718601817231 at epsilon 0.9500000000000007 in steps 199
Episode 293 with Reward : 4281.0656222731195

Episode 377 with Reward : 4213.56644134212 at epsilon 0.9500000000000007 in steps 199
Episode 378 with Reward : 4279.819945199732 at epsilon 0.9500000000000007 in steps 199
Episode 379 with Reward : 4294.981557193156 at epsilon 0.9500000000000007 in steps 199
Episode 380 with Reward : 4246.384414687958 at epsilon 0.9500000000000007 in steps 199
Episode 381 with Reward : 4232.004626954184 at epsilon 0.9500000000000007 in steps 199
Episode 382 with Reward : 4311.5771198719685 at epsilon 0.9500000000000007 in steps 199
Episode 383 with Reward : 4230.729848639452 at epsilon 0.9500000000000007 in steps 199
Episode 384 with Reward : 4297.610990968351 at epsilon 0.9500000000000007 in steps 199
Episode 385 with Reward : 4273.8620510995315 at epsilon 0.9500000000000007 in steps 199
Episode 386 with Reward : 4307.764557441575 at epsilon 0.9500000000000007 in steps 199
Episode 387 with Reward : 4245.082179094654 at epsilon 0.9500000000000007 in steps 199
Episode 388 with Reward : 4262.95401177755

Episode 472 with Reward : 4294.5204774289505 at epsilon 0.9500000000000007 in steps 199
Episode 473 with Reward : 4287.377379283691 at epsilon 0.9500000000000007 in steps 199
Episode 474 with Reward : 4227.199925771967 at epsilon 0.9500000000000007 in steps 199
Episode 475 with Reward : 4268.247295291914 at epsilon 0.9500000000000007 in steps 199
Episode 476 with Reward : 4275.408604526232 at epsilon 0.9500000000000007 in steps 199
Episode 477 with Reward : 4252.751771275194 at epsilon 0.9500000000000007 in steps 199
Episode 478 with Reward : 4284.8298630593945 at epsilon 0.9500000000000007 in steps 199
Episode 479 with Reward : 4309.328340675813 at epsilon 0.9500000000000007 in steps 199
Episode 480 with Reward : 4296.019987954351 at epsilon 0.9500000000000007 in steps 199
Episode 481 with Reward : 4319.03588969665 at epsilon 0.9500000000000007 in steps 199
Episode 482 with Reward : 4275.075385006021 at epsilon 0.9500000000000007 in steps 199
Episode 483 with Reward : 4269.5146626265 

Episode 567 with Reward : 4287.10127030166 at epsilon 0.9500000000000007 in steps 199
Episode 568 with Reward : 4289.341920091418 at epsilon 0.9500000000000007 in steps 199
Episode 569 with Reward : 4291.563532435455 at epsilon 0.9500000000000007 in steps 199
Episode 570 with Reward : 4293.356906785988 at epsilon 0.9500000000000007 in steps 199
Episode 571 with Reward : 4283.4429198822545 at epsilon 0.9500000000000007 in steps 199
Episode 572 with Reward : 4291.357635830333 at epsilon 0.9500000000000007 in steps 199
Episode 573 with Reward : 4285.034511656602 at epsilon 0.9500000000000007 in steps 199
Episode 574 with Reward : 4333.420313300823 at epsilon 0.9500000000000007 in steps 199
Episode 575 with Reward : 4301.9408248138825 at epsilon 0.9500000000000007 in steps 199
Episode 576 with Reward : 4271.1203495930895 at epsilon 0.9500000000000007 in steps 199
Episode 577 with Reward : 4327.039222149055 at epsilon 0.9500000000000007 in steps 199
Episode 578 with Reward : 4293.2415648584

Episode 662 with Reward : 4269.159481311228 at epsilon 0.9500000000000007 in steps 199
Episode 663 with Reward : 4286.929570011031 at epsilon 0.9500000000000007 in steps 199
Episode 664 with Reward : 4288.138977781287 at epsilon 0.9500000000000007 in steps 199
Episode 665 with Reward : 4290.763935945797 at epsilon 0.9500000000000007 in steps 199
Episode 666 with Reward : 4265.9264330253645 at epsilon 0.9500000000000007 in steps 199
Episode 667 with Reward : 4332.988537993724 at epsilon 0.9500000000000007 in steps 199
Episode 668 with Reward : 4267.941877195528 at epsilon 0.9500000000000007 in steps 199
Episode 669 with Reward : 4251.390584157925 at epsilon 0.9500000000000007 in steps 199
Episode 670 with Reward : 4316.269732213474 at epsilon 0.9500000000000007 in steps 199
Episode 671 with Reward : 4344.692412643468 at epsilon 0.9500000000000007 in steps 199
Episode 672 with Reward : 4286.339513523981 at epsilon 0.9500000000000007 in steps 199
Episode 673 with Reward : 4292.37343416919

Episode 757 with Reward : 4310.6973830152765 at epsilon 0.9500000000000007 in steps 199
Episode 758 with Reward : 4334.529586453684 at epsilon 0.9500000000000007 in steps 199
Episode 759 with Reward : 4334.685075588741 at epsilon 0.9500000000000007 in steps 199
Episode 760 with Reward : 4286.838894608767 at epsilon 0.9500000000000007 in steps 199
Episode 761 with Reward : 4304.238996618737 at epsilon 0.9500000000000007 in steps 199
Episode 762 with Reward : 4343.624570595841 at epsilon 0.9500000000000007 in steps 199
Episode 763 with Reward : 4308.70171167013 at epsilon 0.9500000000000007 in steps 199
Episode 764 with Reward : 4301.147501342784 at epsilon 0.9500000000000007 in steps 199
Episode 765 with Reward : 4300.242032534774 at epsilon 0.9500000000000007 in steps 199
Episode 766 with Reward : 4320.416091222753 at epsilon 0.9500000000000007 in steps 199
Episode 767 with Reward : 4290.092176705981 at epsilon 0.9500000000000007 in steps 199
Episode 768 with Reward : 4285.190328639055

Episode 852 with Reward : 4325.517748864211 at epsilon 0.9500000000000007 in steps 199
Episode 853 with Reward : 4330.643547476257 at epsilon 0.9500000000000007 in steps 199
Episode 854 with Reward : 4249.660192281859 at epsilon 0.9500000000000007 in steps 199
Episode 855 with Reward : 4292.3248212206945 at epsilon 0.9500000000000007 in steps 199
Episode 856 with Reward : 4265.616136523078 at epsilon 0.9500000000000007 in steps 199
Episode 857 with Reward : 4329.116669630866 at epsilon 0.9500000000000007 in steps 199
Episode 858 with Reward : 4336.937297073344 at epsilon 0.9500000000000007 in steps 199
Episode 859 with Reward : 4282.294174691338 at epsilon 0.9500000000000007 in steps 199
Episode 860 with Reward : 4273.633775952343 at epsilon 0.9500000000000007 in steps 199
Episode 861 with Reward : 4299.997968686401 at epsilon 0.9500000000000007 in steps 199
Episode 862 with Reward : 4305.207814845811 at epsilon 0.9500000000000007 in steps 199
Episode 863 with Reward : 4292.44004510132

Episode 947 with Reward : 4321.376398228913 at epsilon 0.9500000000000007 in steps 199
Episode 948 with Reward : 4284.633895916839 at epsilon 0.9500000000000007 in steps 199
Episode 949 with Reward : 4297.736819942995 at epsilon 0.9500000000000007 in steps 199
Episode 950 with Reward : 4347.053734370773 at epsilon 0.9500000000000007 in steps 199
Episode 951 with Reward : 4306.243987782989 at epsilon 0.9500000000000007 in steps 199
Episode 952 with Reward : 4335.030523518091 at epsilon 0.9500000000000007 in steps 199
Episode 953 with Reward : 4299.548394226129 at epsilon 0.9500000000000007 in steps 199
Episode 954 with Reward : 4312.617911124553 at epsilon 0.9500000000000007 in steps 199
Episode 955 with Reward : 4327.253645256176 at epsilon 0.9500000000000007 in steps 199
Episode 956 with Reward : 4316.497272249739 at epsilon 0.9500000000000007 in steps 199
Episode 957 with Reward : 4305.038983239076 at epsilon 0.9500000000000007 in steps 199
Episode 958 with Reward : 4269.874305985913

In [14]:
obs_

array([0.0299997 , 0.99      , 0.99      , 1.        , 1.        ,
       1.        , 1.        , 1.        , 0.939     , 0.939     ,
       0.939     , 0.99      , 0.939     , 0.939     , 0.939     ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       0.939     , 0.99      , 0.99      , 0.939     , 0.939     ,
       0.939     , 0.939     , 1.        , 1.        , 1.2       ,
       1.2       , 1.137     , 1.188     , 1.188     , 1.188     ,
       1.188     , 1.188     , 1.137     , 1.2       , 1.2       ,
       1.2       , 1.2       , 1.137     , 1.137     , 1.137     ,
       1.137     , 1.137     , 1.137     , 1.188     , 1.188     ,
       1.2       , 1.2       , 1.2       , 1.2       , 1.2       ,
       1.137     , 1.137     , 1.137     , 1.137     , 1.137     ,
       1.137     , 1.137     , 1.137     , 1.188     , 1.188     ,
       1.188     , 1.2       , 1.2       , 1.137     , 1.137     ,
       1.137     , 1.137     , 1.137     , 1.188     , 1.188  