In [148]:
# !pip install tensorflow==2.3.0
# !pip install keras
# !pip install keras-rl2

In [149]:
from gym import Env
from gym.spaces import Discrete, Box, Dict
import pandas as pd
import numpy as np
import random 
import time

In [150]:
# Create a virtual environment actions
def reset():
    global P, M, It, s
    dummy_array = np.zeros(shape=(P,8))
    df = pd.DataFrame(dummy_array,columns=['x','y','Day','Susceptible','Exposed','Infectious','Recovered','GG'])
    df = df.astype({'x':int,'y':int,'Day':int,'Susceptible':bool,'Exposed':int,'Infectious':int,'Recovered':bool,'GG':bool})
    df['Susceptible'] = True
    #Appending infectious population in 
    dfupdate=df.sample(M)
    dfupdate['Infectious'] = np.random.randint(1,It, size=len(dfupdate))
    dfupdate['Susceptible'] = False
    df.update(dfupdate)
    update_list = dfupdate.index.tolist() 
    #Dispersing people randomly among grid
    df['x'] = np.random.randint(0,s, size=len(df))
    df['y'] = np.random.randint(0,s, size=len(df))

    return df

def update_pos(p, df):
    global S
    df.loc[p,'x'] = max(min(df.loc[p,'x']+random.choice(range(-1,2)),S),0) #make valid movements in the grid
    df.loc[p,'y'] = max(min(df.loc[p,'y']+random.choice(range(-1,2)),S),0) 
    
def coor_around(p, df):
    return [(df.loc[p, 'x'] + a, df.loc[p, 'y'] + b) for a in range(-1,2) for b in range(-1, 2)]

def one_day(df, action = 0):

    # start_time = time.time()
    global P, M, It, S, death_rate, expose_rate
    policy_match = {0: 1, 1:0.75, 2:0.25} # assign action to policy
    moves_under_policy = int(round(Mt * policy_match[action], 0))
    for mt in range(moves_under_policy):
        for p in range(len(df)):
            if df.loc[p,'GG'] == False: #If the person is not dead
                update_pos(p, df)

                if (df.loc[p,'Infectious'] > 0) and (df.loc[p,'Recovered'] == False): #If a person is in infectious state
                    
                    if df.loc[p,'Infectious'] - random.choice(range(0,7)) >= It: #If the infectious days are completed
                        if random.choice(range(0,death_rate)) > (death_rate-2): #If the person dies(with probability distribution 1:4)                           
                            df.loc[p,'Infectious'] = 0
                            df.loc[p,'GG'] = True #Kill the person
                        else: #If the person survives
                            df.loc[p,'Infectious'] = 0
                            df.loc[p,'Recovered'] = True #Recover the person
                    elif mt+1 == Mt:
                        df.loc[p,'Infectious'] = df.loc[p,'Infectious'] + 1 #Increase the infectious day counter
                        
                elif (df.loc[p,'Exposed'] > 0) and (df.loc[p,'Infectious'] == 0): #If a person is in exposed state 
                    if (df.loc[p,'Exposed'] - random.choice(range(0,2))) >= Et: #If the person has reached the exposed day limit?  7
                        df.loc[p,'Exposed'] = 0
                        df.loc[p,'Infectious'] = 1 #Increase the infectious day counter, now the person is infectious
                    elif mt+1 == Mt:
                        df.loc[p,'Exposed'] = df.loc[p,'Exposed'] + 1 #Increase the exposed day counter
                        
                elif df.loc[p,'Susceptible'] == True: #If the person is in susceptible state
                    infected_set = list(zip(df[df.Infectious > 0].x, df[df.Infectious > 0].y))
                    if len(set(infected_set) & set(coor_around(p, df))) > 0:
                        if random.choice(range(0,expose_rate)) > (expose_rate-2):
                            df.loc[p,'Exposed'] = 1
                            df.loc[p,'Susceptible'] = False
    # print("--- %s seconds ---" % (time.time() - start_time))
    
    return df # time.time() - start_time #


def economy_gain(df):
    economy_gain = len(df[(df.GG == False) & (df.Infectious == 0)]) * round(random.uniform(0.8,1), 2)
    return economy_gain

def current_state(df):
    inf = len(df.loc[df['Infectious'] > 0])
    exposed = len(df.loc[df['Exposed'] > 0]) 
    recovered = len(df.loc[df['Recovered'] == True])
    sus = len(df.loc[df['Susceptible'] == True])
    gg = df.loc[df['GG'] == True].GG.count()
    
    return np.array([recovered,sus, exposed, inf, gg])



In [151]:
#Inputs
s = 50 #size of the grid
N = 1000 #size of population
M = round(N * 0.07) #Number of infectious population
Et = 2 #Number of days staying exposed
It = 21 #Number of days staying infectious
Mt = 5 #Number of daily movements
D = 200 #Number of days
death_rate = 100
expose_rate = 5

#Initialization
S = N - M #Susceptible population
E = 0 #Exposed population
I = M #Number of infectious population 
R = 0 #Recovered population
P = S + E + I + R #Total population
economy = 0 #Daily economic transaction

In [152]:
def_observation_space = Box(low = np.array([0,0,0,0,0]), high = np.array([P,P,P,P,P], dtype = int))

# Create the virtual environment for RL
class CoronaPolicy(Env):
    def __init__(self):
        self.action_space = Discrete(3)
        
        self.observation_space = def_observation_space # Box(low = 0, high = P, shape = (5,1), dtype = int )
        # Dict(recovered=Discrete(P+1), sus=Discrete(P+1), exposed=Discrete(P+1),inf=Discrete(P+1),gg=Discrete(P+1))
        
        self.state = np.array([R, S, E, I, 0])
        
        self.day = 0
        
        self.df = reset()
        
    def step(self, action):
        
        self.df = one_day(self.df, action)
        
        self.state = current_state(self.df)
        
        self.day = self.day + 1
        
        reward = economy_gain(self.df)
        
        if self.day <= D:
            done = False
        else:
            done = True
            
        info = {}
        
        return self.state, reward, done, info
    
    def render(self):
        pass
    
    def reset(self):
        self.observation_space = def_observation_space
        
        self.state = np.array([R, S, E, I, 0])
        
        self.day = 0
        
        self.df = reset()
        
        
        return self.state
        



In [153]:
env = CoronaPolicy()

In [154]:
# episodes = 10
# for episode in range(1, episodes+1):
#     state = env.reset()
#     done = False
#     economy = 0
    
#     while not done:
#         action = env.action_space.sample()
#         n_state, reward, done, info = env.step(action)
#         economy += reward
        
#     print(f'Episode: {episode} Score: {economy}')

# Deep Learning Model with Keras

In [155]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [156]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(10, activation = 'relu',input_shape = states))
    model.add(Dense(10, activation = 'relu'))
    model.add(Dense(10, activation = 'relu'))
    model.add(Dense(actions, activation = 'linear'))
    return model

In [157]:
del model

In [158]:
states = env.observation_space.shape
actions = env.action_space.n
model = build_model(states, actions)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 10)                60        
_________________________________________________________________
dense_17 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_18 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_19 (Dense)             (None, 3)                 33        
Total params: 313
Trainable params: 313
Non-trainable params: 0
_________________________________________________________________


# Build Agent with Keras-RL

In [159]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow import constant

In [160]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit = 1000, window_length = 1)
    dqn = DQNAgent(model = model, memory = memory, policy = policy, 
                   nb_actions = actions, nb_steps_warmup = 10, target_model_update = 1e-2)
                  
    return dqn

In [165]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr = 1e-3), metrics = ['mae'])
dqn.fit(env, nb_steps = 2000, visualize = False, verbose = 1  )

Training for 2000 steps ...
Interval 1 (0 steps performed)
   11/10000 [..............................] - ETA: 9:02:43 - reward: 824.3182



 2000/10000 [=====>........................] - ETA: 6:44:04 - reward: 687.6062done, took 6060.772 seconds


<tensorflow.python.keras.callbacks.History at 0x118faf0d0>

In [166]:
# a = np.array([0,100,0,1,0])
# # a = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9]) 
# # print(a)
# a=tf.reshape(a, [1, 5]) 
# # print(c)
# model.predict(a, steps = 1)

In [169]:
scores = dqn.test(env, nb_episodes = 1, visualize = False)
print(np.mean(scores.history['episode_reward']))

Testing for 1 episodes ...


KeyboardInterrupt: 

In [168]:
model.save("model_ann_4")

INFO:tensorflow:Assets written to: model_ann_4/assets


# The implementation of the agent

In [None]:
df = reset()
economy = 0

for day in range(0, D+1):
    state = current_state(df)
    state =tf.reshape(state, [1, 5])
    prediction = model.predict(state, steps = 1)
    action_by_agent = np.argmax(prediction)
    df = one_day(df, action = action_by_agent)
    gain = economy_gain(df)
    economy += gain
    print(f"Day {day}: take action {action_by_agent}, total_reward: {economy}. {prediction}")


Day 0: take action 2, total_reward: 855.6. [[ 8665.851  -1057.5657  8819.136 ]]
Day 1: take action 2, total_reward: 1646.1. [[ 8517.313  -1049.3373  8661.883 ]]
Day 2: take action 2, total_reward: 2511.0. [[ 8347.556  -1039.9342  8482.166 ]]
Day 3: take action 2, total_reward: 3348.0. [[ 8186.288  -1031.0011  8311.4375]]
Day 4: take action 2, total_reward: 4092.0. [[ 8067.4575 -1024.4187  8185.6343]]
Day 5: take action 2, total_reward: 4919.7. [[ 7961.3604  -1018.54205  8073.313  ]]
Day 6: take action 2, total_reward: 5803.2. [[ 7846.7734  -1012.19446  7952.0024 ]]
Day 7: take action 2, total_reward: 6723.9. [[ 7719.4556 -1005.1419  7817.2153]]
Day 8: take action 2, total_reward: 7551.599999999999. [[7600.627   -998.55994 7691.4146 ]]
Day 9: take action 2, total_reward: 8407.199999999999. [[7507.2324 -993.8667 7592.2334]]
Day 10: take action 2, total_reward: 9318.599999999999. [[7466.2285 -994.2193 7547.144 ]]
Day 11: take action 2, total_reward: 10136.999999999998. [[7433.109  -994.50

Day 91: take action 2, total_reward: 77980.49999999997. [[ 8115.302  -1049.5249  8123.7163]]
Day 92: take action 2, total_reward: 78789.59999999998. [[ 8125.13   -1050.0549  8133.14  ]]
Day 93: take action 2, total_reward: 79719.59999999998. [[ 8128.4067 -1050.2318  8136.2812]]
Day 94: take action 2, total_reward: 80537.99999999997. [[ 8131.6836 -1050.4083  8139.423 ]]
Day 95: take action 2, total_reward: 81281.99999999997. [[ 8134.959  -1050.5853  8142.564 ]]
Day 96: take action 2, total_reward: 82193.39999999997. [[ 8134.959  -1050.5853  8142.564 ]]
Day 97: take action 2, total_reward: 83011.79999999996. [[ 8134.959  -1050.5853  8142.564 ]]
Day 98: take action 2, total_reward: 83792.99999999996. [[ 8148.0654 -1051.292   8155.129 ]]
Day 99: take action 2, total_reward: 84546.29999999996. [[ 8148.0654 -1051.292   8155.129 ]]
Day 100: take action 2, total_reward: 85346.09999999996. [[ 8157.8955 -1051.8226  8164.553 ]]
Day 101: take action 2, total_reward: 86117.99999999996. [[ 8157.8955

In [182]:
(max(max(prediction)))

AttributeError: 'numpy.ndarray' object has no attribute 'index'

2

In [184]:
prediction[0]

array([ 8665.851 , -1057.5657,  8819.136 ], dtype=float32)