In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import random 

In [None]:
# Load the previously saved Keras model from the "model_ann_3layer" path
# (resolved relative to the current working directory). The simulation loop
# at the end of the notebook calls model.predict() on the 5-value state
# vector to pick each day's action.
# NOTE(review): the name suggests a 3-layer ANN - confirm against the training notebook.
model = keras.models.load_model("model_ann_3layer")

In [None]:
# Inputs (simulation parameters read as globals by the functions below)
s = 50 # Side length of the square grid; reset() draws x and y from [0, s)
N = 1000 # Size of the population
M = round(N * 0.07) # Number of initially infectious people (7% of N)
Et = 2 # Minimum exposed-day count before a person turns infectious (one_day() adds a random delay)
It = 21 # Minimum infectious-day count before a person recovers or dies (one_day() adds a random delay)
Mt = 5 # Number of movement rounds per day under action 0; other actions scale it down
D = 200 # Last simulated day; the agent loop runs days 0..D inclusive
death_rate = 100 # A person finishing the infectious period dies with probability 1/death_rate
expose_rate = 5 # A susceptible person next to an infectious one is exposed with probability 1/expose_rate per move

# Initialization of the compartment head-counts
S = N - M # Susceptible population (capital S; not the grid size `s`)
E = 0 # Exposed population
I = M # Infectious population
R = 0 # Recovered population
P = S + E + I + R # Total population; reset() creates one row per person
economy = 0 # Accumulated economic gain; set back to 0 right before the agent loop

In [None]:
# Create a virtual environment actions
def reset(population=None, infected=None, infectious_days=None, grid_size=None):
    """Build a fresh population table for the start of a simulation run.

    Every person starts susceptible at a random grid cell; a random subset is
    then marked infectious with a random number of infectious days already
    elapsed.

    Parameters
    ----------
    population : int, optional
        Number of people (rows). Defaults to the global ``P``.
    infected : int, optional
        Number of initially infectious people. Defaults to the global ``M``.
    infectious_days : int, optional
        Exclusive upper bound for the initial infectious-day counter, which is
        drawn from ``[1, infectious_days)``. Defaults to the global ``It``.
    grid_size : int, optional
        Side length of the grid; coordinates are drawn from ``[0, grid_size)``.
        Defaults to the global ``s``.

    Returns
    -------
    pandas.DataFrame
        One row per person with columns ``x``, ``y``, ``Day``, ``Susceptible``,
        ``Exposed``, ``Infectious``, ``Recovered`` and ``GG`` (dead flag), on a
        default 0..population-1 index.
    """
    population = P if population is None else population
    infected = M if infected is None else infected
    infectious_days = It if infectious_days is None else infectious_days
    grid_size = s if grid_size is None else grid_size

    dummy_array = np.zeros(shape=(population, 8))
    df = pd.DataFrame(dummy_array, columns=['x','y','Day','Susceptible','Exposed','Infectious','Recovered','GG'])
    df = df.astype({'x':int,'y':int,'Day':int,'Susceptible':bool,'Exposed':int,'Infectious':int,'Recovered':bool,'GG':bool})
    df['Susceptible'] = True

    # Seed the infectious population. Writing through .loc on the sampled index
    # touches only the two affected columns, instead of copying a sample and
    # pushing every column back with DataFrame.update (which can disturb the
    # bool/int dtypes set above).
    infected_idx = df.sample(infected).index
    df.loc[infected_idx, 'Infectious'] = np.random.randint(1, infectious_days, size=len(infected_idx))
    df.loc[infected_idx, 'Susceptible'] = False

    # Disperse people uniformly at random over the grid
    df['x'] = np.random.randint(0, grid_size, size=len(df))
    df['y'] = np.random.randint(0, grid_size, size=len(df))

    return df

def update_pos(p, df, grid_size=None):
    """Move person ``p`` one random step (-1, 0 or +1) along each axis, in place.

    The new coordinates are clamped to the grid, i.e. to ``[0, grid_size - 1]``,
    matching the ``[0, s)`` range that ``reset`` draws initial positions from.
    The previous version clamped against ``S`` (the susceptible head-count)
    rather than the grid size ``s``, so people could wander far off the grid.

    Parameters
    ----------
    p : int
        Row label of the person in ``df``.
    df : pandas.DataFrame
        Population table with integer ``x`` and ``y`` columns; modified in place.
    grid_size : int, optional
        Side length of the grid. Defaults to the global ``s``.
    """
    upper = (s if grid_size is None else grid_size) - 1
    # x is updated before y so the order of random draws is unchanged
    for axis in ('x', 'y'):
        df.loc[p, axis] = max(min(df.loc[p, axis] + random.choice(range(-1, 2)), upper), 0)
    
def coor_around(p, df):
    """Return the 3x3 block of grid cells centred on person ``p``.

    The result is a list of nine ``(x, y)`` tuples, including the person's own
    cell, ordered with the x offset as the outer loop and the y offset as the
    inner loop (both running -1, 0, +1). No clipping to the grid is applied.
    """
    centre_x = df.loc[p, 'x']
    centre_y = df.loc[p, 'y']
    neighbourhood = []
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            neighbourhood.append((centre_x + dx, centre_y + dy))
    return neighbourhood

def one_day(df, action = 0):
    """Advance the simulation by one day under the chosen policy action.

    The action scales the number of movement rounds for the day
    (0 -> all ``Mt`` rounds, 1 -> 75%, 2 -> 25%, rounded). In every round each
    living person takes one random step and is then processed according to
    their state:

    * infectious: may recover or die once the infectious-day counter (minus a
      random 0-6 day delay) reaches ``It``;
    * exposed: may turn infectious once the exposed-day counter (minus a
      random 0-1 day delay) reaches ``Et``;
    * susceptible: may become exposed when any infectious person occupies one
      of the nine cells around them.

    The exposed/infectious day counters are incremented once per day, on the
    last movement round that is actually executed.

    Parameters
    ----------
    df : pandas.DataFrame
        Population table as produced by ``reset``; modified in place. Rows are
        addressed by label ``0..len(df)-1``, so a default integer index is
        required.
    action : int, optional
        Policy action, one of 0, 1 or 2. Any other value raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after the day has been simulated.
    """
    policy_match = {0: 1, 1:0.75, 2:0.25} # action -> fraction of the Mt daily moves allowed
    moves_under_policy = int(round(Mt * policy_match[action], 0))
    for mt in range(moves_under_policy):
        # Tick the day counters on the final round that really runs. Comparing
        # against Mt instead meant that under actions 1 and 2 (fewer than Mt
        # rounds) nobody ever progressed through the exposed/infectious stages.
        last_move_of_day = (mt + 1 == moves_under_policy)
        for p in range(len(df)):
            if df.loc[p,'GG']: # dead people neither move nor change state
                continue
            update_pos(p, df)

            if (df.loc[p,'Infectious'] > 0) and (df.loc[p,'Recovered'] == False): # infectious
                if df.loc[p,'Infectious'] - random.choice(range(0,7)) >= It: # infectious period is over
                    df.loc[p,'Infectious'] = 0
                    # Only the draw death_rate-1 exceeds death_rate-2, so the
                    # person dies with probability 1/death_rate.
                    if random.choice(range(0,death_rate)) > (death_rate-2):
                        df.loc[p,'GG'] = True # the person dies
                    else:
                        df.loc[p,'Recovered'] = True # the person recovers
                elif last_move_of_day:
                    df.loc[p,'Infectious'] = df.loc[p,'Infectious'] + 1 # one more infectious day

            elif (df.loc[p,'Exposed'] > 0) and (df.loc[p,'Infectious'] == 0): # exposed
                if (df.loc[p,'Exposed'] - random.choice(range(0,2))) >= Et: # incubation is over
                    df.loc[p,'Exposed'] = 0
                    df.loc[p,'Infectious'] = 1 # first infectious day
                elif last_move_of_day:
                    df.loc[p,'Exposed'] = df.loc[p,'Exposed'] + 1 # one more exposed day

            elif df.loc[p,'Susceptible'] == True: # susceptible
                # Positions change as the loop walks through the population, so
                # the set of infectious cells is rebuilt for each susceptible person.
                infectious_people = df[df.Infectious > 0]
                infected_cells = set(zip(infectious_people.x, infectious_people.y))
                if not infected_cells.isdisjoint(coor_around(p, df)):
                    # Exposure happens with probability 1/expose_rate per round.
                    if random.choice(range(0,expose_rate)) > (expose_rate-2):
                        df.loc[p,'Exposed'] = 1
                        df.loc[p,'Susceptible'] = False

    return df


def economy_gain(df):
    """Return the day's economic output for the population in ``df``.

    Counts the people who are alive (``GG`` is False) and not infectious
    (``Infectious`` equals 0), then scales that head-count by a productivity
    factor drawn uniformly from [0.8, 1] and rounded to two decimals.
    """
    is_productive = (df.GG == False) & (df.Infectious == 0)
    headcount = len(df[is_productive])
    productivity = round(random.uniform(0.8,1), 2)
    return headcount * productivity

def current_state(df):
    """Summarise the population as a 5-element count vector.

    Returns a NumPy array ordered as
    ``[recovered, susceptible, exposed, infectious, dead]``, where each entry
    is the number of rows of ``df`` matching that condition.
    """
    masks = (
        df['Recovered'] == True,
        df['Susceptible'] == True,
        df['Exposed'] > 0,
        df['Infectious'] > 0,
        df['GG'] == True,
    )
    # Each mask is a boolean Series, so its sum is the number of matching rows
    return np.array([mask.sum() for mask in masks])



In [None]:
# Use the agent to make decisions: run one simulated epidemic in which the
# loaded model picks the policy action for every day.
df = reset()  # fresh population table
economy = 0   # running total of the daily economic gains

# range(0, D+1) simulates days 0..D inclusive, i.e. D+1 days in total.
for day in range(0, D+1):
    # Observation: [recovered, susceptible, exposed, infectious, dead] counts
    state = current_state(df)
    # Reshape to a single-row batch of 5 features for the model
    state =tf.reshape(state, [1, 5])
    prediction = model.predict(state, steps = 1)
    # Pick the index of the largest predicted value as the action.
    # NOTE(review): one_day() only accepts actions 0, 1 and 2, so the model
    # presumably has exactly three outputs - confirm.
    action_by_agent = np.argmax(prediction)
    # Simulate the day under the chosen policy, then collect that day's gain
    df = one_day(df, action = action_by_agent)
    gain = economy_gain(df)
    economy += gain
    print(f"Day {day}: take action {action_by_agent}, total_reward: {economy}. {prediction}")
