In [1]:
!pip install tensorflow==2.15.0


Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Downloading ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting wrapt<1.15,>=1.11.0 (from tensorflow==2.15.0)
  Downloading wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting tensorboard<2.16,>=2.15 (from tensorflow==2.15.0)
  Downloading tensorboard-2.15.2-py3-none-any.whl.metadata (1.7 kB)
Collecting tensorflow-estimator<2.16,>=2.15.0 (from tensorflow==2.15.0)
  Downloading tensorflow_estimator-2.15.0-py2.py3-none-any.whl.metadata (1.3 kB)
Collecting keras<2.16,>=2.15.0 (from tensorflow==2.15.0)
  Downloading keras-2.15.0-py3-none-any.whl.metadata (2.4 kB)
Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)


**Sepsis Environment**

In [8]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import tensorflow as tf

# Define the columns for state features and action features
state_cols = ['SOFA']  # Example state columns
action_cols = ['MaxVaso', 'Input4H']  # Medication columns

def predict_medication_effects(model, state, iv_fluid_dosage, vp_dosage, history):
    current_state = state[state_cols].values.reshape(1, -1)  # Shape: (1, 1) if 'SOFA' is the only state feature
    action = np.array([vp_dosage, iv_fluid_dosage]).reshape(1, -1)  # Shape: (1, 2)
    print (action)

    # Concatenate current state and action into a single input array
    model_input = np.concatenate([current_state, action], axis=1)  # Shape: (1, 3)

    # Concatenate historical cases with current input
    model_input = np.concatenate([model_input, history.reshape(1, -1)], axis=1)  # Shape: (1, 12)

    # Predict the next state
    state_change = model.predict(model_input)

    # Update the state with the predicted changes
    next_state = state.copy()
    next_state[state_cols] += state_change[0]

    return next_state, state_change[0][0]

class SepsisEnv(gym.Env):
    def __init__(self, dataset, model_path,action_history_input):
        super(SepsisEnv, self).__init__()
        self.dataset = dataset
        self.model = tf.keras.models.load_model(model_path)  # Load the trained model
        self.current_index = 0
        self.history_size = 3  # Size of history to maintain
        self.action_history = np.array(action_history_input)  # Initialize with example history
        self.action_space = spaces.Discrete(25)  # 5 x 5 = 25 possible actions

        # Calculate observation space size
        self.observation_size = len(state_cols) + len(action_cols) + len(state_cols) * self.history_size
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.observation_size,), dtype=np.float32)

    def reset(self):
        self.current_index = np.random.randint(0, len(self.dataset))
        self.action_history = np.array(action_history_input)  # Reset to example history
        return self._get_observation()

    def _get_observation(self):
        state = self.dataset.iloc[self.current_index][state_cols].values
        current_action = self.dataset.iloc[self.current_index][action_cols].values
        recent_history = self.action_history.flatten()  # Use action history

        return np.concatenate((state, current_action, recent_history))

    def step(self, action,time):
        state = self.dataset.iloc[self.current_index].copy()
        next_state = state.copy()

        dosage_list=self.decode_dosage(action)
        iv_fluid_dosage = dosage_list[1]
        vp_dosage = dosage_list[0]

        next_state, sofa_change = predict_medication_effects(self.model, next_state, iv_fluid_dosage, vp_dosage, self.action_history)

        current_sofa = self.action_history[2][0]
        next_sofa = sofa_change

        if next_sofa >=25 or next_sofa <6 :
            done = True
        else:
            done = False

        reward = self.calculate_reward(current_sofa, next_sofa, done,time)

        # Update action history with current action (vp_dosage, iv_fluid_dosage)
        self.action_history = np.roll(self.action_history, -1, axis=0)  # Remove the oldest entry
        self.action_history[-1] = [sofa_change, vp_dosage, iv_fluid_dosage]  # Add new action at the end
        print("Action history:", self.action_history)

        print("Reward:",reward)
        print("Next SOFA:", next_sofa)
        print("Current SOFA:", current_sofa)



        self.current_index += 1

        print(done)
        # Prepare the observation to return
        observation = self._get_observation()

        # Return observation, reward, done status, and info dictionary
        info = {'predicted_sofa_state': next_state, 'action_applied': action}
        return observation, reward, done, info

    def calculate_reward(self, current_sofa, next_sofa, done, time):
        # Immediate reward based on SOFA score change
        if next_sofa < current_sofa:
            sofa_reward = (current_sofa - next_sofa) * 2
        elif next_sofa > current_sofa:
            sofa_reward = (next_sofa - current_sofa) * -2
        else:
            sofa_reward = 1  # Small reward for maintaining the SOFA score

        # Terminal reward based on episode end and SOFA score
        if done:
            if next_sofa <= 5:
                terminal_reward = 20  # High reward for achieving a low SOFA score at the end
                # Bonus reward for achieving the goal in fewer steps
                step_reward = (40 - time) / 40 * 10  # Adjust the multiplier as needed
            else:
                terminal_reward = -10  # Penalty for high SOFA score at the end
                step_reward = 0  # No step reward if the goal is not achieved
        else:
            terminal_reward = 0
            step_reward = 0  # No step reward if the episode is not done

        # Delayed reward: Smaller rewards for staying alive and progressing through time steps
        # survival_reward = (40 - time) / 40

        return sofa_reward + terminal_reward + step_reward


    def decode_dosage(self, dosage):
        vp_dosages = [0, 0.001, 0.01, 0.1, 1]
        iv_fluid_dosages = [0, 20, 60, 100, 200]

        flattened_array = [
                                    [0, 0], [0, 20], [0, 60], [0, 100], [0, 200],
                                    [0.001, 0], [0.001, 20], [0.001, 60], [0.001, 100], [0.001, 200],
                                    [0.01, 0], [0.01, 20], [0.01, 60], [0.01, 100], [0.01, 200],
                                    [0.1, 0], [0.1, 20], [0.1, 60], [0.1, 100], [0.1, 200],
                                    [1, 0], [1, 20], [1, 60], [1, 100], [1, 200]
                                ]


            # Function to access data by a single index (0-24)

        return flattened_array[dosage]



In [9]:
import tensorflow as tf

# Check TensorFlow version
print("TensorFlow version:", tf.__version__)



TensorFlow version: 2.15.0


**DDQN Agent**

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import random

# Load dataset
dataset_path = '/content/drive/MyDrive/RL project/Dataset.csv'
dataset = pd.read_csv(dataset_path)
action_history_input = []

# Number of entries you want to input
num_entries = 3

# Loop to input values
for i in range(num_entries):
    sofa_score = int(input(f"Enter SOFA score for entry {i + 1}: "))
    max_vaso = int(input(f"Enter Max Vaso dosage for entry {i + 1}: "))
    iv_fluid = int(input(f"Enter IV fluid dosage for entry {i + 1}: "))
    action_history_input.append([sofa_score, max_vaso, iv_fluid])
# Select 10% of the dataset

# Initialize environment
env = SepsisEnv(dataset, '/content/drive/MyDrive/RL project/predict_state_model.keras',action_history_input)
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
class DDQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = 0.001
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._build_model()



    def _build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))
        return model


    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        print (action)

        return np.argmax(act_values[0])  # returns action





# Initialize DDQN agent
agent = DDQNAgent(state_size, action_size)

# Load trained model weights
agent.model.load_weights("/content/drive/MyDrive/RL project/ddqn_sepsis_170.h5")

# Run inference
state = env.reset()
state = np.reshape(state, [2, state_size])  # Ensure the state has the correct shape
total_reward = 0
done = False
time_steps = 40 # Define the number of time steps for each episode
time=0
while not done:
    action = agent.act(state)
    next_state, reward, done, info = env.step(action,time=time)
    time += 1
    next_state = np.reshape(next_state, [2, state_size])  # Ensure the next_state has the correct shape
    state = next_state
    total_reward += reward
    print(f"Action: {action}, Reward: {reward}, Total Reward: {total_reward}")

print(f"Total Reward after episode: {total_reward}")


Enter SOFA score for entry 1: 10
Enter Max Vaso dosage for entry 1: 0
Enter IV fluid dosage for entry 1: 0
Enter SOFA score for entry 2: 10
Enter Max Vaso dosage for entry 2: 0
Enter IV fluid dosage for entry 2: 0
Enter SOFA score for entry 3: 19
Enter Max Vaso dosage for entry 3: 0
Enter IV fluid dosage for entry 3: 0
[[ 0 60]]
Action history: [[10  0  0]
 [19  0  0]
 [11  0 60]]
Reward: 15.274435043334961
Next SOFA: 11.3627825
Current SOFA: 19
False
Action: 2, Reward: 15.274435043334961, Total Reward: 15.274435043334961
[[ 0.1 20. ]]
Action history: [[19  0  0]
 [11  0 60]
 [10  0 20]]
Reward: 1.4135150909423828
Next SOFA: 10.293242
Current SOFA: 11
False
Action: 16, Reward: 1.4135150909423828, Total Reward: 16.687950134277344
[[1.e-02 2.e+01]]
Action history: [[11  0 60]
 [10  0 20]
 [14  0 20]]
Reward: -8.770832061767578
Next SOFA: 14.385416
Current SOFA: 10
False
Action: 11, Reward: -8.770832061767578, Total Reward: 7.917118072509766
[[0.01 0.  ]]
Action history: [[10  0 20]
 [14 

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
