In [1]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from sklearn.preprocessing import OneHotEncoder

In [13]:
# Load the raw CSV datasets into DataFrames: diets, medications, precautions,
# per-symptom severity records, per-disease symptoms, and workouts.
# NOTE(review): paths are relative to the working directory; 'symtoms_df.csv' is
# spelled exactly as in the code — confirm it matches the file name on disk.

diets = pd.read_csv('diets.csv')
medications = pd.read_csv('medications.csv')
precautions = pd.read_csv('precautions_df.csv')
symptom_severity = pd.read_csv('Symptom-severity.csv')
symptoms = pd.read_csv('symtoms_df.csv')
workouts = pd.read_csv('workout_df.csv')

In [14]:
# Prepare symptoms data: reshape the wide Symptom_1..Symptom_4 columns into one
# row per (Disease, Symptom) pair, then attach each symptom's severity record.
symptoms_long = symptoms.melt(
    id_vars=['Disease'],
    value_vars=[f'Symptom_{i}' for i in range(1, 5)],
    var_name='SymptomType',
    value_name='Symptom',
)
# Symptom slots may be empty (NaN) for some diseases; such rows cannot carry a
# severity record, so drop them explicitly instead of relying on the join.
symptoms_long = symptoms_long.dropna(subset=['Symptom'])
# Symptom strings may carry stray leading/trailing whitespace; normalise the join
# key on both sides so otherwise-identical names are not silently lost in the merge.
symptoms_long = symptoms_long.assign(Symptom=symptoms_long['Symptom'].str.strip())
symptoms_severity = pd.merge(
    symptoms_long,
    symptom_severity.assign(Symptom=symptom_severity['Symptom'].str.strip()),
    on='Symptom',
)

In [15]:
# One-hot encode the symptom names (the first category is dropped) and swap the
# encoded indicator columns in for the raw 'Symptom' column.
encoder = OneHotEncoder(drop='first', sparse_output=False)
encoded_symptoms = encoder.fit_transform(symptoms_severity[['Symptom']])
encoded_symptoms_df = pd.DataFrame(
    data=encoded_symptoms,
    columns=encoder.get_feature_names_out(['Symptom']),
)
symptoms_severity_encoded = pd.concat(
    [symptoms_severity.drop(columns=['Symptom']), encoded_symptoms_df],
    axis='columns',
)


In [16]:
# Define custom healthcare environment class
class HealthcareEnv(gym.Env):
    """Toy treatment environment built on the encoded symptom-severity table.

    Observation
        One row of the *numeric* columns of ``symptoms_severity_encoded``, with
        every column scaled into [0, 1] so that it respects the declared Box bounds.
        Non-numeric columns are excluded because they cannot be represented in a
        float32 observation.
    Action
        An index into the concatenation ``diets + medications + workouts``.
    Reward
        Negative sum of the current state (lower values give a higher reward).

    The class follows the classic gym API: ``reset()`` returns the observation and
    ``step()`` returns ``(observation, reward, done, info)``.
    """

    # Upper bound of the random per-feature reduction applied by each treatment family.
    DIET_EFFECT = 0.1
    MEDICATION_EFFECT = 0.2
    WORKOUT_EFFECT = 0.15
    # The episode ends once every feature has dropped below this value.
    DONE_THRESHOLD = 0.1

    def __init__(self):
        super(HealthcareEnv, self).__init__()

        # Action space: one action per diet, medication and workout row
        # (precautions are loaded elsewhere in the notebook but are not actions here).
        self.n_diets = len(diets)
        self.n_medications = len(medications)
        self.n_actions = self.n_diets + self.n_medications + len(workouts)
        self.action_space = spaces.Discrete(self.n_actions)

        # Keep only numeric columns and pre-compute a float32 feature matrix once,
        # so sampling a start state never hits string values.
        numeric = symptoms_severity_encoded.select_dtypes(include=[np.number])
        features = np.nan_to_num(numeric.to_numpy(dtype=np.float32))
        if features.size == 0:
            raise ValueError(
                "symptoms_severity_encoded has no numeric data to sample states from"
            )
        # Scale each column by its largest magnitude (columns that are all zero are
        # left untouched) and clip, so every observation lies inside [0, 1].
        scale = np.abs(features).max(axis=0)
        scale[scale == 0] = 1.0
        self._features = np.clip(features / scale, 0.0, 1.0).astype(np.float32)

        # Observation space: one value in [0, 1] per numeric feature column.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(self._features.shape[1],), dtype=np.float32
        )

        # Start from a random row, exactly as reset() does.
        self.state = self._sample_state()
        self.done = False

    def _sample_state(self):
        """Return a copy of one randomly chosen row of the feature matrix."""
        row = np.random.randint(len(self._features))
        return self._features[row].copy()

    def reset(self):
        """Start a new episode from a random row and return its observation."""
        self.state = self._sample_state()
        self.done = False
        return self.state.copy()

    def step(self, action):
        """Apply a treatment action.

        Every feature is reduced by an independent uniform random amount whose
        upper bound depends on the treatment family of ``action``; the result is
        clipped to [0, 1].

        Returns:
            (observation, reward, done, info) with ``reward`` a Python float,
            ``done`` a Python bool and ``info`` an empty dict.
        """
        # Simplified logic: only the treatment family matters, not the specific row.
        if action < self.n_diets:
            effect = self.DIET_EFFECT
        elif action < self.n_diets + self.n_medications:
            effect = self.MEDICATION_EFFECT
        else:
            effect = self.WORKOUT_EFFECT

        reduction = np.random.rand(self.state.shape[0]) * effect
        self.state = np.clip(self.state - reduction, 0.0, 1.0).astype(np.float32)

        # Reward: lower total severity gives a higher (less negative) reward.
        reward = -float(np.sum(self.state))

        # End the episode once all features are near zero (treatment success).
        self.done = bool(np.all(self.state < self.DONE_THRESHOLD))

        return self.state.copy(), reward, self.done, {}

    def render(self, mode='human'):
        """Print the current state vector."""
        print(f"Current symptom severities: {self.state}")

In [17]:
# Initialize the healthcare environment. The constructor reads the module-level
# DataFrames (diets, medications, workouts, symptoms_severity_encoded), so the
# data-preparation cells above must have been run first.
env = HealthcareEnv()

In [None]:
# Initialize the PPO model with an MLP policy on `env`.
# Note: this only constructs the model; no `learn()` call is made in this cell,
# so no training happens here.
model = PPO('MlpPolicy', env, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




In [20]:
from sklearn.preprocessing import LabelEncoder
# Create a LabelEncoder for turning a categorical column (the 'Disease' column,
# fitted in the following cell) into integer codes.
label_encoder = LabelEncoder()

In [21]:
# Fit the encoder on the 'Disease' column and replace the column with integer labels.
# NOTE(review): this overwrites the column in place, so the original disease names
# are afterwards only available through the fitted `label_encoder`.
symptoms_severity_encoded['Disease'] = label_encoder.fit_transform(symptoms_severity_encoded['Disease'])

In [23]:
# Import necessary libraries
import numpy as np
import pandas as pd
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv  # Corrected import
from sklearn.preprocessing import MultiLabelBinarizer
import gym

# Load your data
symptoms_df = pd.read_csv('symtoms_df.csv')

# Normalise the column names by trimming surrounding whitespace
symptoms_df.columns = symptoms_df.columns.str.strip()

# Collapse the four symptom columns into one list per row, keeping only string
# entries (non-string cells are skipped) with surrounding whitespace removed
symptoms_df['symptoms'] = [
    [entry.strip() for entry in row if isinstance(entry, str)]
    for row in symptoms_df[
        ['Symptom_1', 'Symptom_2', 'Symptom_3', 'Symptom_4']
    ].itertuples(index=False, name=None)
]

# Multi-hot encode the per-row symptom lists
mlb = MultiLabelBinarizer()
symptom_encoded = mlb.fit_transform(symptoms_df['symptoms'])

# One indicator column per symptom known to the binarizer
encoded_symptoms_df = pd.DataFrame(data=symptom_encoded, columns=mlb.classes_)

# Put the disease label next to its encoded symptoms
final_data = pd.concat(
    [symptoms_df[['Disease']], encoded_symptoms_df],
    axis='columns',
)

# Print the final data to verify encoding
print(final_data)

# Define the custom environment
class HealthcareEnv(gym.Env):
    """Single-step environment whose observation is one multi-hot symptom row.

    Each episode samples a random row of ``encoded_symptoms_df`` as the state and
    ends after exactly one ``step``. The reward is a placeholder example
    (+1 for action 1, -1 otherwise) and does not depend on the state.

    The class follows the classic gym API: ``reset()`` returns the observation and
    ``step()`` returns ``(observation, reward, done, info)``.
    """

    def __init__(self):
        super(HealthcareEnv, self).__init__()
        self.action_space = gym.spaces.Discrete(2)  # Example: Two possible actions
        # One 0/1 feature per column of the encoded symptom table.
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(encoded_symptoms_df.shape[1],), dtype=np.float32)
        self.state = None
        self.done = False

    def reset(self):
        """Sample a random symptom row as the new state and return it as float32."""
        # Flatten the single sampled row to a 1D float32 vector matching observation_space.
        self.state = encoded_symptoms_df.sample(1).to_numpy(dtype=np.float32).flatten()
        self.done = False
        return self.state.copy()

    def step(self, action):
        """Score ``action`` and end the episode.

        Returns:
            (observation, reward, done, info); the observation is the unchanged
            state, ``done`` is always True and ``info`` is an empty dict.
        """
        # Placeholder reward taken from the original example: only the chosen
        # action matters. Replace with domain logic when available.
        reward = 1.0 if action == 1 else -1.0
        self.done = True  # End the episode after one step
        return self.state, reward, self.done, {}

                                      Disease  abdominal_pain  acidity  \
0                            Fungal infection               0        0   
1                            Fungal infection               0        0   
2                            Fungal infection               0        0   
3                            Fungal infection               0        0   
4                            Fungal infection               0        0   
...                                       ...             ...      ...   
4915  (vertigo) Paroymsal  Positional Vertigo               0        0   
4916                                     Acne               0        0   
4917                  Urinary tract infection               0        0   
4918                                Psoriasis               0        0   
4919                                 Impetigo               0        0   

      altered_sensorium  anxiety  back_pain  blackheads  bladder_discomfort  \
0                     0        0

In [24]:
# Create the environment: wrap a single HealthcareEnv instance in a DummyVecEnv
# (the list holds one factory callable, so the vectorized env has one sub-environment).
env = DummyVecEnv([lambda: HealthcareEnv()])



In [25]:
# Initialize the PPO model with an MLP policy on the vectorized environment
# (construction only; `learn()` is not called in this cell).
model = PPO('MlpPolicy', env, verbose=1)

Using cpu device


In [27]:
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Create a valid environment
# NOTE(review): this rebinds `env` and `model` to the CartPole-v1 task, replacing
# the HealthcareEnv-based objects created in earlier cells. Any later cell that
# uses `model` or `env` therefore operates on a CartPole agent, not a healthcare one.
env = make_vec_env('CartPole-v1', n_envs=1)
model = PPO('MlpPolicy', env, verbose=1)

# Train the model for 10,000 environment timesteps
model.learn(total_timesteps=10000)


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 22.4     |
|    ep_rew_mean     | 22.4     |
| time/              |          |
|    fps             | 6976     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 26.4        |
|    ep_rew_mean          | 26.4        |
| time/                   |             |
|    fps                  | 4194        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008967783 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | -0.00633    |
|    learning

<stable_baselines3.ppo.ppo.PPO at 0x303ab7790>

In [28]:
# Roll the policy out for 10 steps, printing each transition
state = env.reset()
for _ in range(10):
    action, _states = model.predict(state)
    next_state, reward, done, _ = env.step(action)
    print(f"Action: {action}, Next State: {next_state}, Reward: {reward}")
    # Begin a fresh episode once the current one has finished; otherwise carry on.
    state = env.reset() if done else next_state

Action: [1], Next State: [[ 0.04119264  0.1478849   0.02753637 -0.23699445]], Reward: [1.]
Action: [0], Next State: [[ 0.04415034 -0.04761941  0.02279648  0.06424551]], Reward: [1.]
Action: [1], Next State: [[ 0.04319795  0.14716841  0.02408139 -0.22115871]], Reward: [1.]
Action: [0], Next State: [[ 0.04614132 -0.04828933  0.01965822  0.07902214]], Reward: [1.]
Action: [1], Next State: [[ 0.04517553  0.14654538  0.02123866 -0.20739432]], Reward: [1.]
Action: [0], Next State: [[ 0.04810644 -0.04887374  0.01709078  0.09191195]], Reward: [1.]
Action: [1], Next State: [[ 0.04712896  0.14599913  0.01892901 -0.19533022]], Reward: [1.]
Action: [1], Next State: [[ 0.05004895  0.3408453   0.01502241 -0.4819823 ]], Reward: [1.]
Action: [0], Next State: [[ 0.05686585  0.14551455  0.00538276 -0.18460283]], Reward: [1.]
Action: [0], Next State: [[ 0.05977614 -0.04968401  0.00169071  0.10977329]], Reward: [1.]


In [29]:
# Save the trained model to disk under the name "healthcare_recommendation_model".
# NOTE(review): in document order, `model` was last trained on CartPole-v1 rather
# than on HealthcareEnv — confirm this is the model you intend to persist.
model.save("healthcare_recommendation_model")

In [82]:
# Load the saved model back from disk, rebinding `model`.
# No environment is passed to PPO.load here; the cells that follow only call
# `model.predict`.
model = PPO.load("healthcare_recommendation_model")

In [83]:
# Roll the reloaded policy out for 10 steps, printing each transition
state = env.reset()
for _ in range(10):
    action, _states = model.predict(state)
    next_state, reward, done, _ = env.step(action)
    print(f"Action: {action}, Next State: {next_state}, Reward: {reward}")
    # Begin a fresh episode once the current one has finished; otherwise carry on.
    state = env.reset() if done else next_state

Action: [0], Next State: [[ 0.01089642 -0.19944836 -0.03459849  0.31739366]], Reward: [1.]
Action: [1], Next State: [[ 0.00690745 -0.00385114 -0.02825061  0.01400349]], Reward: [1.]
Action: [1], Next State: [[ 0.00683043  0.19166432 -0.02797054 -0.28745726]], Reward: [1.]
Action: [0], Next State: [[ 0.01066372 -0.0030478  -0.03371969 -0.00372558]], Reward: [1.]
Action: [1], Next State: [[ 0.01060276  0.19254111 -0.0337942  -0.3068539 ]], Reward: [1.]
Action: [0], Next State: [[ 0.01445358 -0.0020834  -0.03993128 -0.02501741]], Reward: [1.]
Action: [1], Next State: [[ 0.01441191  0.19358777 -0.04043163 -0.33002687]], Reward: [1.]
Action: [0], Next State: [[ 0.01828367 -0.000936   -0.04703217 -0.05036334]], Reward: [1.]
Action: [0], Next State: [[ 0.01826495 -0.1953531  -0.04803943  0.22711748]], Reward: [1.]
Action: [1], Next State: [[ 0.01435789  0.00042129 -0.04349708 -0.08032349]], Reward: [1.]


In [84]:
def get_health_recommendation():
    """Prompt for symptoms on stdin and print one recommendation chosen by ``model``.

    Reads the notebook-level globals ``mlb``, ``env``, ``model``, ``diets``,
    ``medications`` and ``workouts``. The predicted action is interpreted as an
    index into the concatenation diets + medications + workouts.

    Returns:
        None. The recommendation (or an explanatory message) is printed.

    NOTE(review): in document order, ``model`` and ``env`` are last bound to a
    CartPole-v1 agent, so the truncation below keeps only the first few symptom
    features and the action can only be 0 or 1. Train and load a model on
    HealthcareEnv before relying on the output.
    """
    import ast
    import warnings

    # Ask the user for their symptoms
    print("Please enter your symptoms (comma-separated):")
    user_input = input()

    # Keep only symptoms the binarizer knows. Exact matches win; otherwise retry
    # with a lower-cased, underscore-joined form so that e.g. "Skin Rash" can
    # match a snake_case vocabulary entry.
    valid_symptoms = set(mlb.classes_)
    user_symptoms = []
    for raw in user_input.split(","):
        symptom = raw.strip()
        if symptom not in valid_symptoms:
            symptom = "_".join(symptom.lower().split())
        if symptom in valid_symptoms and symptom not in user_symptoms:
            user_symptoms.append(symptom)
    if not user_symptoms:
        print("No valid symptoms entered. Please try again.")
        return

    # Multi-hot encode the symptoms directly into a float32 feature vector
    state = mlb.transform([user_symptoms])[0].astype(np.float32)

    # If the model's environment expects fewer features, keep only the leading
    # ones, but say so: truncation discards symptom information.
    expected_size = env.observation_space.shape[0]
    if len(state) > expected_size:
        warnings.warn(
            f"Symptom vector has {len(state)} features but the environment expects "
            f"{expected_size}; truncating. The model was probably not trained on "
            "this symptom encoding.",
            stacklevel=2,
        )
        state = state[:expected_size]

    # Reshape the state for prediction (a batch containing one observation)
    state = state.reshape(1, -1)

    # Use the model to predict the best action; predict returns a 1-element
    # array for a batch of one, so reduce it to a plain int for safe indexing.
    action, _ = model.predict(state)
    action = int(np.asarray(action).reshape(-1)[0])

    n_diets = len(diets)
    n_medications = len(medications)
    n_total = n_diets + n_medications + len(workouts)
    if not 0 <= action < n_total:
        print(f"Model produced action {action}, which is outside the {n_total} known recommendations.")
        return

    # Determine the recommendation type
    if action < n_diets:  # Diet recommendation
        recommendation_type = "Diet"
        diet_value = diets.iloc[action]['Diet']  # assumes a 'Diet' column — TODO confirm

        if isinstance(diet_value, str):
            # The cell may hold the string form of a Python list. Parse it with
            # literal_eval (literals only) rather than eval, which would execute
            # arbitrary code found in the CSV.
            try:
                diet_list = ast.literal_eval(diet_value)
            except (ValueError, SyntaxError) as e:
                print(f"Error parsing Diet: {e}")
                diet_list = [diet_value]  # Fallback to a list with the string
            if not isinstance(diet_list, (list, tuple, set)):
                diet_list = [diet_list]  # a parsed scalar is treated as one item
        elif isinstance(diet_value, pd.Series):  # e.g. duplicate 'Diet' columns
            diet_list = diet_value.tolist()
        else:
            diet_list = [diet_value]  # Fallback for unexpected types

        recommendation = ", ".join([str(item) for item in diet_list])  # Ensure all items are strings

    elif action < n_diets + n_medications:  # Medication recommendation
        recommendation_type = "Medication"
        # assumes a 'Medication_Name' column — TODO confirm against medications.csv
        recommendation = medications.iloc[action - n_diets]['Medication_Name']

    else:  # Workout recommendation
        recommendation_type = "Workout"
        # assumes a 'Workout_Type' column — TODO confirm against workout_df.csv
        recommendation = workouts.iloc[action - n_diets - n_medications]['Workout_Type']

    # Print the recommendation
    print(f"We recommend the following {recommendation_type}: {recommendation}")


In [86]:
# Run the recommendation function (it prompts for comma-separated symptoms on
# stdin and prints the result; nothing is returned).
get_health_recommendation()

Please enter your symptoms (comma-separated):
No valid symptoms entered. Please try again.
