# In [None]:
import pandas as pd
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import torch


# Load the training table: every column except the last is used as a feature,
# and the last column is the target label.
dataset = pd.read_csv('Training.csv')
X = dataset.iloc[:, :-1]

# Encode the label column as integers. `le` stays at module level so that
# integer predictions can be mapped back to the original labels later.
le = LabelEncoder()
y = le.fit_transform(dataset.iloc[:, -1])

# Hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


class DiseaseDiagnosisEnv(gym.Env):
    """Gym environment that walks through a labelled table, one row per step.

    Each observation is one row of ``X`` as a float32 vector. The agent's
    action is compared with the corresponding entry of ``y``: a match earns a
    reward of ``1``, anything else ``-1``. An episode visits every row once,
    in order, and ends after the last row.

    Args:
        X: 2-D table of features (e.g. a pandas DataFrame); must expose
            ``shape`` and be convertible with ``np.asarray``. The observation
            space is declared as ``Box(0, 1)``, so values are assumed to lie
            in that range.
        y: 1-D sequence of labels aligned positionally with the rows of ``X``.
    """

    def __init__(self, X, y):
        super().__init__()
        self.X = X
        self.y = y
        # Convert once to positional NumPy arrays: this avoids a per-step
        # ``iloc``/``astype`` round trip and makes label lookup positional
        # even when ``y`` is a pandas Series with a shuffled index.
        self._features = np.asarray(X, dtype=np.float32)
        self._labels = np.asarray(y)
        self.current_step = 0

        # One action per distinct label. For integer labels the space must
        # also reach the largest label value, otherwise a label set with gaps
        # (e.g. {0, 2}) would contain labels the agent can never emit.
        n_actions = len(np.unique(self._labels))
        if self._labels.size and np.issubdtype(self._labels.dtype, np.integer):
            n_actions = max(n_actions, int(self._labels.max()) + 1)
        self.action_space = spaces.Discrete(n_actions)
        self.observation_space = spaces.Box(low=0, high=1, shape=(X.shape[1],), dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return it as the initial observation."""
        self.current_step = 0
        # Copy so callers cannot mutate the cached feature matrix.
        return self._features[self.current_step].copy()

    def step(self, action):
        """Score ``action`` against the current row's label and advance.

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` is the next row, or an
            all-zero float32 vector once the data is exhausted, ``reward`` is
            ``1`` or ``-1``, ``done`` is True after the last row, and ``info``
            is an empty dict.

        Raises:
            IndexError: if called again after the episode has ended without
                an intervening ``reset()``.
        """
        if self.current_step >= len(self._features):
            raise IndexError("step() called after the episode ended; call reset() first")

        reward = 1 if action == self._labels[self.current_step] else -1
        self.current_step += 1
        done = self.current_step >= len(self._features)
        if done:
            # The terminal observation must match the declared float32 space.
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            obs = self._features[self.current_step].copy()
        return obs, reward, done, {}


# Wrap a single training environment in the vectorised interface that the
# PPO implementation consumes.
env = DummyVecEnv([lambda: DiseaseDiagnosisEnv(X_train, y_train)])

# Build a PPO agent with the MLP policy and train it for 500k environment steps.
model = PPO(policy="MlpPolicy", env=env, verbose=1)
model.learn(total_timesteps=500_000)

def get_predicted_proba(symptoms):
    """Return the policy's action probabilities for a set of symptoms.

    Builds an indicator vector over the columns of the module-level ``X``
    (1 where the column name appears in ``symptoms``, 0 elsewhere) and feeds
    it to the trained ``model``'s policy.

    Args:
        symptoms: iterable of column names; names that are not columns of
            ``X`` are ignored.

    Returns:
        1-D float32 NumPy array with one probability per action.
    """
    columns = X.columns
    obs = np.zeros(len(columns), dtype=np.float32)
    for symptom in symptoms:
        if symptom in columns:
            obs[columns.get_loc(symptom)] = 1

    # Build the batch-of-one input on the same device as the policy so this
    # also works when the model was trained on a GPU.
    obs_tensor = torch.as_tensor(obs, device=model.policy.device).unsqueeze(0)
    with torch.no_grad():
        # Only the distribution is needed; no action is sampled here.
        probs = model.policy.get_distribution(obs_tensor).distribution.probs
    # Drop only the batch dimension (keeps the result 1-D even with a single
    # action) and move to CPU before converting to NumPy.
    return probs.squeeze(0).cpu().numpy()


def predict_disease(symptoms):
    """Rank every label by predicted probability for the given symptoms.

    Returns:
        List of ``(label, probability)`` tuples, most probable first, with
        labels decoded through the module-level ``le`` encoder.
    """
    probs = get_predicted_proba(symptoms)
    ranked = probs.argsort()[::-1]
    # Decode all indices in one call, then pair each label with its probability.
    labels = le.inverse_transform(ranked)
    return list(zip(labels, probs[ranked]))


# Lookup tables, read once at import time and queried by the helpers below
# through their 'Disease' column.
medications = pd.read_csv("medications.csv")
precautions = pd.read_csv("precautions_df.csv")

def get_medications(disease):
    """Return the 'Medication' values of every `medications` row for `disease`.

    The result is a plain list; it is empty when no row's 'Disease' matches.
    """
    is_match = medications['Disease'] == disease
    return medications.loc[is_match, 'Medication'].tolist()

def get_precautions(disease):
    """Return the non-missing precautions listed for `disease`.

    Uses the first row of the module-level `precautions` table whose
    'Disease' equals `disease` and collects its 'Precaution_1' through
    'Precaution_4' values, skipping missing (NaN/None) entries.

    Returns:
        List of precaution values; empty when the disease has no row in the
        table, mirroring how `get_medications` handles an unknown disease.
    """
    matching = precautions[precautions['Disease'] == disease]
    if matching.empty:
        # An unknown disease used to raise IndexError from `.iloc[0]`.
        return []
    row = matching.iloc[0]
    precaution_columns = ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']
    return [value for value in row[precaution_columns] if pd.notna(value)]

def process_input(input_symptoms):
    """Print the three most probable diseases for a comma-separated symptom string.

    Each symptom is normalised (trimmed, spaces replaced by underscores,
    lower-cased) before prediction. For each of the top three diseases the
    probability, the recommended medications and the precautions are printed.

    Args:
        input_symptoms: symptoms separated by commas, e.g. ``"fever, cough"``.
    """
    symptoms = []
    for raw_symptom in input_symptoms.split(','):
        symptoms.append(raw_symptom.strip().replace(' ', '_').lower())

    ranking = predict_disease(symptoms)

    print("\nTop 3 Predicted Diseases:")
    for disease, probability in ranking[:3]:
        print(f"{disease}: {probability:.2%}")

        # Look up before printing the header, so a failed lookup leaves no
        # dangling header in the output.
        disease_medications = get_medications(disease)
        print("Recommended Medications:")
        for number, medication in enumerate(disease_medications, start=1):
            print(f"  {number}. {medication}")

        disease_precautions = get_precautions(disease)
        print("Precautions:")
        for number, precaution in enumerate(disease_precautions, start=1):
            print(f"  {number}. {precaution}")
        print()


# Demo: run a handful of sample symptom strings through the full pipeline.
print("\nExample Inputs and Outputs:")
example_inputs = [
    "itching,skin_rash,nodal_skin_eruptions",
    "continuous_sneezing,shivering,chills",
    "joint_pain,stomach_pain,acidity",
    "fever,cough,fatigue",
    "chest_pain,shortness_of_breath,fatigue",
]

for i, input_symptoms in enumerate(example_inputs, start=1):
    # Echo the numbered input, then print its predictions.
    print(f"\nExample {i}:")
    print(f"Input: {input_symptoms}")
    process_input(input_symptoms)