In [21]:
import numpy as np
import random

# Environment: Diseases and Symptoms
diseases = ['Flu', 'Cold', 'COVID-19', 'Pneumonia']  # List of diseases
symptoms = ['Fever', 'Cough', 'Fatigue', 'Shortness of breath', 'Headache']  # List of symptoms

# Mapping symptoms to diseases
symptom_disease_mapping = {
    'Flu': ['Fever', 'Cough', 'Fatigue', 'Headache'],
    'Cold': ['Cough', 'Headache'],
    'COVID-19': ['Fever', 'Cough', 'Shortness of breath', 'Fatigue'],
    'Pneumonia': ['Fever', 'Cough', 'Shortness of breath']
}

# Actions: Diseases that the AI will diagnose
actions = diseases
q_table = np.zeros((len(symptoms), len(actions)))  # Q-values table

# Reward function: Positive reward for correct diagnosis, negative reward for wrong diagnosis
def get_reward(diagnosis, true_disease):
    if diagnosis == true_disease:
        return 10  # Reward for correct diagnosis
    else:
        return -5  # Penalty for wrong diagnosis

# Simulated patients (with symptoms and true diseases)
patients = [
    {'symptoms': ['Fever', 'Cough'], 'true_disease': 'Flu'},
    {'symptoms': ['Cough', 'Fatigue'], 'true_disease': 'Cold'},
    {'symptoms': ['Fever', 'Cough', 'Shortness of breath'], 'true_disease': 'COVID-19'},
    {'symptoms': ['Fever', 'Cough', 'Shortness of breath'], 'true_disease': 'Pneumonia'}
]

# Displaying patient's symptoms and true disease for visualization
def display_patient(patient):
    print(f"Symptoms: {', '.join(patient['symptoms'])}")
    print(f"True Disease: {patient['true_disease']}")

# Q-learning parameters
learning_rate = 0.1  # Learning rate
discount_factor = 0.9  # Discount factor for future rewards
epsilon = 0.2  # Exploration vs. exploitation trade-off
episodes = 1000  # Number of training episodes

# Q-learning algorithm to train the AI
def q_learning():
    for episode in range(episodes):
        patient = random.choice(patients)  # Randomly select a patient
        symptoms_observed = patient['symptoms']
        true_disease = patient['true_disease']
        
        # AI observes symptoms and makes a diagnosis based on Q-table
        action_idx = random.choice(range(len(actions))) if random.uniform(0, 1) < epsilon else np.argmax(q_table[symptoms.index(symptoms_observed[0])])
        diagnosis = actions[action_idx]
        
        # Calculate reward (correct or incorrect diagnosis)
        reward = get_reward(diagnosis, true_disease)
        
        # Update Q-values using Q-learning formula
        old_q_value = q_table[symptoms.index(symptoms_observed[0]), action_idx]
        future_q_value = np.max(q_table[symptoms.index(symptoms_observed[0]), :])
        q_table[symptoms.index(symptoms_observed[0]), action_idx] = old_q_value + learning_rate * (reward + discount_factor * future_q_value - old_q_value)
        
        if episode % 100 == 0:
            print(f"Episode {episode}/{episodes} - Total Reward: {reward}")

    print("Q-learning Training Complete!")

# Start the Q-learning process
q_learning()

# Test the trained agent
def test_agent():
    patient = random.choice(patients)  # Randomly select a patient for testing
    symptoms_observed = patient['symptoms']
    true_disease = patient['true_disease']
    
    print("Testing with a patient...")
    display_patient(patient)
    
    # AI makes diagnosis based on learned knowledge
    action_idx = np.argmax(q_table[symptoms.index(symptoms_observed[0])])
    diagnosis = actions[action_idx]
    
    print(f"Agent's Diagnosis: {diagnosis}")
    reward = get_reward(diagnosis, true_disease)
    print(f"Reward: {reward}")

# Test the agent
test_agent()


Episode 0/1000 - Total Reward: -5
Episode 100/1000 - Total Reward: -5
Episode 200/1000 - Total Reward: -5
Episode 300/1000 - Total Reward: 10
Episode 400/1000 - Total Reward: -5
Episode 500/1000 - Total Reward: -5
Episode 600/1000 - Total Reward: 10
Episode 700/1000 - Total Reward: 10
Episode 800/1000 - Total Reward: 10
Episode 900/1000 - Total Reward: 10
Q-learning Training Complete!
Testing with a patient...
Symptoms: Cough, Fatigue
True Disease: Cold
Agent's Diagnosis: Cold
Reward: 10
