In [1]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the dataset
data = pd.read_csv('heart.csv')

In [3]:
# Split data into features (X) and target (y)
X = data.drop(columns=['target'])
y = data['target']

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [4]:
# Define the environment for Q-learning
class HeartDiseaseEnv:
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.n_samples = X.shape[0]
        self.state = 0  # Start at the first sample
    
    def reset(self):
        self.state = 0
        return self.X[self.state]
    
    def step(self, action):
        correct = (action == self.y[self.state])  # Action is either 0 (no disease) or 1 (disease)
        reward = 1 if correct else -1  # Reward for correct prediction
        self.state += 1
        done = (self.state >= self.n_samples)
        next_state = self.X[self.state] if not done else None
        return next_state, reward, done


In [5]:
# Q-Learning Agent
class QLearningAgent:
    def __init__(self, n_actions, state_size, epsilon=0.1, alpha=0.5, gamma=0.9):
        self.epsilon = epsilon  # Exploration rate
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.q_table = np.zeros((state_size, n_actions))  # Initialize Q-table
    
    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice([0, 1])  # Explore: Choose randomly (0 = no disease, 1 = disease)
        else:
            return np.argmax(self.q_table[state])  # Exploit: Choose the action with the highest Q-value
    
    def update_q_values(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state]) if next_state is not None else 0
        td_target = reward + self.gamma * self.q_table[next_state, best_next_action] if next_state is not None else reward
        self.q_table[state, action] += self.alpha * (td_target - self.q_table[state, action])


In [6]:
# Run Q-learning on the dataset
def run_ql_heart_disease(X_train, y_train, n_episodes=1000, epsilon=0.1):
    env = HeartDiseaseEnv(X_train, y_train)
    agent = QLearningAgent(n_actions=2, state_size=X_train.shape[1], epsilon=epsilon)
    
    for episode in range(n_episodes):
        state = env.reset()
        done = False
        total_reward = 0
        
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done = env.step(action)
            agent.update_q_values(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if episode % 100 == 0:
            print(f'Episode {episode}, Total Reward: {total_reward}')
    
    return agent.q_table

In [7]:
# Example of running Q-learning
q_table = run_ql_heart_disease(X_train, y_train, n_episodes=1000, epsilon=0.1)
print("Learned Q-table:")
print(q_table)

IndexError: arrays used as indices must be of integer (or boolean) type