In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# Load the dataset, standardize its columns, and run a toy tabular Q-learning
# loop whose "environment" is a fixed walk over the dataset's row indices.
# ---------------------------------------------------------------------------

# Load and preprocess the dataset
file_path = 'dataset_for_mp.xlsx'
df = pd.read_excel(file_path)

# Display the column names to ensure correctness
print(df.columns)

# Columns excluded from scaling (hard-coded, not detected from dtypes).
# NOTE(review): 'Patient number' is presumably a row identifier rather than a
# feature -- confirm against the data dictionary.
non_numeric_columns = ['Patient number']

# Keep only the names that are actually present so drop() cannot raise KeyError
non_numeric_columns = [col for col in non_numeric_columns if col in df.columns]

# Everything that remains is handed to the scaler as-is.
df_numeric = df.drop(columns=non_numeric_columns)
scaler = StandardScaler()
df_normalized = scaler.fit_transform(df_numeric)

# Reproducibility: all random draws below come from this seeded generator, so
# Restart & Run All yields the same Q-table every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the state space, action space, and Q-table.
# One state per dataset row; the scaled feature values themselves are only used
# for their row count here.
num_states = df_normalized.shape[0]
num_actions = 2  # For simplicity, let's assume 2 actions: success (1) or failure (0)
Q = np.zeros((num_states, num_actions))

# Hyperparameters
alpha = 0.1  # Learning rate
gamma = 0.99  # Discount factor
epsilon = 1.0  # Initial exploration probability (epsilon-greedy)
epsilon_decay = 0.995  # Multiplicative decay applied to epsilon after every episode
min_epsilon = 0.01  # Lower bound for epsilon
num_episodes = 1000
max_steps = 100  # Every episode runs for exactly this many steps

# Q-Learning Algorithm
# NOTE(review): the reward below is uniform noise that depends on neither the
# state nor the action, and the transition ignores the chosen action, so the
# learned Q-values carry no information about the dataset. Replace both
# placeholders with a real environment model before interpreting the table.
for episode in range(num_episodes):
    # Each episode starts from a uniformly random row index.
    state = rng.integers(0, num_states)

    # The episode terminates when the step budget is exhausted; no separate
    # termination flag is needed because the loop bound already enforces it.
    for step in range(max_steps):
        # Epsilon-greedy action selection: explore with probability epsilon,
        # otherwise pick the action with the highest current estimate.
        if rng.random() < epsilon:
            action = rng.integers(0, num_actions)
        else:
            action = np.argmax(Q[state, :])

        next_state = (state + 1) % num_states  # Simplified state transition (wraps around)
        reward = rng.random()  # Simplified reward: uniform in [0, 1)

        # Standard Q-learning (off-policy TD) update toward the bootstrapped target.
        td_target = reward + gamma * np.max(Q[next_state, :])
        Q[state, action] += alpha * (td_target - Q[state, action])

        state = next_state

    # Anneal exploration once per episode, never dropping below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)

print("Q-Table after training:")
print(Q)

Index(['Patient number', 'Idiopathic RM', 'Age', 'AMH', 'FSH', 'LH',
       'Number of miscarriages', 'Estradiol', 'Diabetes', 'Hypothyroidism',
       'Hyperthyroidism', 'Thrombophilic defects', 'APS'],
      dtype='object')
Q-Table after training:
[[10.72738634 23.43103025]
 [14.35967981 23.25400597]
 [11.93791149 23.23365525]
 [10.30320494 23.30810014]
 [12.11481516 23.34148348]
 [10.00013463 23.36909258]
 [ 9.80272839 23.26572526]
 [13.84306037 23.32331152]
 [23.24586641 13.99934146]
 [10.94679775 23.29086855]
 [10.55951532 23.33149656]
 [23.36962316  7.78068253]
 [23.29805386  9.31695246]
 [23.24242266 12.23875106]
 [12.60939121 23.24916414]
 [11.86754605 23.27564057]
 [23.35951045 11.45854092]
 [23.49338039 12.51045131]
 [14.36438972 23.46844204]
 [23.58380561 14.3189651 ]
 [11.86669846 23.57662941]
 [23.49404517 10.85792543]
 [11.13548347 23.36290992]
 [23.24886695 14.75664273]
 [23.32352972 11.40766952]
 [11.55482243 23.2901316 ]
 [23.34838247 12.46088275]
 [23.37145112 11.6324