In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load and preprocess the dataset
file_path = 'dataset_for_mp.xlsx'
df = pd.read_excel(file_path)

# Display the column names to ensure correctness
print(df.columns)

# Identify non-numeric columns
non_numeric_columns = ['Patient number']

# Ensure the columns exist in the DataFrame
non_numeric_columns = [col for col in non_numeric_columns if col in df.columns]

df_numeric = df.drop(columns=non_numeric_columns)
scaler = StandardScaler()
df_normalized = scaler.fit_transform(df_numeric)

# Define the state space, action space, and Q-table
num_states = df_normalized.shape[0]
num_actions = 2  # For simplicity, let's assume 2 actions: success (1) or failure (0)
Q = np.zeros((num_states, num_actions))

# Hyperparameters
alpha = 0.1  # Learning rate
gamma = 0.99  # Discount factor
epsilon = 1.0  # Exploration-exploitation trade-off
epsilon_decay = 0.995
min_epsilon = 0.01
num_episodes = 1000
max_steps = 100

# Q-Learning Algorithm
for episode in range(num_episodes):
    state = np.random.randint(0, num_states)
    for step in range(max_steps):
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(0, num_actions)
        else:
            action = np.argmax(Q[state, :])
        
        next_state = (state + 1) % num_states  # Simplified state transition
        reward = np.random.random()  # Simplified reward

        Q[state, action] = Q[state, action] + alpha * (reward + gamma * np.max(Q[next_state, :]) - Q[state, action])
        state = next_state
        
        done = (step == max_steps - 1)  # Simplified termination condition
        if done:
            break

    epsilon = max(min_epsilon, epsilon * epsilon_decay)

print("Q-Table after training:")
print(Q)

# Save the scaler and Q-table to disk for later use in the FastAPI app
import joblib
joblib.dump(scaler, 'scaler.pkl')
np.save('Q_table.npy', Q)

Index(['Patient number', 'Idiopathic RM', 'Age', 'AMH', 'FSH', 'LH',
       'Number of miscarriages', 'Estradiol', 'Diabetes', 'Hypothyroidism',
       'Hyperthyroidism', 'Thrombophilic defects', 'APS'],
      dtype='object')
Q-Table after training:
[[23.5302206  11.54884608]
 [11.79488795 23.57478379]
 [14.60249245 23.78847029]
 [11.56548676 23.85043552]
 [23.98898229 12.83897276]
 [12.64216109 23.94407122]
 [12.35663516 23.9220021 ]
 [23.87271569 12.05178102]
 [23.90715574 13.08456845]
 [12.91993968 23.90270367]
 [12.58153329 23.85062158]
 [13.46777908 23.92066555]
 [11.91860264 23.99624227]
 [23.99972834 10.75101682]
 [10.16470343 23.90225527]
 [13.20135436 23.77761888]
 [10.80373203 23.7846032 ]
 [23.82128377 11.7640602 ]
 [23.76751561 12.99136583]
 [12.24613497 23.81666595]
 [13.70533588 23.85746283]
 [23.99761954 13.54545822]
 [14.35000917 24.06604022]
 [24.03679327 11.65869836]
 [24.14825458 12.52253561]
 [24.2291709  11.51239109]
 [24.0398212  13.43989371]
 [11.57742529 24.0406