# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the spreadsheet. The first column is left as read; every remaining
# column is coerced to a numeric dtype below.
file_path = 'eadockrl.xlsx'
data = pd.read_excel(file_path)

print("Initial data types:", data.dtypes, sep="\n")

# Cells that cannot be parsed as numbers become NaN (errors='coerce')
# instead of raising, so they can be counted and dropped afterwards.
for column_name in data.columns[1:]:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')

print("Number of NaN values in each column:", data.isna().sum(), sep="\n")

# Keep only the rows that have a value in every column.
data_cleaned = data.dropna(axis=0, how='any')

print(
    "Number of NaN values in each column after cleaning:",
    data_cleaned.isna().sum(),
    sep="\n",
)

# --- Tabular Q-learning: one state per cleaned row, one action per numeric column ---

# Learning hyperparameters.
gamma = 0.95    # discount factor applied to the bootstrap term
alpha = 0.8     # learning rate (step size of each Q update)
epsilon = 0.1   # probability of taking a uniformly random action

# Rewards are the negated numeric columns, so a lower raw value earns a
# higher reward. The first column is excluded from the reward matrix.
# NOTE(review): the raw values are presumably docking scores - confirm.
rewards = (data_cleaned.iloc[:, 1:].values * -1).astype(np.float64)

# Size the Q-table from the reward matrix instead of a hard-coded 4, so the
# script neither raises IndexError (fewer columns) nor silently ignores
# columns (more columns) when the spreadsheet layout changes.
n_states, n_actions = rewards.shape
Q = np.zeros((n_states, n_actions))

n_episodes = 1000
all_rewards = []                          # mean reward of each episode
cumulative_rewards = np.zeros(n_states)   # per-row reward summed over all episodes

for episode in range(n_episodes):
    episode_rewards = []
    for i in range(n_states):
        # Epsilon-greedy selection: explore with probability epsilon,
        # otherwise exploit the current best estimate for this row.
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.randint(0, n_actions)
        else:
            action = np.argmax(Q[i, :])

        reward = rewards[i, action]

        # Q update. The bootstrap term is the max over the SAME row i (read
        # before the update), i.e. each row is treated as a state that
        # transitions back to itself.
        td_target = reward + gamma * np.max(Q[i, :])
        Q[i, action] += alpha * (td_target - Q[i, action])

        episode_rewards.append(reward)
        # Includes rewards collected by exploratory (random) actions.
        cumulative_rewards[i] += reward

    all_rewards.append(np.mean(episode_rewards))

# Greedy policy: for each row of the Q-table take the highest-valued action,
# then translate that action index into the matching column label.
drug_columns = data_cleaned.columns[1:]
best_actions = Q.argmax(axis=1)
best_drugs = drug_columns[best_actions]

# One summary row per cleaned input row.
performance_metrics = pd.DataFrame(
    {
        'Variant': data_cleaned['Variant'],
        'Cumulative Reward': cumulative_rewards,
        'Best Drug': best_drugs,
    }
)

print("Performance Metrics:", performance_metrics, sep="\n")

# Learning curve: mean reward of each episode against the episode index.
reward_fig, reward_ax = plt.subplots(figsize=(12, 6))
reward_ax.plot(all_rewards)
reward_ax.set_title('Average Episode Reward vs. Number of Episodes')
reward_ax.set_xlabel('Number of Episodes')
reward_ax.set_ylabel('Average Reward')
reward_ax.grid(True)
reward_fig.tight_layout()
# Write the image before show(), while the figure is still open.
reward_fig.savefig("Average_Episode_Reward_vs_Number_of_Episodes.png", dpi=500)
plt.show()

# Positions of the 20 largest cumulative rewards. argsort is ascending, so
# the selection runs from the 20th-best up to the best (best variant last).
# With fewer than 20 rows the slice simply returns all of them.
reward_order = cumulative_rewards.argsort()
top_20_indices = reward_order[-20:]

# Pull the matching entries positionally from each per-row collection.
top_20_rewards = cumulative_rewards[top_20_indices]
top_20_best_drugs = best_drugs[top_20_indices]
top_20_variants = data_cleaned['Variant'].iloc[top_20_indices]

top_20_df = pd.DataFrame(
    {
        'Variant': top_20_variants,
        'Best Drug': top_20_best_drugs,
        'Cumulative Reward': top_20_rewards,
    }
)

print("Top 20 Variants and Their Cumulative Rewards:", top_20_df, sep="\n")

# Persist the table without the DataFrame index column.
top_20_df.to_csv("Top_20_Variants_and_Their_Cumulative_Rewards.csv", index=False)

# Bar chart of the top-20 table: one bar per variant, coloured along the
# viridis colormap, with the best drug written inside each bar.
bar_fig, bar_ax = plt.subplots(figsize=(14, 7))
colors = plt.cm.viridis(np.linspace(0, 1, len(top_20_df)))
bars = bar_ax.bar(top_20_df['Variant'], top_20_df['Cumulative Reward'], color=colors)
bar_ax.set_title('Top 20 Variants and Their Cumulative Rewards', fontsize=16)
bar_ax.set_xlabel('Variant', fontsize=14)
bar_ax.set_ylabel('Cumulative Reward', fontsize=14)
plt.xticks(rotation=45, fontsize=12)
plt.yticks(fontsize=12)

# Centre each drug label horizontally on its bar and vertically at half the
# bar height, rotated so it reads along the bar.
for rect, drug_name in zip(bars, top_20_df['Best Drug']):
    label_x = rect.get_x() + rect.get_width()/2
    label_y = rect.get_height()/2
    bar_ax.text(
        label_x,
        label_y,
        drug_name,
        ha='center',
        va='center',
        rotation=90,
        color='white',
        fontsize=10,
    )

bar_ax.grid(True)
bar_fig.tight_layout()
bar_fig.savefig("Top_20_Variants_and_Their_Cumulative_Rewards.png", dpi=300)
plt.show()
