In [None]:
import pip
! pip install xlrd
! pip install openpyxl
! pip install tensorflow

In [None]:
# Import libraries
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


# Read the Excel sheet
# The bare file name is resolved relative to the notebook's working directory.
df = pd.read_excel('EmotionsIGTData.xlsx')
# Preview the first rows of the loaded table as the cell's output.
df.head()

In [None]:
# Scale IGT scores and Total Money to the range [0, 1]
# The columns are listed once and rescaled in place on `df`; MinMaxScaler
# normalises every column independently using its own minimum and maximum.
columns_to_scale = [
    'IGT_NET_Raw', 'IGT_NET_1Raw', 'IGT_NET_2Raw', 'IGT_NET_3Raw',
    'IGT_NET_4Raw', 'IGT_NET_5Raw',
    'Deck_A_Raw', 'Deck_B_Raw', 'Deck_C_Raw', 'Deck_D_Raw',
    'Total_Money',
]
scaler = MinMaxScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

# Define the CustomIGTEnvironment class
class CustomIGTEnvironment:
    """Episodic environment that replays a table of IGT records row by row.

    Every call to ``step`` consumes the next row of ``data`` and exposes four
    of its columns as the observation. The reward is computed by the
    module-level ``calculate_reward`` function.
    """

    def __init__(self, data):
        """Keep a reference to the trial table and start at its first row.

        Args:
            data: Table supporting ``len()`` and positional row access through
                ``.iloc``; each row must provide the columns read in ``step``.
        """
        self.data = data
        self.num_trials = len(data)
        self.current_trial = 0

    def reset(self):
        """Rewind to the first trial.

        Returns:
            A length-4 integer array of zeros used as the initial observation
            (all decks are assumed unchosen).
        """
        self.current_trial = 0
        return np.zeros(4, dtype=int)

    def step(self, action):
        """Advance by one trial.

        Args:
            action: Deck index forwarded unchanged to ``calculate_reward``.

        Returns:
            Tuple ``(state_space, reward, done)``: the four observed column
            values of the consumed row, the reward for ``action``, and whether
            that row was the last one of the table.

        Raises:
            ValueError: If every row has already been consumed.
        """
        if self.current_trial >= self.num_trials:
            raise ValueError("Episode is already done. Call reset() to start a new episode.")

        row = self.data.iloc[self.current_trial]
        self.current_trial += 1

        # Element-wise lookups keep the resulting array dtype independent of
        # any non-numeric columns present in the table.
        observed_columns = ('IGT_NET_Raw', 'IGT_NET_1Raw', 'IGT_NET_2Raw', 'IGT_NET_3Raw')
        state_space = np.array([row[column] for column in observed_columns])

        reward = calculate_reward(state_space, action)
        done = self.current_trial >= self.num_trials
        return state_space, reward, done

def calculate_reward(state, action):
    """Return the payout for drawing one card from the deck chosen by ``action``.

    Args:
        state: Current observation. It is not read by the present rules.
        action: Deck index. 0, 1, 2 and 3 select decks A, B, C and D; any other
            value yields a reward of 0.

    Returns:
        The deck's fixed payout (50 for A and B, 100 for C and D). For decks A
        and C a penalty of 250 is subtracted with probability 0.5.
    """
    # (deck index, guaranteed payout, whether the deck carries a penalty)
    deck_rules = (
        (0, 50, True),    # Deck A
        (1, 50, False),   # Deck B
        (2, 100, True),   # Deck C
        (3, 100, False),  # Deck D
    )

    for deck, payout, can_punish in deck_rules:
        if action == deck:
            # A random number is only drawn for decks that carry a penalty.
            if can_punish and np.random.rand() < 0.5:
                return payout - 250
            return payout

    # Unrecognised deck index: nothing is paid out.
    return 0

In [None]:
# Define the CustomRLAgent class
class CustomRLAgent:
    """Epsilon-greedy Q-learning agent with an online and a target Q-network.

    The online network (``model``) is trained after every transition; the
    target network (``target_model``) supplies the bootstrap value of the next
    state and is only changed by ``update_target_model``.
    """

    def __init__(self, state_dim, action_dim, learning_rate=0.001, discount_factor=0.99, epsilon=0.1,
                 epsilon_min=0.01, epsilon_decay=0.9999):
        """Build both networks and store the learning hyperparameters.

        Args:
            state_dim: Number of features in one observation.
            action_dim: Number of discrete actions (one Q-value output each).
            learning_rate: Step size of the Adam optimizer.
            discount_factor: Weight of the bootstrapped next-state value.
            epsilon: Initial probability of choosing a random action.
            epsilon_min: Once ``epsilon`` is at or below this value it stops decaying.
            epsilon_decay: Factor applied to ``epsilon`` after each training step.
        """
        self.learning_rate = learning_rate
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self.model = self.build_model(state_dim, action_dim)
        self.target_model = self.build_model(state_dim, action_dim)
        # Both networks are initialised randomly and independently; copy the
        # online weights so early bootstrap targets come from the same function.
        self.update_target_model()
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.discount_factor = discount_factor
        self.action_dim = action_dim

    def build_model(self, state_dim, action_dim):
        """Create a compiled two-hidden-layer MLP mapping a state to one Q-value per action.

        The model is compiled with MSE loss and the agent's shared optimizer.
        """
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation='relu', input_shape=(state_dim,)),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(action_dim, activation='linear')
        ])
        model.compile(loss='mse', optimizer=self.optimizer)
        return model

    def select_action(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: Observation with ``state_dim`` numeric entries.

        Returns:
            The chosen action index as a Python ``int``.
        """
        # Decide about exploring first so no forward pass is spent on a
        # prediction that would be discarded.
        if np.random.random() < self.epsilon:
            return int(np.random.choice(self.action_dim))

        state = np.asarray(state, dtype=np.float32).reshape(1, -1)
        # verbose=0 keeps Keras from printing a progress bar for every step.
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def compute_td_target(self, current_q, action, reward, next_state, done):
        """Build the regression target for one transition.

        The target equals the online network's current estimates for the
        visited state, except for the entry of the action actually taken,
        which is replaced by the temporal-difference target. Only that entry
        therefore contributes a non-zero error.

        Args:
            current_q: Array of shape ``(1, action_dim)`` with the online
                network's Q-values for the visited state. It is not modified.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Observation that followed the action.
            done: Whether the transition ended the episode.

        Returns:
            A new ``float32`` array of shape ``(1, action_dim)``.
        """
        target_q_values = np.array(current_q, dtype=np.float32)

        if done:
            # Terminal transition: there is no future value to bootstrap from.
            target_q_values[0, action] = reward
        else:
            next_state = np.asarray(next_state, dtype=np.float32).reshape(1, -1)
            next_q_values = self.target_model.predict(next_state, verbose=0)
            target_q_values[0, action] = reward + self.discount_factor * np.max(next_q_values[0])

        return target_q_values

    def train(self, state, action, reward, next_state, done):
        """Run one gradient step on a single transition and decay epsilon.

        Args:
            state: Observation in which ``action`` was taken.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Observation that followed the action.
            done: Whether the transition ended the episode.
        """
        state = np.asarray(state, dtype=np.float32).reshape(1, -1)

        # The target is computed outside the tape so it acts as a constant.
        current_q = self.model(state, training=False).numpy()
        target_q_values = self.compute_td_target(current_q, action, reward, next_state, done)

        with tf.GradientTape() as tape:
            q_values = self.model(state, training=True)
            # Mean squared error over the action outputs of the single sample.
            loss = tf.reduce_mean(tf.square(q_values - tf.cast(target_q_values, q_values.dtype)))

        gradients = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

In [None]:
# Seed NumPy and TensorFlow before any network is built so that weight
# initialisation, exploration and the random deck penalties can be reproduced
# after "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Create the environment
env = CustomIGTEnvironment(df)

# Create the agent with the correct state_dim and action_dim
state_dim = 4  # The observation returned by env.step holds four IGT net-score columns
action_dim = 4  # One action per deck (A, B, C, D)
agent = CustomRLAgent(state_dim=state_dim, action_dim=action_dim)

In [None]:
# Train the agent
NUM_TRAINING_EPISODES = 1000   # passes over the trial table
TARGET_SYNC_INTERVAL = 100     # episodes between target-network refreshes

episode_rewards = []
for episode in range(NUM_TRAINING_EPISODES):
    state = env.reset()
    episode_reward = 0
    done = False

    # One episode: act, observe, learn, until the environment reports the end.
    while not done:
        action = agent.select_action(state)
        next_state, reward, done = env.step(action)
        episode_reward += reward
        agent.train(state, action, reward, next_state, done)
        if not done:
            state = next_state

    episode_rewards.append(episode_reward)

    # Update the target model
    if episode % TARGET_SYNC_INTERVAL == 0:
        agent.update_target_model()

# Evaluate the agent
# Exploration is switched off while evaluating so the printed rewards reflect
# the learned greedy policy rather than random actions. The training epsilon
# is restored afterwards, even if an episode raises.
training_epsilon = agent.epsilon
agent.epsilon = 0.0
try:
    for episode in range(10):
        state = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(state)
            next_state, reward, done = env.step(action)

            total_reward += reward

            if done:
                break

            state = next_state

        print('Episode reward:', total_reward)
finally:
    agent.epsilon = training_epsilon

# Save the model
# Only the online Q-network is written; the hyperparameter-search cell later
# reloads it from this same relative path.
agent.model.save('trained_model.h5')

In [None]:
import matplotlib.pyplot as plt

# Analyze and plot the training results
# One point per training episode: the sum of rewards collected in that episode.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(episode_rewards, marker='o', linestyle='-', color='b')
ax.set_title('Training Progress')
ax.set_xlabel('Episode')
ax.set_ylabel('Total Reward')
ax.grid(True)
plt.show()


In [None]:
import numpy as np
import tensorflow as tf

# Load your saved model
# Only the weights are needed here, so the stored training configuration is
# not restored (compile=False).
loaded_model = tf.keras.models.load_model('trained_model.h5', compile=False)
pretrained_weights = loaded_model.get_weights()

# Network dimensions are taken from the saved model so that every fine-tuned
# agent is guaranteed to accept the pretrained weights.
search_state_dim = loaded_model.input_shape[-1]
search_action_dim = loaded_model.output_shape[-1]

# Define the hyperparameters to search
learning_rates = [0.001, 0.01, 0.1]
epsilons = [0.1, 0.2, 0.3]
discount_factors = [0.95, 0.99, 0.995]

best_average_reward = -np.inf
best_hyperparameters = {}

# Specify the number of episodes for fine-tuning
# NOTE: the grid below runs 27 combinations of num_episodes episodes each;
# lower this value for a quick check.
num_episodes = 500  # You can adjust this based on your requirements

# Iterate over hyperparameters
for lr in learning_rates:
    for epsilon in epsilons:
        for discount_factor in discount_factors:
            # A bare Keras model has neither select_action() nor
            # update_target_model(); wrap the pretrained weights in a fresh
            # agent so the hyperparameters under test actually drive
            # exploration, bootstrapping and the optimizer.
            fine_tuned_agent = CustomRLAgent(
                state_dim=search_state_dim,
                action_dim=search_action_dim,
                learning_rate=lr,
                discount_factor=discount_factor,
                epsilon=epsilon,
            )
            fine_tuned_agent.model.set_weights(pretrained_weights)
            fine_tuned_agent.update_target_model()

            # Kept separate from `episode_rewards` so the training curve
            # recorded earlier in the notebook is not overwritten.
            search_rewards = []

            # Fine-tune the loaded model
            for episode in range(num_episodes):
                state = env.reset()
                episode_reward = 0

                while True:
                    action = fine_tuned_agent.select_action(state)
                    next_state, reward, done = env.step(action)

                    episode_reward += reward

                    fine_tuned_agent.train(state, action, reward, next_state, done)

                    if done:
                        break

                    state = next_state

                search_rewards.append(episode_reward)

                # Update the target model
                if episode % 100 == 0:
                    fine_tuned_agent.update_target_model()

            # Calculate the average reward for the fine-tuned model
            average_reward = np.mean(search_rewards)

            # Print and record results (epsilon is the initial exploration rate)
            print(f"lr={lr}, epsilon={epsilon}, discount_factor={discount_factor}: Average Reward = {average_reward}")

            # Check if this combination of hyperparameters resulted in a better average reward
            if average_reward > best_average_reward:
                best_average_reward = average_reward
                best_hyperparameters = {
                    'learning_rate': lr,
                    'epsilon': epsilon,
                    'discount_factor': discount_factor
                }

print("Best Hyperparameters:", best_hyperparameters)
print("Best Average Reward:", best_average_reward)