In [None]:
import pip
! pip install openpyxl

In [16]:
import tensorflow as tf
import numpy as np
import pandas as pd
import json
import time
import multiprocessing
from tensorflow.keras.models import load_model, clone_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import custom_object_scope
from functools import partial
from tensorflow import keras
from tensorflow.keras.models import load_model


# Read the Excel sheet
df = pd.read_excel('EmotionsIGTData.xlsx', engine='openpyxl')

In [17]:
# Define the CustomIGTEnvironment class
class CustomIGTEnvironment:
    def __init__(self, data):
        self.data = data
        self.num_trials = len(data)
        self.current_trial = 0

    def reset(self):
        self.current_trial = 0
        initial_state = np.array([0, 0, 0, 0])  # Initial state, assuming all decks are unchosen
        return initial_state

    def step(self, action):
        if self.current_trial >= self.num_trials:
            raise ValueError("Episode is already done. Call reset() to start a new episode.")

        current_trial_data = self.data.iloc[self.current_trial]
        self.current_trial += 1
        state_space = np.array([current_trial_data['IGT_NET_Raw'], current_trial_data['IGT_NET_1Raw'],
                                current_trial_data['IGT_NET_2Raw'], current_trial_data['IGT_NET_3Raw']])

        reward = calculate_reward(state_space, action)  # Implement calculate_reward function
        done = (self.current_trial >= self.num_trials)

        return state_space, reward, done

In [18]:
class CustomAdamOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, learning_rate=0.001, **kwargs):
        name = kwargs.pop('name', 'CustomAdamOptimizer')
        super(CustomAdamOptimizer, self).__init__(name=name, **kwargs)
        self.learning_rate = learning_rate

    def get_config(self):
        config = super(CustomAdamOptimizer, self).get_config()
        config.update({
            'learning_rate': self.learning_rate,
        })
        return config

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, 'm')
            self.add_slot(var, 'v')

    def _resource_apply_dense(self, grad, var, **kwargs):
        var_dtype = var.dtype.base_dtype
        lr_t = tf.cast(self.learning_rate, var_dtype)
        beta1_t = tf.constant(0.9, dtype=var_dtype)
        beta2_t = tf.constant(0.999, dtype=var_dtype)
        epsilon_t = tf.constant(1e-7, dtype=var_dtype)

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        m_t = m.assign(beta1_t * m + (1.0 - beta1_t) * grad)
        v_t = v.assign(beta2_t * v + (1.0 - beta2_t) * grad * grad)

        var_update = var.assign_sub(lr_t * m_t / (tf.sqrt(v_t) + epsilon_t))

        return var_update


In [33]:
# Define a custom_objects dictionary to register the custom optimizer
custom_objects = {'CustomAdamOptimizer': CustomAdamOptimizer}

# Create an instance of the custom optimizer
custom_optimizer_instance = CustomAdamOptimizer(learning_rate=0.001)

# Use the custom optimizer when compiling the model
model = Sequential()
model.add(Dense(64, input_dim=4, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(4, activation='linear'))
model.compile(optimizer=custom_optimizer_instance, loss='mean_squared_error')

env = CustomIGTEnvironment(df)

# Register the custom optimizer class with its name
with custom_object_scope(custom_objects):

  # Define the function to load models with custom optimizer
  def load_model_with_custom_optimizer(model_path):
    def custom_optimizer():
        return CustomAdamOptimizer(learning_rate=0.001)

    print(f"Loading model from path: {model_path}")

    # Register the custom optimizer with its name before loading the model
    with custom_object_scope({'CustomAdamOptimizer': custom_optimizer}):
        loaded_model = keras.models.load_model(model_path)

    print(f"Loaded model from path: {model_path}")
    return loaded_model

# Register the custom optimizer class with its name
with custom_object_scope({'CustomAdamOptimizer': CustomAdamOptimizer}):
    # List of pre-trained model paths
    pretrained_model_paths = [
        'model_0.1_0.1.keras',
        'model_0.1_0.2.keras',
        'model_0.1_0.3.keras',
        'model_0.01_0.1.keras',
        'model_0.01_0.2.keras',
        'model_0.01_0.3.keras',
        'model_0.001_0.1.keras',
        'model_0.001_0.2.keras',
        'model_0.001_0.3.keras'
    ]

# Modify the fine_tune_model function to remove the custom_optimizer_instance parameter
def fine_tune_model(lr, epsilon, discount_factor, num_episodes, batch_size, model_path, env,
                    early_stop_threshold=0.9, early_stop_patience=10,
                    time_limit_minutes=30, output_file='fine_tuning_output.json'):
    start_time = time.time()

    # Load the model for fine-tuning
    loaded_model = load_model_with_custom_optimizer(model_path)

    # Clone the loaded model to start with a fresh copy for fine-tuning
    fine_tuned_model = clone_model(loaded_model)
    fine_tuned_model.set_weights(loaded_model.get_weights())  # Copy weights

    discount_factor_fine_tune = discount_factor

    episode_rewards = []
    training_history = []

    # Early stopping variables
    best_average_reward = -float('inf')
    early_stopping_count = 0

    # Fine-tune the loaded model
    for episode in range(num_episodes):
        state = env.reset()
        episode_reward = 0

        # Create buffers to store batch data
        state_buffer = []
        target_buffer = []

        while True:
            action = np.argmax(fine_tuned_model.predict(state[np.newaxis, :]))
            next_state, reward, done = env.step(action)

            episode_reward += reward

            # Store data for experience replay
            state_buffer.append(state)
            target = reward if done else reward + discount_factor * np.max(fine_tuned_model.predict(next_state[np.newaxis, :]))
            target_buffer.append(target)

            if len(state_buffer) >= batch_size or done:
                # Train the model using the collected batch
                fine_tuned_model.fit(np.array(state_buffer), np.array(target_buffer), epochs=1, verbose=0)

                # Clear the buffers
                state_buffer.clear()
                target_buffer.clear()

            if done:
                break

            state = next_state

        episode_rewards.append(episode_reward)

        # Log training history for this episode
        recent_rewards = episode_rewards[-early_stop_patience:]
        recent_average_reward = np.mean(recent_rewards)
        training_history.append({
            'episode': episode,
            'average_reward': recent_average_reward
        })

        # Early stopping check
        if recent_average_reward >= early_stop_threshold:
            break

        # Check elapsed time and stop if it exceeds the time limit
        elapsed_time_minutes = (time.time() - start_time) / 60.0
        if elapsed_time_minutes > time_limit_minutes:
            print(f"Time limit exceeded ({time_limit_minutes} minutes). Stopping fine-tuning.")
            break

    # Save the fine-tuned model with a unique name based on hyperparameters
    model_save_path = f"fine_tuned_model_lr_{lr}_epsilon_{epsilon}_discount_{discount_factor}.keras"
    fine_tuned_model.save(model_save_path)

    # Save useful output to the output file
    output_data = {
        'lr': lr,
        'epsilon': epsilon,
        'discount_factor': discount_factor_fine_tune,
        'average_reward': recent_average_reward,  # Use recent_average_reward
        'training_history': training_history,
        'elapsed_time_minutes': elapsed_time_minutes
    }

    with open(output_file, 'w') as f:
        json.dump(output_data, f, indent=4)

    return output_data

if __name__ == '__main__':
    # Define hyperparameters for fine-tuning
    learning_rates = [0.001, 0.01, 0.1]
    epsilons = [0.1, 0.2, 0.3]
    discount_factors = [0.9, 0.95, 0.99]
    num_episodes = 100  # Reduced number of episodes for faster fine-tuning
    batch_size = 32  # Increased batch size for faster training
    early_stop_threshold = 0.9
    time_limit_minutes = 30  # Set the time limit for fine-tuning
    output_file = 'fine_tuning_output.json'  # Output file name

    # Use multiprocessing to fine-tune models in parallel
    num_processes = 2  # Use all available CPU cores

    # Create a list of hyperparameters to search for each model
    hyperparameters_to_search = [(lr, epsilon, discount_factor, num_episodes, batch_size, model_path, env)
                                 for lr in learning_rates
                                 for epsilon in epsilons
                                 for discount_factor in discount_factors
                                 for model_path in pretrained_model_paths]

    # Parallelize the fine-tuning process
with multiprocessing.Pool(processes=num_processes) as pool:
    results = pool.starmap(
        fine_tune_model,
        [(lr, epsilon, discount_factor, num_episodes, batch_size, model_path, env, custom_optimizer_instance) for lr, epsilon, discount_factor in hyperparameters_to_search]
    )


    # Find the best hyperparameters for each model
    best_results = []
    for i, pretrained_model_path in enumerate(pretrained_model_paths):
        model_results = results[i * len(hyperparameters_to_search) // len(pretrained_model_paths):
                                (i + 1) * len(hyperparameters_to_search) // len(pretrained_model_paths)]
        best_result = max(model_results, key=lambda x: x['average_reward'])
        best_results.append(best_result)

    # Print and save the best hyperparameters and models for each pre-trained model
    for i, pretrained_model_path in enumerate(pretrained_model_paths):
        best_result = best_results[i]
        print(f"Best Hyperparameters for {pretrained_model_path}:")
        print(f"lr: {best_result['lr']}, epsilon: {best_result['epsilon']}, "
              f"discount_factor: {best_result['discount_factor']}, "
              f"Best Average Reward: {best_result['average_reward']}")
        best_fine_tuned_model = keras.models.load_model(
            f"fine_tuned_model_lr_{best_result['lr']}_epsilon_{best_result['epsilon']}_discount_{best_result['discount_factor']}.h5")
        best_fine_tuned_model.save(f'best_fine_tuned_{pretrained_model_path}.h5')

    print("Fine-tuning is done.")

ValueError: too many values to unpack (expected 3)