In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import h5py
import pandas as pd

# Add the directory containing the library to sys.path
import os
import sys
library_path = os.path.abspath(r'C:\git\foraging-strategies\code')
if library_path not in sys.path:
    sys.path.append(library_path)
    
from tools_fixed import PatchForager

### **Grid search for best parameters of any given strategy**

In [None]:
### Grid search for best parameters
# Sweep a pair of per-patch-type thresholds over a square grid, run one
# simulation per combination, and show the resulting reward rates as a heatmap.

# Make sure the notebook's output directory exists
os.makedirs('data', exist_ok=True)

# Parameters of reward in each patch for fixed rew
travel_time = 3
reward_value = [5, 5]
num_rew = [1,5]
first_rew = [0,2]
last_rew = [num_rew[0],num_rew[1]] #this needs to be a fxn of rew or stops, careful setting values
reward_prob = [0.9, 0.9]
indep_var = 'rewards'
type_patches = len(num_rew)
num_patches = 1000
patch_list = [random.randint(0, type_patches-1) for _ in range(num_patches)] #randomly generated list of patches

# Create the reward prob matrix
# a: one row per patch type, one column per index up to max(last_rew)
# b, c: one entry per patch type (filled with first_rew / last_rew below)
a = np.zeros((type_patches, max(last_rew)+1), dtype=float)
b = np.zeros((type_patches, 1), dtype=float)
c = np.zeros((type_patches, 1), dtype=float)

# Set depletion curves
for patch_id in range(type_patches):
    a[patch_id, :num_rew[patch_id]] = reward_prob[patch_id] # fixed prob for each patch TODO make more flexible
    b[patch_id] = first_rew[patch_id]
    c[patch_id] = last_rew[patch_id]

# NOTE(review): d looks like an unused placeholder argument -- confirm against PatchForager
d = '_'

forager = PatchForager(travel_time, reward_value, a, b, c, d, prob=True, depl_fxn = 'fixed', indep_var = indep_var)

# mvt_optimal = forager.calculate_optimal_stops(patch_list)
# print('Max reward rate:', mvt_optimal['max_reward_rate'])
# print('Stops', mvt_optimal['optimal_stops'])

# Values tried for each of the two strategy parameters
grid_values = range(0, 20)

# Collect one record per grid cell and build the DataFrame once at the end.
# No HDF5 file is opened here: nothing from the grid search is stored in it, and
# opening 'data/data.h5' in 'w' mode would wipe results saved by other cells.
grid_records = []
for x in grid_values:
    for y in grid_values:

        strategy_info = {'strategy': 'stops', 'params': {'target_stops': [x, y]}}
        # strategy_info = {'strategy': 'rewards', 'params': {'target_rewards': [x, y]}}
        # strategy_info = {'strategy': 'failures', 'params': {'max_failures': [x, y]}}
        # strategy_info = {'strategy': 'consec_failures', 'params': {'consec_failures': [x, y]}}

        # Run the simulation for this parameter pair; the second return value is
        # stored as the reward rate (named explicitly so IPython's `_` is left alone)
        data, reward_rate = forager.run_simulation(strategy_info['strategy'], patch_list, **strategy_info['params'])

        grid_records.append({'x': x, 'y': y, 'reward_rate': reward_rate})

df_sum = pd.DataFrame(grid_records, columns=['x', 'y', 'reward_rate'])

# Pivot the DataFrame to create a matrix (rows: y, columns: x)
heatmap_data = df_sum.pivot(index = 'y', columns = 'x', values =  'reward_rate')

max_value = heatmap_data.max().max()
# stack() indexes by (row, column), so max_cell is reported as (y, x)
max_cell = heatmap_data.stack().idxmax()
print(f"The cell with the largest value is at {max_cell} \n with a value of {max_value}")
# Plot the heatmap
plt.figure(figsize=(5, 4))
sns.heatmap(heatmap_data, cmap='magma')
plt.title('Reward Rate for ' + strategy_info['strategy'])
plt.xlabel('Patch 1 values')
plt.ylabel('Patch 2 values')
plt.show()


In [None]:
# Re-draw the grid-search heatmap. This cell reuses `df_sum` and
# `strategy_info` exactly as the grid-search cell left them.

# Reshape the long-format results into a matrix: one row per y, one column per x
heatmap_data = df_sum.pivot(index='y', columns='x', values='reward_rate')

# Best reward rate and where it occurs; stack() indexes by (row, column),
# so the reported cell is ordered (y, x)
max_cell = heatmap_data.stack().idxmax()
max_value = heatmap_data.max().max()
print(f"The cell with the largest value is at {max_cell} \n with a value of {max_value}")

# Draw the heatmap on an explicitly created axes
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(heatmap_data, cmap='magma', ax=ax)
ax.set_title('Reward Rate for ' + strategy_info['strategy'])
ax.set_xlabel('Patch 1 values')
ax.set_ylabel('Patch 2 values')
plt.show()

### **Simulation of different strategies and comparison of reward rate**

In [None]:
# Run every strategy in `strategy_struct` for a number of repeated simulations,
# store each per-timestep table in data/data.h5, and summarise reward rates in df_sum.
os.makedirs('data', exist_ok=True)

# Parameters of reward in each patch for fixed rew
travel_time = 3
reward_value = [5, 5, 5]
num_rew = [1,5,0]
first_rew = [0,2,0]
last_rew = [num_rew[0],num_rew[1],0] #this needs to be a fxn of rew or stops, careful setting values
reward_prob = [.9,.9,0]
indep_var = 'stops'
type_patches = len(num_rew)
num_patches = 200
patch_list = [random.randint(0, type_patches-1) for _ in range(num_patches)] #randomly generated list of patches

# Create the reward prob matrix
# a: one row per patch type, one column per index up to max(last_rew)
# b, c: one entry per patch type (filled with first_rew / last_rew below)
a = np.zeros((type_patches, max(last_rew)+1), dtype=float)
b = np.zeros((type_patches, 1), dtype=float)
c = np.zeros((type_patches, 1), dtype=float)

# Set depletion curves
for patch_id in range(type_patches):
    a[patch_id, :num_rew[patch_id]] = reward_prob[patch_id] # fixed prob for each patch TODO make more flexible
    b[patch_id] = first_rew[patch_id]
    c[patch_id] = last_rew[patch_id]

# NOTE(review): d looks like an unused placeholder argument -- confirm against PatchForager
d = '_'

forager = PatchForager(travel_time, reward_value, a, b, c, d, prob=True, depl_fxn = 'fixed', indep_var = indep_var)
# mvt_optimal = forager.calculate_optimal_stops(patch_list)
# print('Max reward rate:', mvt_optimal['max_reward_rate'])
# print('Stops', mvt_optimal['optimal_stops'])

# Define the strategies and their parameters
strategy_struct = {
    # 'rewards_opt': {'strategy': 'rewards', 'params': {'target_rewards': [1,2,0]}}, #this seems to run forever sometimes, maybe never leaves?
    # 'patch_type': {'strategy': 'patch_type', 'params': {'target_patches': [{'target_rewards': 3},
    #                                                                         {'target_stops': 6},
    #                                                                         {'target_stops': 0}]}},
    'stops_opt': {'strategy': 'stops', 'params': {'target_stops': [1,5,0]}},
    'failures_opt': {'strategy': 'failures', 'params': {'max_failures': [0,3,0]}},
    'consec_failures_opt': {'strategy': 'consec_failures', 'params': {'consec_failures': [1,3,0]}},
}

# Number of repeated simulations written to the HDF5 file
# (the plotting cells further down read the first 20 of them)
num_saved_simulations = 50

# One summary record per (simulation, strategy); the DataFrame is built once
# after the loop instead of being grown row by row
summary_records = []

# Create an HDF5 file (mode 'w' replaces any previous results)
with h5py.File('data/data.h5', 'w') as hf:
    for i in range(num_saved_simulations):
        # Create a group for this simulation
        sim_group = hf.create_group(f'simulation_{i}')

        # Run the simulation for each strategy
        for strategy_name, strategy_info in strategy_struct.items():
            # Second return value is recorded as the reward rate; it gets a real
            # name so IPython's `_` (last output) is not overwritten
            data, reward_rate = forager.run_simulation(strategy_info['strategy'], patch_list, **strategy_info['params'])

            summary_records.append({'simulation': i, 'strategy': strategy_name, 'reward_rate': reward_rate})

            # Save results (None entries become NaN before conversion to an array)
            data = data.replace({None: np.nan})
            dataset = sim_group.create_dataset(strategy_name, data=data.to_numpy())
            # Save column names as attributes
            dataset.attrs['columns'] = data.columns.tolist()

df_sum = pd.DataFrame(summary_records, columns=['simulation', 'strategy', 'reward_rate'])

In [None]:
# # df_cum=pd.DataFrame()
# value = df_sum.loc[df_sum['strategy'] == 'stops_opt'].reward_rate.mean()
# df_sum['reward_rate'] = df_sum['reward_rate'] - value
# df_sum['travel_time'] = travel_time
# df_sum['patches'] = indep_var+'_'+str(num_rew[0])+'_'+str(num_rew[1])
# # df_cum = pd.concat([df_sum, df_cum])

# fig, ax = plt.subplots(figsize=(6, 4))
# sns.boxplot(x ='strategy', y='reward_rate', data=df_sum, hue='patches')
# plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

In [None]:
# Load one stored simulation table back from the HDF5 results file
def read_h5_data(file_path, simulation_number, strategy):
    """Return the dataset saved for one simulation/strategy pair as a DataFrame.

    Parameters
    ----------
    file_path : path of the HDF5 file, opened read-only.
    simulation_number : number used to build the group name ``simulation_<n>``.
    strategy : name of the dataset inside that group.

    Returns
    -------
    pandas.DataFrame holding the dataset contents, with column names taken
    from the dataset's ``columns`` attribute.
    """
    dataset_key = f'simulation_{simulation_number}/{strategy}'
    with h5py.File(file_path, 'r') as h5_file:
        stored = h5_file[dataset_key]
        # Pull values and column names into memory before the file is closed
        values = stored[:]
        column_names = stored.attrs['columns']
        return pd.DataFrame(values, columns=column_names)

# Apply the ggplot look to the figures drawn after this cell
plt.style.use('ggplot')

# HDF5 file the plotting cells read from
h5_file_path = 'data/data.h5'

# Colormap that supplies the distinct per-strategy colors
# ('tab10' can be swapped for another colormap if needed)
colors = plt.get_cmap('tab10')

# Strategy name -> color, assigned in the order the strategies are defined
color_set = dict(zip(strategy_struct, map(colors, range(len(strategy_struct)))))

In [None]:
# Cumulative reward over time: one translucent trace per simulation and strategy
num_simulations = 20  # number of saved simulations to overlay

plt.figure(figsize=(6, 4))
for sim_num in range(num_simulations):

    for strategy in strategy_struct.keys():
        # Read data for this simulation and strategy
        data = read_h5_data(h5_file_path, sim_num, strategy)

        # Plot cumulative reward over time; only the first simulation carries a
        # label so the legend shows each strategy exactly once
        plt.plot(data['time'], data['reward'].cumsum(),
                 label=strategy.replace('_', ' ').title() if sim_num == 0 else "",
                 alpha = .3, lw = 1, color = color_set[strategy])

# Figure decoration is the same for every trace, so it is applied once after the loop
plt.xlabel('Time')
plt.ylabel('Cumulative Reward')
plt.title(f'Cumulative Reward over Time \n for Different Strategies \n Travel Time: {travel_time}s')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()

# Save the plot
# plt.savefig(f'figs/cumulative_reward_all.png', bbox_inches='tight', dpi=300)
# plt.close()  # Close the figure to free up memory

In [None]:
# How many of the stored simulations to overlay
num_simulations = 20

# Reward rate over time: cumulative reward divided by elapsed time, drawn as
# one translucent trace per simulation and strategy
fig, ax = plt.subplots(figsize=(6, 4))
for sim_num in range(num_simulations):
    for strategy in strategy_struct:
        # Load the stored table for this simulation/strategy pair
        data = read_h5_data(h5_file_path, sim_num, strategy)

        # Running reward rate at every recorded time point
        cumulative_reward = data['reward'].cumsum()
        reward_rate = cumulative_reward / data['time']

        # Only traces of the first simulation get a legend label
        if sim_num == 0:
            trace_label = strategy.replace('_', ' ').title()
        else:
            trace_label = ""
        ax.plot(data['time'], reward_rate, label=trace_label,
                alpha=0.3, color=color_set[strategy], linewidth=1)
    if sim_num == 0:
        ax.legend()

ax.set_xlabel('Time')
ax.set_ylabel('Reward Rate')
ax.set_title('Reward Rate over Time for Different Strategies')

ax.grid(True)
plt.tight_layout()

# Save the plot
# plt.savefig('figs/reward_rate_all.png', bbox_inches='tight', dpi=300)

In [None]:
# Number of simulations
num_simulations = 20

# Plot: per-patch-type averages of four within-patch variables, one panel each
variables = ['time_in_patch', 'rewards_in_patch', 'failures_in_patch', 'consecutive_failures']
fig, axs = plt.subplots(1, len(variables), figsize=(12, 4), sharey=True)

for sim_num in range(num_simulations):
    for strategy in strategy_struct.keys():

        # Read data for this simulation and strategy (once, shared by all panels)
        data = read_h5_data(h5_file_path, sim_num, strategy)

        # Rows with patch_id == -1 are excluded (presumably time spent outside
        # any patch -- confirm against the simulator); the remaining rows are
        # grouped into individual visits by patch id and entry time
        visits = data[data['patch_id'] != -1].groupby(['patch_id', 'patch_entry_time'])

        for variable, ax in zip(variables, axs.flatten()):
            # Maximum reached within each visit, then averaged per patch id
            df_results = visits[variable].max().reset_index()
            patch_times = df_results.groupby(['patch_id'])[variable].mean()

            # Only the first simulation is labelled so the legend lists each strategy once
            ax.plot(patch_times.index, patch_times.values, label=strategy if sim_num == 0 else "",
                    marker='o', alpha=0.3, color=color_set[strategy])

# Decorate every panel once, after all traces are drawn
for variable, ax in zip(variables, axs.flatten()):
    ax.set_xlabel('Patch ID')
    ax.set_xlim([-.9,2.9])
    ax.set_title(variable)
    ax.grid(True)

plt.tight_layout()
# Single legend, placed to the right of the last panel and built from the labelled traces
axs.flatten()[-1].legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# plt.savefig(f'figs/patch_stops.png', bbox_inches='tight', dpi=300)

In [None]:
# Reward rate of every simulated session in df_sum, one column of points per strategy.
# seaborn is already imported as `sns` in the notebook's import cell.
fig, ax = plt.subplots(figsize=(4, 4))
sns.scatterplot(x ='strategy', y='reward_rate', data=df_sum, hue='strategy', palette='tab10', ax=ax)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

### **Simulate and explore within session statistics for different strategies**

In [None]:
class SessionData:
    """Within-session summaries and plots for one simulated session.

    Parameters
    ----------
    simulated_data : pandas.DataFrame
        Per-timestep table of a session. The methods read the columns
        'patch_id', 'reward', 'time' and the columns listed in ``self.variables``.
    """

    def __init__(self, simulated_data):
        self.df = simulated_data
        # Columns shown (one panel each) by plot_variables_by_patch()
        self.variables = ['time_in_patch', 'prob_reward', 'cumulative_patch_reward', 'failures_in_patch', 'consecutive_failures']
    
    def process_last_timesteps(self):
        """Return the last row of every patch visit.

        A visit is a run of consecutive rows with the same 'patch_id'; rows
        whose 'patch_id' is -1 are dropped before grouping.

        Returns
        -------
        pandas.DataFrame with one row per visit, in visit order, indexed 0..n-1.
        """
        # The visit counter increases every time patch_id differs from the previous row
        visit_id = (self.df['patch_id'] != self.df['patch_id'].shift()).cumsum()
        grouped = self.df[self.df['patch_id'] != -1].groupby(visit_id)
        # tail(1) keeps the actual final row of each visit. GroupBy.last() would
        # return the last non-null value of each column, which can mix values
        # from different rows when the final row contains NaN/None.
        last_timesteps = grouped.tail(1).reset_index(drop=True)
        return last_timesteps
    
    def plot_variables_by_patch(self):
        """Scatter each variable at patch exit against visit number, colored by patch id."""
        last_timesteps = self.process_last_timesteps()
        
        # Create a color map for patches
        unique_patches = last_timesteps['patch_id'].unique()
        color_map = plt.get_cmap('tab20')
        color_dict = {patch: color_map(i/len(unique_patches)) for i, patch in enumerate(unique_patches)}
        
        fig, axes = plt.subplots(2, 3, figsize=(14, 8), sharex=True)
        fig.suptitle('Variables at Patch Exit, Colored by Patch', fontsize=16)
        flat_axes = axes.flatten()
        n_cols = axes.shape[1]
        
        for var, ax in zip(self.variables, flat_axes):
            for patch in unique_patches:
                patch_data = last_timesteps[last_timesteps['patch_id'] == patch]
                ax.scatter(patch_data.index, patch_data[var], 
                           c=[color_dict[patch]], label=f'Patch {patch}', s=10)
            
            ax.set_ylabel(var.replace('_', ' ').title())
            ax.grid(True, linestyle='--', alpha=0.3)
            ax.legend()
        
        # Label the x axis on the whole bottom row
        for ax in axes[-1]:
            ax.set_xlabel('Patch Visit Number')
        
        # Hide panels that received no variable. With sharex=True only the
        # bottom row shows tick labels, so the panel above a hidden one gets
        # its tick labels and x label switched back on.
        for idx in range(len(self.variables), flat_axes.size):
            flat_axes[idx].set_visible(False)
            above = idx - n_cols
            if above >= 0:
                flat_axes[above].tick_params(labelbottom=True)
                flat_axes[above].set_xlabel('Patch Visit Number')
        
        plt.tight_layout()
        plt.show()

    def plot_overall_reward_rate(self):
        """Plot cumulative reward divided by elapsed time over the whole session."""
        # Calculate cumulative sum of rewards
        cumulative_rewards = self.df['reward'].cumsum()
        
        # Use the 'time' column for total time
        total_time = self.df['time']
        
        # Calculate reward rate
        reward_rate = cumulative_rewards / total_time
        
        plt.figure(figsize=(3, 2))
        plt.plot(total_time, reward_rate)
        plt.xlabel('Time')
        plt.ylabel('Overall Reward Rate')
        # plt.title('Overall Reward Rate Throughout the Session')
        plt.grid(True, linestyle='--', alpha=0.7)
        
        plt.tight_layout()
        plt.show()

In [None]:
# Run one session per strategy, keep its per-timestep table in `results`,
# and plot the tracked variables at patch exit
results = {}
for strategy_name, strategy_info in strategy_struct.items():
    # The second return value is not used here; it gets a real name so
    # IPython's `_` (last output) is not overwritten
    simulated_data, _reward_rate = forager.run_simulation(strategy_info['strategy'], patch_list, **strategy_info['params'])
    # Store this strategy's own table (previously a stale `data` frame left
    # over from an earlier cell was stored instead)
    results[strategy_name] = simulated_data

    print(strategy_name)
    session = SessionData(simulated_data)
    session.plot_variables_by_patch()
    # session.plot_overall_reward_rate()