In [2]:
import numpy as np
import pandas as pd
import scipy
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D

from tools import depletion_func, calc_total_reward_rate, moving_window_avg

In [3]:
# Example usage: compute the total reward rate with tools.calc_total_reward_rate
# for a randomly generated sequence of patches.
type_patches = 2
travel_time = 3
# site_stops is only forwarded to calc_total_reward_rate in this cell.
# NOTE(review): presumably one entry per patch type — confirm against tools.
site_stops = [1,9]
# One reward magnitude per patch type.
reward_value = [5,5]
num_patches = 400

# Three alternative ways to generate a list of patches.
# Each assignment overwrites the previous one, so only the last uncommented
# line takes effect (here: the random list).
patch_list = range(type_patches) # one patch of each type, in order
patch_list = [random.randint(0, 1) for _ in range(num_patches)] # randomly generated list of patches; ids are hard-coded to 0/1 (not derived from type_patches) and the RNG is not seeded, so the list differs between runs
# patch_list = data # import data from session {To Be Implemented..}

# Reward-curve parameters, one entry per patch type.
# NOTE(review): presumably the same curve as PatchForager.depletion_func,
# a * (b ** (-c * t) + d), in which case `a` is the initial reward
# probability when d == 0 — confirm against tools.calc_total_reward_rate.
a = [.9, .4]
b = [2.76, 2.76]
c = [.1278, .1278]
d = [0, 0]

total_reward_rate = calc_total_reward_rate(patch_list, travel_time, site_stops, reward_value, a, b, c, d)
print(f"Total reward rate: {total_reward_rate}")

Total reward rate: 0.9886207099272325


In [4]:
class PatchForager:
    """Forager that visits a sequence of depleting patches under a leaving rule.

    For patch type ``i`` the reward "probability" after ``t`` time steps in
    the current visit is ``a[i] * (b[i] ** (-c[i] * t) + d[i])``.

    Parameters
    ----------
    travel_time : number
        Time added to the running total after every patch visit.
    reward_value : sequence of numbers
        Reward magnitude per patch type, indexed by patch id.
    a, b, c, d : sequences of numbers
        Parameters of the depletion curve, indexed by patch id.
    prob : bool, default False
        If True, every step pays ``reward_value`` with the current
        probability (Bernoulli draw from the global ``random`` generator)
        and 0 otherwise.  If False, every step pays the expected value
        ``probability * reward_value``.
    """

    def __init__(self, travel_time, reward_value, a, b, c, d, prob=False):
        self.travel_time = travel_time
        self.reward_value = reward_value
        self.a = a
        self.b = b
        self.c = c
        self.d = d
        self.prob = prob

    def depletion_func(self, patch_id, t):
        """Return the reward probability of ``patch_id`` after ``t`` steps in the patch."""
        return self.a[patch_id] * (self.b[patch_id] ** (-self.c[patch_id]*t)+self.d[patch_id])

    def gen_bern(self, p):
        """Return 1 with probability ``p`` (0 otherwise); ``p <= 0`` always gives 0."""
        return 1 if random.random() < p else 0

    def _sample_reward(self, patch_id, prob_reward):
        """Return the reward for one step given the current reward probability."""
        if self.prob:
            return self.gen_bern(prob_reward) * self.reward_value[patch_id]
        return prob_reward * self.reward_value[patch_id]

    def _no_future_reward(self, patch_id, t, prob_reward):
        """Return True when no step at time ``t`` or later can pay a positive reward.

        The depletion curve is monotonic in ``t`` (for positive ``b``), so if
        the payoff is non-positive now and not larger on the next step it can
        never become positive again.
        """
        value = self.reward_value[patch_id]
        if self.prob:
            if value <= 0:
                # A Bernoulli draw times a non-positive value is never > 0.
                return True
            scale = 1
        else:
            scale = value
        payoff_now = prob_reward * scale
        if payoff_now > 0:
            return False
        # Only evaluated once the payoff has already dropped to <= 0.
        return self.depletion_func(patch_id, t + 1) * scale <= payoff_now

    def forage_mvt(self, patch_list, target_reward_rate):
        """Leave a patch once its running reward rate drops below a target.

        After every step the rate ``patch_reward / (t + travel_time)`` is
        compared with ``target_reward_rate``; at least one step is always
        spent in each patch.

        Note: if the rate can never fall below the target (for example a
        non-positive target with non-negative rewards) the loop does not end.

        Returns
        -------
        (total_reward, total_time) summed over all patches, with
        ``travel_time`` added once per patch.
        """
        total_reward = 0
        total_time = 0
        for patch_id in patch_list:
            patch_reward = 0
            t = 0
            while True:
                prob_reward = self.depletion_func(patch_id, t)
                patch_reward += self._sample_reward(patch_id, prob_reward)
                t += 1

                if patch_reward / (t + self.travel_time) < target_reward_rate:
                    break

            total_reward += patch_reward
            total_time += t + self.travel_time

        return total_reward, total_time

    def forage_fixed_rewards(self, patch_list, target_rewards, max_patch_time=None):
        """Leave a patch after collecting ``target_rewards`` positive rewards.

        A step counts as a reward when its payoff is ``> 0``.  Because the
        reward probability decays, the target may become unreachable (for
        instance once the depletion term underflows to 0.0); the original
        implementation then looped forever.  The forager now also leaves when

        * no positive reward can be obtained any more in this patch, or
        * ``max_patch_time`` steps have been spent in the patch (optional;
          ``None`` means no cap).

        Returns
        -------
        (total_reward, total_time) summed over all patches, with
        ``travel_time`` added once per patch.
        """
        total_reward = 0
        total_time = 0
        for patch_id in patch_list:
            patch_reward = 0
            rewards_obtained = 0
            t = 0
            while rewards_obtained < target_rewards:
                if max_patch_time is not None and t >= max_patch_time:
                    break
                prob_reward = self.depletion_func(patch_id, t)
                if self._no_future_reward(patch_id, t, prob_reward):
                    # Target is unreachable: give up instead of spinning forever.
                    break
                reward = self._sample_reward(patch_id, prob_reward)

                patch_reward += reward
                if reward > 0:
                    rewards_obtained += 1
                t += 1

            total_reward += patch_reward
            total_time += t + self.travel_time

        return total_reward, total_time

    def forage_fixed_failures(self, patch_list, max_failures):
        """Leave a patch after ``max_failures`` consecutive unrewarded steps.

        A step is a failure when its payoff is not ``> 0``; any rewarded step
        resets the counter.

        Note: with ``prob=False`` the payoff is the expected value, which
        stays positive for positive parameters until the depletion term
        underflows to 0.0, so failures are only registered after that point.

        Returns
        -------
        (total_reward, total_time) summed over all patches, with
        ``travel_time`` added once per patch.
        """
        total_reward = 0
        total_time = 0
        for patch_id in patch_list:
            patch_reward = 0
            patch_failures = 0
            t = 0
            while patch_failures < max_failures:
                prob_reward = self.depletion_func(patch_id, t)
                reward = self._sample_reward(patch_id, prob_reward)

                patch_reward += reward
                if reward > 0:
                    patch_failures = 0
                else:
                    patch_failures += 1
                t += 1

            total_reward += patch_reward
            total_time += t + self.travel_time

        return total_reward, total_time

In [17]:
# Example usage: compare the three leaving rules of PatchForager on one
# randomly generated sequence of patches.
type_patches = 2
travel_time = 3
# site_stops is not used by anything in this cell.
site_stops = [1,9]
# One reward magnitude per patch type.
reward_value = [5,5]
num_patches = 100

# Three alternative ways to generate a list of patches.
# Each assignment overwrites the previous one, so only the last uncommented
# line takes effect (here: the random list).
patch_list = range(type_patches) # one patch of each type, in order
patch_list = [random.randint(0, 1) for _ in range(num_patches)] # randomly generated list of patches; ids are hard-coded to 0/1 (not derived from type_patches) and the RNG is not seeded, so the list differs between runs
# patch_list = data # import data from session {To Be Implemented..}

# Depletion-curve parameters, one entry per patch type; the reward probability
# is a * (b ** (-c * t) + d), so with d == 0 `a` is the initial probability.
a = [.9, .4]
b = [2.76, 2.76]
c = [.1278, .1278]
d = [0, 0]

# prob=False: every step pays the expected value (probability * reward value)
# instead of a Bernoulli draw.
forager = PatchForager(travel_time, reward_value, a, b, c, d, prob=False)

# Marginal Value Theorem
mvt_reward, mvt_time = forager.forage_mvt(patch_list, target_reward_rate=1)
print(f"MVT Strategy: Total Reward = {mvt_reward}, Total Time = {mvt_time}, Total Rate = {mvt_reward/mvt_time}")

# Fixed number of failures
# NOTE(review): with prob=False the per-step reward stays > 0 until the
# depletion term underflows to 0.0, so a "failure" is only counted after
# thousands of steps per patch (see the very large Total Time in the output).
# This rule is probably only meaningful with prob=True — confirm intent.
fixed_failures_reward, fixed_failures_time = forager.forage_fixed_failures(patch_list, max_failures=3)
print(f"Fixed Failures Strategy: Total Reward = {fixed_failures_reward}, Total Time = {fixed_failures_time}, Total Rate = {fixed_failures_reward/fixed_failures_time}")

# Fixed number of rewards
# With prob=False every early step has reward > 0, so target_rewards=1 means
# exactly one step per patch.
fixed_rewards_reward, fixed_rewards_time = forager.forage_fixed_rewards(patch_list, target_rewards=1)
print(f"Fixed Rewards Strategy: Total Reward = {fixed_rewards_reward}, Total Time = {fixed_rewards_time}, Total Rate = {fixed_rewards_reward/fixed_rewards_time}")


MVT Strategy: Total Reward = 1857.5726841735134, Total Time = 1984, Total Rate = 0.9362765545229402
Fixed Failures Strategy: Total Reward = 2629.8071392822485, Total Time = 574484, Total Rate = 0.004577685608793715
Fixed Rewards Strategy: Total Reward = 320.0, Total Time = 400, Total Rate = 0.8


In [18]:
def run_simulation(forager, strategy, patch_list, **strategy_params):
    """Simulate a forager step by step and return one row per time step.

    Parameters
    ----------
    forager : object
        Must provide ``depletion_func(patch_id, t)``, ``gen_bern(p)``,
        ``prob``, ``reward_value`` and ``travel_time`` (e.g. a PatchForager).
    strategy : str
        Leaving rule: ``'mvt'``, ``'fixed_rewards'`` or ``'fixed_failures'``.
    patch_list : iterable
        Patch ids to visit, in order.
    **strategy_params
        The threshold for the chosen rule: ``target_reward_rate`` for
        ``'mvt'``, ``target_rewards`` for ``'fixed_rewards'``,
        ``max_failures`` for ``'fixed_failures'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``patch_id``, ``time_in_patch``, ``reward``,
        ``cumulative_patch_reward``, ``prob_reward``, ``rewards_in_patch``,
        ``failures_in_patch`` and ``patch_entry_time``.  Travel steps are
        recorded with ``patch_id == -1`` and ``patch_entry_time`` None.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the supported rules (previously this
        made the inner loop run forever).
    KeyError
        If the threshold keyword required by ``strategy`` is missing.

    Notes
    -----
    A patch is only left when the rule's condition is met; a condition that
    can never be met (e.g. ``'fixed_rewards'`` in a patch that stops paying)
    keeps the loop running.
    """
    threshold_name = {
        'mvt': 'target_reward_rate',
        'fixed_rewards': 'target_rewards',
        'fixed_failures': 'max_failures',
    }
    if strategy not in threshold_name:
        raise ValueError(
            f"Unknown strategy {strategy!r}; expected one of {sorted(threshold_name)}"
        )
    # Fail fast (same KeyError as before) instead of after the first step.
    threshold = strategy_params[threshold_name[strategy]]

    data = []
    total_time = 0

    for patch_id in patch_list:
        # 'time' is 1-based, so the first step in this patch happens at
        # total_time + 1.  Computing it here keeps the first patch consistent
        # with all later ones (it used to be reported as 0).
        patch_entry_time = total_time + 1
        t = 0
        patch_reward = 0
        rewards_in_patch = 0
        failures_in_patch = 0

        while True:
            prob_reward = forager.depletion_func(patch_id, t)
            if forager.prob:
                reward = forager.gen_bern(prob_reward) * forager.reward_value[patch_id]
            else:
                reward = prob_reward * forager.reward_value[patch_id]

            patch_reward += reward
            total_time += 1
            t += 1

            if reward > 0:
                rewards_in_patch += 1
                failures_in_patch = 0  # failures are counted consecutively
            else:
                failures_in_patch += 1

            data.append({
                'time': total_time,
                'patch_id': patch_id,
                'time_in_patch': t,
                'reward': reward,
                'cumulative_patch_reward': patch_reward,
                'prob_reward': prob_reward,
                'rewards_in_patch': rewards_in_patch,
                'failures_in_patch': failures_in_patch,
                'patch_entry_time': patch_entry_time
            })

            # Check exit condition based on strategy
            if strategy == 'mvt':
                leave = patch_reward / (t + forager.travel_time) < threshold
            elif strategy == 'fixed_rewards':
                leave = rewards_in_patch >= threshold
            else:  # 'fixed_failures'
                leave = failures_in_patch >= threshold
            if leave:
                break

        # Add travel time
        for _ in range(forager.travel_time):
            total_time += 1
            data.append({
                'time': total_time,
                'patch_id': -1,  # -1 indicates traveling
                'time_in_patch': 0,
                'reward': 0,
                'cumulative_patch_reward': 0,
                'prob_reward': 0,
                'rewards_in_patch': 0,
                'failures_in_patch': 0,
                'patch_entry_time': None
            })

    return pd.DataFrame(data)

In [19]:
# Deterministic forager (prob=False): every step pays the expected value.
# Relies on travel_time, reward_value, a, b, c, d and patch_list defined in an
# earlier cell.
forager = PatchForager(travel_time, reward_value, a, b, c, d, prob=False)

# One per-time-step DataFrame for each leaving rule.
mvt_data = run_simulation(forager, 'mvt', patch_list, target_reward_rate=0.05)
fixed_rewards_data = run_simulation(forager, 'fixed_rewards', patch_list, target_rewards=5)
# NOTE(review): with prob=False the reward stays > 0 until the depletion term
# underflows to 0.0, so failures are only counted after thousands of steps per
# patch and this frame becomes very large — confirm this is intended.
fixed_failures_data = run_simulation(forager, 'fixed_failures', patch_list, max_failures=3)

# Save to CSV for later analysis (written to the current working directory;
# existing files with these names are overwritten).
mvt_data.to_csv('mvt_simulation_data.csv', index=False)
fixed_rewards_data.to_csv('fixed_rewards_simulation_data.csv', index=False)
fixed_failures_data.to_csv('fixed_failures_simulation_data.csv', index=False)