# **Task Sequence Generation**

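This notebook generates reversal learning task sequences:
- **reversal_ab**: Two-stimulus reversal learning task
- **reversal_abc**: Three-stimulus reversal learning task (Sandra's task) with C having random 50% reward
- **reversal_abc_multi**: Three-stimulus task with multiple A/B reversals, with C having random 50% reward throughout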

Sequences are saved as pickle files for use with Gymnasium environments.


In [221]:
import pickle
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

# Set random seed for reproducibility
# NOTE: this seeds NumPy's legacy global RNG. Every np.random.* draw in the
# cells below consumes this single stream, so the generated sequences are only
# reproducible when the cells are run once, in order, from a fresh kernel.
np.random.seed(42)


---
## Define Stimuli and Rewards


In [222]:
# Integer codes for the stimulus identities and for the possible trial outcomes
stimuli = dict(A=0, B=1, C=2)
rewards = dict(no_reward=0, reward=1)

def generate_random_reward(reward1, reward2, prob):
    """Pick one of two outcomes at random.

    Parameters
    ----------
    reward1 : object
        Value returned with probability ``prob``.
    reward2 : object
        Value returned otherwise (probability ``1 - prob``).
    prob : float
        Probability of returning ``reward1``. The draw lies in ``[0, 1)``, so
        ``prob=0`` never returns ``reward1`` and ``prob=1`` always does.

    Returns
    -------
    object
        ``reward1`` or ``reward2``, unchanged.

    Notes
    -----
    Consumes exactly one sample from NumPy's global random state, so the
    outcome is reproducible after ``np.random.seed``.
    """
    # Draw a scalar rather than a length-1 array so the comparison yields a
    # plain bool instead of relying on the truthiness of a one-element array.
    if np.random.rand() < prob:
        return reward1
    return reward2


---
## Generate Reversal AB Task Sequence


In [223]:
# Reversal AB settings: number of trials on each side of the reversal
num_pre_reversal_trials = 100  # A rewarded, B not
num_post_reversal_trials = 100  # A not rewarded, B rewarded

reversal_ab = {
    "stimuli": [],
    "rewards": [],
    "masks": {"reversal": []}
}

# One entry per phase: (trial count, stimulus that is rewarded, mask value).
# Phases are generated back to back, pre-reversal first, so the order of
# random draws matches a plain pre-loop followed by a post-loop.
ab_phases = [
    (num_pre_reversal_trials, stimuli["A"], 0),
    (num_post_reversal_trials, stimuli["B"], 1),
]

for n_trials, rewarded_stim, mask_value in ab_phases:
    for _ in range(n_trials):
        # A and B are presented with equal probability on every trial
        stim = np.random.choice([stimuli["A"], stimuli["B"]], p=[0.5, 0.5])
        reward = rewards["reward"] if stim == rewarded_stim else rewards["no_reward"]

        reversal_ab["stimuli"].append(stim)
        reversal_ab["rewards"].append(reward)
        reversal_ab["masks"]["reversal"].append(mask_value)

print(f"Generated reversal_ab sequence: {len(reversal_ab['stimuli'])} timesteps")
print(f"  Pre-reversal: {num_pre_reversal_trials} trials")
print(f"  Post-reversal: {num_post_reversal_trials} trials")


Generated reversal_ab sequence: 200 timesteps
  Pre-reversal: 100 trials
  Post-reversal: 100 trials


---
## Generate Reversal ABC Task Sequence


In [224]:
# Reversal ABC settings: number of trials on each side of the reversal
num_expected_occurrences = 4000  # Before reversal
num_unexpected_occurrences = 4000  # After reversal

reversal_abc = {
    "stimuli": [],
    "rewards": [],
    "masks": {"reversal": []}
}

# One entry per phase: (trial count, deterministically rewarded stimulus, mask value).
# Pre-reversal rewards A and not B; post-reversal rewards B and not A.
abc_phases = [
    (num_expected_occurrences, stimuli["A"], 0),
    (num_unexpected_occurrences, stimuli["B"], 1),
]

for n_trials, rewarded_stim, mask_value in abc_phases:
    for _ in range(n_trials):
        # The three stimuli are equally likely on every trial
        stim = np.random.choice([stimuli["A"], stimuli["B"], stimuli["C"]], p=[1/3, 1/3, 1/3])

        if stim == stimuli["C"]:
            # C is rewarded at random on 50% of its presentations in both phases
            reward = generate_random_reward(rewards["reward"], rewards["no_reward"], prob=0.5)
        elif stim == rewarded_stim:
            reward = rewards["reward"]
        else:
            reward = rewards["no_reward"]

        reversal_abc["stimuli"].append(stim)
        reversal_abc["rewards"].append(reward)
        reversal_abc["masks"]["reversal"].append(mask_value)

print(f"Generated reversal_abc sequence: {len(reversal_abc['stimuli'])} timesteps")
print(f"  Pre-reversal: {num_expected_occurrences} trials")
print(f"  Post-reversal: {num_unexpected_occurrences} trials")


Generated reversal_abc sequence: 8000 timesteps
  Pre-reversal: 4000 trials
  Post-reversal: 4000 trials


---
## Convert to One-Hot Encoded States


In [225]:
# One-hot state encoding
# reversal_ab uses 4 states (A, B, unrewarded, rewarded)
# reversal_abc uses 5 states (A, B, C, unrewarded, rewarded)

# Reversal AB: every trial contributes two consecutive states,
# the stimulus state followed by the outcome state.
state_map_ab = {"A": 0, "B": 1, "unrewarded": 2, "rewarded": 3}
state_sequence_ab = []

for stim, reward in zip(reversal_ab["stimuli"], reversal_ab["rewards"]):
    stim_key = "A" if stim == stimuli["A"] else "B"
    # The outcome state records reward availability only; in the actual task
    # it would depend on whether the agent licks.
    outcome_key = "rewarded" if reward == rewards["reward"] else "unrewarded"
    state_sequence_ab.extend([state_map_ab[stim_key], state_map_ab[outcome_key]])

# One-hot encode the integer codes (as a single-feature column) over the 4 categories
encoder_ab = OneHotEncoder(sparse_output=False, categories=[range(4)])
state_codes_ab = np.asarray(state_sequence_ab)[:, np.newaxis]
state_sequence_ohe_ab = encoder_ab.fit_transform(state_codes_ab)

# Reversal ABC: stimulus state followed by outcome state for every trial,
# using a 5-state map that adds C.
state_map_abc = {"A": 0, "B": 1, "C": 2, "unrewarded": 3, "rewarded": 4}
state_sequence_abc = []

for stim, reward in zip(reversal_abc["stimuli"], reversal_abc["rewards"]):
    # Anything that is neither A nor B is treated as C
    if stim == stimuli["A"]:
        stim_key = "A"
    elif stim == stimuli["B"]:
        stim_key = "B"
    else:
        stim_key = "C"
    outcome_key = "rewarded" if reward == rewards["reward"] else "unrewarded"
    state_sequence_abc.extend([state_map_abc[stim_key], state_map_abc[outcome_key]])

# One-hot encode the integer codes (as a single-feature column) over the 5 categories
encoder_abc = OneHotEncoder(sparse_output=False, categories=[range(5)])
state_codes_abc = np.asarray(state_sequence_abc)[:, np.newaxis]
state_sequence_ohe_abc = encoder_abc.fit_transform(state_codes_abc)

print(f"Reversal AB: {len(state_sequence_ohe_ab)} states (one-hot encoded)")
print(f"Reversal ABC: {len(state_sequence_ohe_abc)} states (one-hot encoded)")

# Phase boundaries for reversal_abc, expressed in state-sequence timesteps.
# Each trial occupies two timesteps (stimulus state, then outcome state),
# hence the factor of 2 applied to the trial counts.
reversal_timestep_abc = num_expected_occurrences * 2
final_timestep_abc = (num_expected_occurrences + num_unexpected_occurrences) * 2

phase_boundaries_abc = {
    # The single reversal sits at the end of the pre-reversal phase
    "reversal_points": [reversal_timestep_abc],
    "pre_reversal": {"start": 0, "end": reversal_timestep_abc},
    "post_reversal": {"start": reversal_timestep_abc, "end": final_timestep_abc},
}

print(f"\nReversal ABC phase boundaries:")
print(f"  Pre-reversal: timesteps {phase_boundaries_abc['pre_reversal']['start']} to {phase_boundaries_abc['pre_reversal']['end']}")
print(f"  Post-reversal: timesteps {phase_boundaries_abc['post_reversal']['start']} to {phase_boundaries_abc['post_reversal']['end']}")
print(f"  Reversal point: {phase_boundaries_abc['reversal_points']}")

Reversal AB: 400 states (one-hot encoded)
Reversal ABC: 16000 states (one-hot encoded)

Reversal ABC phase boundaries:
  Pre-reversal: timesteps 0 to 8000
  Post-reversal: timesteps 8000 to 16000
  Reversal point: [8000]


---
## Generate Multi-Reversal ABC Task Sequence


In [None]:
# Multi-reversal ABC task
# phase_trials lists the number of trials in each phase. The rewarded stimulus
# alternates between phases: A rewarded -> B rewarded -> A rewarded -> ...
# C always has random 50% reward.
phase_trials = [2000, 2000, 2000, 2000]  # 4 phases: A rewarded, B rewarded, A rewarded, B rewarded
# Shorter alternative: phase_trials = [1000, 1000, 1000, 1000]

reversal_abc_multi = {
    "stimuli": [],
    "rewards": [],
    "masks": {"reversal": []}
}

# Generate the phases back to back
for phase_idx, num_trials in enumerate(phase_trials):
    # Even phases (0, 2, 4...) reward A; odd phases (1, 3, 5...) reward B
    a_rewarded = (phase_idx % 2 == 0)
    rewarded_stim = stimuli["A"] if a_rewarded else stimuli["B"]

    for _ in range(num_trials):
        # The three stimuli are equally likely on every trial
        stim = np.random.choice([stimuli["A"], stimuli["B"], stimuli["C"]], p=[1/3, 1/3, 1/3])

        if stim == stimuli["C"]:
            reward = generate_random_reward(rewards["reward"], rewards["no_reward"], prob=0.5)
        elif stim == rewarded_stim:
            reward = rewards["reward"]
        else:
            reward = rewards["no_reward"]

        reversal_abc_multi["stimuli"].append(stim)
        reversal_abc_multi["rewards"].append(reward)
        # Unlike a binary pre/post flag, this mask stores the phase index
        reversal_abc_multi["masks"]["reversal"].append(phase_idx)

print(f"Generated reversal_abc_multi sequence: {len(reversal_abc_multi['stimuli'])} timesteps")
print(f"  Number of phases: {len(phase_trials)}")
for i, num_trials in enumerate(phase_trials):
    phase_type = "B rewarded" if i % 2 else "A rewarded"
    print(f"  Phase {i} ({phase_type}): {num_trials} trials")


Generated reversal_abc_multi sequence: 4000 timesteps
  Number of phases: 4
  Phase 0 (A rewarded): 1000 trials
  Phase 1 (B rewarded): 1000 trials
  Phase 2 (A rewarded): 1000 trials
  Phase 3 (B rewarded): 1000 trials


---
## Convert Multi-Reversal ABC to One-Hot Encoded States


In [227]:
# Reversal ABC Multi: same 5-state mapping as reversal_abc; every trial
# contributes a stimulus state followed by an outcome state.
state_map_abc_multi = {"A": 0, "B": 1, "C": 2, "unrewarded": 3, "rewarded": 4}
state_sequence_abc_multi = []

for stim, reward in zip(reversal_abc_multi["stimuli"], reversal_abc_multi["rewards"]):
    # Anything that is neither A nor B is treated as C
    if stim == stimuli["A"]:
        stim_key = "A"
    elif stim == stimuli["B"]:
        stim_key = "B"
    else:
        stim_key = "C"
    outcome_key = "rewarded" if reward == rewards["reward"] else "unrewarded"
    state_sequence_abc_multi.extend(
        [state_map_abc_multi[stim_key], state_map_abc_multi[outcome_key]]
    )

# One-hot encode the integer codes (as a single-feature column) over the 5 categories
encoder_abc_multi = OneHotEncoder(sparse_output=False, categories=[range(5)])
state_codes_abc_multi = np.asarray(state_sequence_abc_multi)[:, np.newaxis]
state_sequence_ohe_abc_multi = encoder_abc_multi.fit_transform(state_codes_abc_multi)

print(f"Reversal ABC Multi: {len(state_sequence_ohe_abc_multi)} states (one-hot encoded)")

# Phase boundaries in state-sequence timesteps (each trial = 2 timesteps).
# A single pass keeps a running trial count: each phase starts where the
# previous one ended, and every phase after the first begins at a reversal.
phase_boundaries = {"reversal_points": []}
cumulative_trials = 0
for i, num_trials in enumerate(phase_trials):
    phase_start = cumulative_trials * 2
    if i > 0:  # The first phase has no reversal point before it
        phase_boundaries["reversal_points"].append(phase_start)
    cumulative_trials += num_trials
    phase_end = cumulative_trials * 2
    phase_boundaries[f"phase_{i}"] = {"start": phase_start, "end": phase_end}

print(f"\nPhase boundaries:")
for i in range(len(phase_trials)):
    print(f"  Phase {i}: timesteps {phase_boundaries[f'phase_{i}']['start']} to {phase_boundaries[f'phase_{i}']['end']}")
print(f"  Reversal points: {phase_boundaries['reversal_points']}")


Reversal ABC Multi: 8000 states (one-hot encoded)

Phase boundaries:
  Phase 0: timesteps 0 to 2000
  Phase 1: timesteps 2000 to 4000
  Phase 2: timesteps 4000 to 6000
  Phase 3: timesteps 6000 to 8000
  Reversal points: [2000, 4000, 6000]


---
## Save to Pickle Files

In [228]:
# Output directory for the generated pickle files.
# It is defined (and created) here because this is the first cell that writes
# to disk; without it, running the notebook top to bottom on a fresh kernel
# fails with a NameError on data_dir.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
data_dir = Path("/Users/pmccarthy/Documents/StrPFC/new_repo/data")
data_dir.mkdir(exist_ok=True)

# Save reversal_abc_multi: raw trial sequence, one-hot states, state map and
# phase boundaries bundled into one dictionary.
data_abc_multi = {
    "sequence": reversal_abc_multi,
    "state_sequence_ohe": state_sequence_ohe_abc_multi,
    "state_map": state_map_abc_multi,
    "sequence_ohe": state_sequence_ohe_abc_multi,  # Alias for compatibility
    "phase_boundaries": phase_boundaries
}

with open(data_dir / "reversal_abc_multi.pkl", "wb") as f:
    pickle.dump(data_abc_multi, f)
print(f"Saved reversal_abc_multi.pkl to {data_dir}")


Saved reversal_abc_multi.pkl to /Users/pmccarthy/Documents/StrPFC/new_repo/data


In [229]:
# Make sure the output directory exists before writing
data_dir = Path("/Users/pmccarthy/Documents/StrPFC/new_repo/data")
data_dir.mkdir(exist_ok=True)

# reversal_ab bundle: raw trial sequence, one-hot states and the state map
data_ab = {
    "sequence": reversal_ab,
    "state_sequence_ohe": state_sequence_ohe_ab,
    "state_map": state_map_ab,
}
data_ab["sequence_ohe"] = data_ab["state_sequence_ohe"]  # Alias for compatibility

ab_path = data_dir / "reversal_ab.pkl"
with ab_path.open("wb") as f:
    pickle.dump(data_ab, f)
print(f"Saved reversal_ab.pkl to {data_dir}")

# reversal_abc bundle: same layout plus the phase boundaries
data_abc = {
    "sequence": reversal_abc,
    "state_sequence_ohe": state_sequence_ohe_abc,
    "state_map": state_map_abc,
}
data_abc["sequence_ohe"] = data_abc["state_sequence_ohe"]  # Alias for compatibility
data_abc["phase_boundaries"] = phase_boundaries_abc

abc_path = data_dir / "reversal_abc.pkl"
with abc_path.open("wb") as f:
    pickle.dump(data_abc, f)
print(f"Saved reversal_abc.pkl to {data_dir}")

print("\nTask generation complete!")


Saved reversal_ab.pkl to /Users/pmccarthy/Documents/StrPFC/new_repo/data
Saved reversal_abc.pkl to /Users/pmccarthy/Documents/StrPFC/new_repo/data

Task generation complete!
