In [1]:
from cognitive_models import *
import unittest
import numpy as np
from numpy.typing import NDArray
import copy
import matplotlib.pyplot as plt
from joblib import Parallel, delayed
import numpy as np
import matplotlib.pyplot as plt
from typing import Any, Dict, Tuple, List, Union, Callable
from typing import List
from numpy.typing import NDArray
from ast import literal_eval
import pandas as pd

# Fit Probabilistic Reward Learning Task

## Load all Real Data

In [None]:
# Absolute, machine-specific location of the per-subject behavioural CSV.
# NOTE(review): this path only resolves on the author's machine; edit it before running elsewhere.
PathTofile = "/media/mohammad/New Volume/DoctoralSharif/Articles/RL and MDD/git/RL-and-DD/Data/BehvioralData/Subjects_Performance.csv"
# Load CSV file into a DataFrame (the loaders defined later re-read the file from PathTofile themselves).
df = pd.read_csv(PathTofile)


## Fit to Real Data(all subjects)

### Create Custom Data Loader for the Data-Fitting Package

In [7]:
def custom_task_loader(file_path: str,
                      task_col: str = 'Task', subject_group:str = 'D', **kwargs) -> List[NDArray]:
    """
    Load per-subject trial data from a CSV file whose Task column stores, as a
    string, a list of [stimulus, reaction_time, choice, reward] rows.

    Parameters:
    - file_path (str): Path to the CSV file.
    - task_col (str): Column name for Task data (string lists of T*4 arrays).
    - subject_group (str): 'CTL' keeps rows with BDI <= 7, 'D' keeps rows with
      BDI >= 13. Any other value applies no BDI filter.
    - **kwargs: Additional arguments for pandas.read_csv.

    Returns:
    - List[NDArray]: One integer array of shape (n_trials, 3) per kept row,
      holding the [stimulus, choice, reward] columns. The order of arrays
      matches the order of the kept rows in the file.

    Raises:
    - ValueError: If task_col is missing, no rows remain after filtering, a
      Task entry cannot be parsed, or a parsed entry is not a T*4 array.
    """
    df = pd.read_csv(file_path, **kwargs)
    # Index label 37 is always excluded (the notebook does not record why).
    # errors="ignore" keeps the loader usable on files that lack that label
    # instead of failing with a KeyError.
    df = df.drop(index=37, errors="ignore")

    if subject_group == 'CTL':
        df = df[df["BDI"] <= 7]
    elif subject_group == "D":
        df = df[df["BDI"] >= 13]

    required_cols = [task_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    df = df.dropna(subset=required_cols)
    if df.empty:
        raise ValueError("No valid data after removing missing values")

    behavioral_data: List[NDArray] = []

    # Iterate over (index label, raw Task string) so that error messages can
    # name the offending row instead of echoing the whole Task payload.
    for row_label, raw_task in df[task_col].items():
        try:
            task_data = np.array(literal_eval(raw_task), dtype=float)
        except Exception as e:
            raise ValueError(f"Failed to parse Task data for subject at row {row_label}: {e}") from e

        if task_data.ndim != 2 or task_data.shape[1] != 4:
            raise ValueError(
                f"Task data for subject at row {row_label} must be T*4, got shape {task_data.shape}"
            )

        # Extract [stimulus, choice, reward] (indices 0, 2, 3); reaction time
        # (index 1) is dropped. astype(int) truncates any fractional part.
        behavioral_data.append(task_data[:, [0, 2, 3]].astype(int))

    if not behavioral_data:
        raise ValueError("No valid subject data found")

    return behavioral_data


# Number of subjects returned for the 'CTL' group (BDI <= 7 in the loader).
len(custom_task_loader(PathTofile, task_col='Task', subject_group='CTL'))

73

### Fitting Parallelized Apply

In [None]:
# Setup environment
config_path = "cognitive_models/tasks/mdp_pl_config.json"  # Adjust path as needed
try:
    CONTEXTS, TRANSITIONS = parse_mdp_config(config_path)
except FileNotFoundError as err:
    # Chain explicitly so the traceback reports the underlying error as the
    # cause, rather than as a second failure raised "during handling".
    raise FileNotFoundError(f"Configuration file not found: {config_path}") from err

class StateTask(State):
    """State subclass that only forwards `name` to State; it adds no attributes of its own."""
    def __init__(self, name: str=None):
        super().__init__(name)
class StateQ(State):
    """State subclass that additionally stores a `Q` array on the instance."""
    def __init__(self, name: str=None, Q: NDArray[np.float64]=None):
        super().__init__(name)
        # Kept as given (no copy); below, each state is built with its own np.zeros((2,)) array.
        self.Q = Q


# Create task and agent states (task states are the real states; agent states are the agent's representation of them).
# Agent states carry an additional Q attribute, initialised here to a length-2 zero array per state.
# NOTE(review): the task states are built from the base State class, not from StateTask.
StatesTask = np.array([State(name=k) for k in CONTEXTS.keys()])
task_mdp = PL(StatesTask, CONTEXTS, TRANSITIONS)
StatesAgent = np.array([StateQ(k, np.zeros((2,))) for k in CONTEXTS.keys()])

# Initialize agent and environment
agent = QL1_RL(task_mdp, StatesAgent, alphaP=0.1, alphaN=0.1)
env = MDPEnvironment(agent, task_mdp)

# Initialize EMGuassian with custom data loader
em = EMGuassian(
    environment=env,
    num_params=2,  # presumably the two learning rates (alphaP, alphaN) given to QL1_RL above — TODO confirm
    num_iteration_em=20,  # NOTE(review): earlier comment called this a small value for testing — confirm for the final run
    num_iteration_gradient_descent=10000,  # NOTE(review): earlier comment called this a reduced value — confirm for the final run
    learning_rate=0.01,
    tol=1e-6,
    data_loader=custom_task_loader)

# Run the fit. subject_group is not passed here, so if em.fit forwards its
# keyword arguments to data_loader (TODO confirm), the loader's default group
# applies.
try:
    EstimatedData = em.fit(
        behavioral_data=PathTofile,
        task_col="Task",
    )
except Exception as e:
    print(f"Model fitting failed: {e}")
    # Re-raise so execution stops here: continuing would leave EstimatedData
    # undefined (or stale from an earlier run) for the save cell that follows.
    raise

### Save Estimated Data

In [None]:
import pickle
# This path is just an example.
# Writes EstimatedData to disk with pickle. EstimatedData must come from a
# successful run of the fitting cell above.
with open("/media/mohammad/New Volume/DoctoralSharif/Articles/RL and MDD/FittedData/total_dp_data.pkl", "wb") as file:
    pickle.dump(EstimatedData, file)

## Fit Available Subjects

### Load Available Subjects

In [8]:
# Array of available subjects. Row 1 (A[1, :]) is used below as the default set
# of values matched against the CSV's "ID" column.
# NOTE(review): machine-specific absolute path; the meaning of the other rows is not shown here.
A = np.load("/media/mohammad/New Volume/DoctoralSharif/Articles/RL and MDD/FittedData/AvailableSubjects.npy")

In [9]:
def custom_task_loader(file_path: str,available: NDArray = A[1,:], 
                      task_col: str = 'Task', subject_group:str = 'D',**kwargs) -> List[NDArray]:
    """
    Load per-subject trial data from a CSV file whose Task column stores, as a
    string, a list of [stimulus, reaction_time, choice, reward] rows, keeping
    only subjects whose ID is listed in `available`.

    This definition replaces the earlier `custom_task_loader` in the notebook.

    Parameters:
    - file_path (str): Path to the CSV file.
    - available (NDArray): Values matched against the "ID" column; rows whose ID
      is not among them are dropped. The default, A[1, :], is evaluated once
      when this function is defined, so re-run this cell after reloading A.
    - task_col (str): Column name for Task data (string lists of T*4 arrays).
    - subject_group (str): 'CTL' keeps rows with BDI <= 7, 'D' keeps rows with
      BDI >= 13. Any other value applies no BDI filter.
    - **kwargs: Additional arguments for pandas.read_csv.

    Returns:
    - List[NDArray]: One integer array of shape (n_trials, 3) per kept row,
      holding the [stimulus, choice, reward] columns. The order of arrays
      matches the order of the kept rows in the file.

    Raises:
    - ValueError: If task_col is missing, no rows remain after filtering, a
      Task entry cannot be parsed, or a parsed entry is not a T*4 array.
    """
    df = pd.read_csv(file_path, **kwargs)
    # Index label 37 is always excluded (the notebook does not record why).
    # errors="ignore" keeps the loader usable on files that lack that label
    # instead of failing with a KeyError.
    df = df.drop(index=37, errors="ignore")

    if subject_group == 'CTL':
        df = df[df["BDI"] <= 7]
    elif subject_group == "D":
        df = df[df["BDI"] >= 13]

    # Restrict to the subjects listed in `available`.
    df = df[df["ID"].isin(available)]

    required_cols = [task_col]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise ValueError(f"Missing required columns: {missing_cols}")

    df = df.dropna(subset=required_cols)
    if df.empty:
        raise ValueError("No valid data after removing missing values")

    behavioral_data: List[NDArray] = []

    # Pair each Task string with its subject ID so that error messages name
    # the subject instead of echoing the whole Task payload.
    for subject_id, raw_task in zip(df["ID"], df[task_col]):
        try:
            task_data = np.array(literal_eval(raw_task), dtype=float)
        except Exception as e:
            raise ValueError(f"Failed to parse Task data for subject {subject_id}: {e}") from e

        if task_data.ndim != 2 or task_data.shape[1] != 4:
            raise ValueError(
                f"Task data for subject {subject_id} must be T*4, got shape {task_data.shape}"
            )

        # Extract [stimulus, choice, reward] (indices 0, 2, 3); reaction time
        # (index 1) is dropped. astype(int) truncates any fractional part.
        behavioral_data.append(task_data[:, [0, 2, 3]].astype(int))

    if not behavioral_data:
        raise ValueError("No valid subject data found")

    return behavioral_data

# Number of subjects returned with the loader defaults (available=A[1, :], subject_group='D').
len(custom_task_loader(file_path=PathTofile))

44

In [None]:
# Setup environment
config_path = "cognitive_models/tasks/mdp_pl_config.json"  # Adjust path as needed
try:
    CONTEXTS, TRANSITIONS = parse_mdp_config(config_path)
except FileNotFoundError as err:
    # Chain explicitly so the traceback reports the underlying error as the
    # cause, rather than as a second failure raised "during handling".
    raise FileNotFoundError(f"Configuration file not found: {config_path}") from err

class StateTask(State):
    """State subclass that only forwards `name` to State; it adds no attributes of its own.

    NOTE(review): identical to the StateTask defined in the earlier fitting cell; this redefinition shadows it.
    """
    def __init__(self, name: str=None):
        super().__init__(name)
class StateQ(State):
    """State subclass that additionally stores a `Q` array on the instance.

    NOTE(review): identical to the StateQ defined in the earlier fitting cell; this redefinition shadows it.
    """
    def __init__(self, name: str=None, Q: NDArray[np.float64]=None):
        super().__init__(name)
        # Kept as given (no copy); below, each state is built with its own np.zeros((2,)) array.
        self.Q = Q


# Create task and agent states (task states are the real states; agent states are the agent's representation of them).
# Agent states carry an additional Q attribute, initialised here to a length-2 zero array per state.
# NOTE(review): the task states are built from the base State class, not from StateTask.
StatesTask = np.array([State(name=k) for k in CONTEXTS.keys()])
task_mdp = PL(StatesTask, CONTEXTS, TRANSITIONS)
StatesAgent = np.array([StateQ(k, np.zeros((2,))) for k in CONTEXTS.keys()])

# Initialize agent and environment
agent = QL1_RL(task_mdp, StatesAgent, alphaP=0.1, alphaN=0.1)
env = MDPEnvironment(agent, task_mdp)

# Initialize EMGuassian with custom data loader
# (custom_task_loader here is the redefined loader that also filters on the available subject IDs)
em = EMGuassian(
    environment=env,
    num_params=2,  # presumably the two learning rates (alphaP, alphaN) given to QL1_RL above — TODO confirm
    num_iteration_em=20,  # NOTE(review): earlier comment called this a small value for testing — confirm for the final run
    num_iteration_gradient_descent=10000,  # NOTE(review): earlier comment called this a reduced value — confirm for the final run
    learning_rate=0.01,
    tol=1e-6,
    data_loader=custom_task_loader)

# Run the fit. Neither `available` nor `subject_group` is passed here, so if
# em.fit forwards its keyword arguments to data_loader (TODO confirm), the
# loader defaults apply.
try:
    EstimatedData = em.fit(
        behavioral_data=PathTofile,
        task_col="Task",
    )
except Exception as e:
    print(f"Model fitting failed: {e}")
    # Re-raise so execution stops here: continuing would let the save cell
    # pickle an EstimatedData left over from an earlier fit.
    raise

### Save Estimated Data

In [None]:
import pickle
# Writes EstimatedData to disk with pickle. EstimatedData must come from a
# successful run of the fitting cell directly above.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
with open("/media/mohammad/New Volume/DoctoralSharif/Articles/RL and MDD/FittedData/available_dp_data.pkl", "wb") as file:
    pickle.dump(EstimatedData, file)