# Utility functions for the data analysis

**Copyright 2023 (c) Naomi Chaix-Echel & Nicolas P Rougier**  
Released under a BSD 2-clause license

This notebook gathers common functions used in the other notebooks. It can be used by typing:

> `%run "00-common.ipynb"`

at the top of a notebook.

## Import packages

In [1]:
import json                          # JSON operations
import datetime                      # Time operations
import numpy as np                   # Array operations
import pandas as pd                  # Database operations
import matplotlib.pyplot as plt      # Figures
from tqdm.notebook import tqdm       # Progress bar
from scipy.optimize import curve_fit # Curve fit
from scipy.optimize import minimize  # Minimize function

## Select subject[s] and task[s]

In [2]:
def select_trials(data, subject_id=None, task_id=None):
    """
    Select all the trials for given individual(s) (subject_id) and task(s) (task_id).
    subject_id must be a subgroup of subject_ids, task_id must be a subgroup of task_ids.

    Parameters:
    -----------

    data : dataframe
      Database (must hold the 'subject_id' and/or 'task_id' column that is
      filtered on)

    subject_id: string or list
      subjects to be selected (ID); None (default) selects every subject

    task_id: int or list
      tasks to be selected (ID); a NumPy integer scalar is accepted as well.
      None (default) selects every task

    Return:
    -------

    A dataframe restricted to the requested subject_id(s) and task_id(s).
    When neither is given, the input dataframe itself is returned (no copy).
    """

    # Series.isin only accepts list-likes, so a lone ID is wrapped in a list.
    if isinstance(subject_id, str):
        subject_id = [subject_id]

    # NumPy integer scalars (e.g. the items of data['task_id'].unique()) are
    # not instances of int, hence the explicit np.integer check.
    if isinstance(task_id, (int, np.integer)):
        task_id = [task_id]

    if subject_id is not None and task_id is not None:
        return data.loc[(data['task_id'].isin(task_id)) & (data['subject_id'].isin(subject_id))]
    elif subject_id is not None:
        return data.loc[(data['subject_id'].isin(subject_id))]
    elif task_id is not None:
        return data.loc[(data['task_id'].isin(task_id))]
    else:
        return data


## Filter subjects based on their day bias

In [3]:
# Module-level dictionary, created empty here; nothing in this section fills it.
# Presumably meant to map a subject ID to that subject's right bias over all
# trials -- TODO confirm against the notebooks that populate it.
subjects_bias = {}

def filter_subjects(data, bias=0.4):
    """
    Filter subjects by excluding, for each of them, the days whose bias is greater
    than the specified threshold. The bias of a day is the mean of 'response' over
    that day's trials minus 0.5.
    If an individual has remaining data after excluding biased days, include the
    individual in valid_ids. Otherwise, include the individual in reject_ids.

    Parameters:
    -----------
    data : dataframe
        Database with 'subject_id', 'date' (datetime) and 'response' columns.
        The caller's dataframe is not modified.
    bias : float
        Maximum allowed day bias (absolute value)

    Return:
    -------
    A tuple (valid_ids, reject_ids, biased_days_count, data) where:
      - valid_ids is the list of subjects that still have trials,
      - reject_ids is the list of subjects left without any trial,
      - biased_days_count is a dictionary giving, for each valid subject, the
        number of excluded days (it is set to 0 for rejected subjects),
      - data is the database without the trials of the excluded days.
    """

    subject_ids = data['subject_id'].unique()
    valid_ids, reject_ids = [], []
    biased_days_count = {}

    for subject_id in subject_ids:
        is_subject = data['subject_id'] == subject_id

        # Calendar day of each trial (time of day dropped), so that trials are
        # grouped per day and can be matched back against the biased days even
        # when 'date' carries a time component.
        trial_day = data['date'].dt.normalize()

        # Mean bias of the current subject for each day
        subject_trials = data[is_subject]
        daily_bias = subject_trials.groupby(trial_day[is_subject])['response'].mean() - 0.5

        # Identify days with bias greater than the specified threshold
        biased_days = daily_bias[daily_bias.abs() > bias].index

        # Exclude rows corresponding to biased days for the current subject
        is_biased = is_subject & trial_day.isin(biased_days)
        data = data[~is_biased]

        # Check whether *this subject* (not the whole database) has remaining
        # data after excluding biased days
        if (is_subject & ~is_biased).any():
            valid_ids.append(subject_id)
            biased_days_count[subject_id] = len(biased_days)
        else:
            reject_ids.append(subject_id)
            biased_days_count[subject_id] = 0

    return valid_ids, reject_ids, biased_days_count, data


## Task identification by condition & outcome

In [4]:
from enum import Enum

# Condition of a task; members are numbered from 1 in declaration order.
class Condition(Enum):
    same_p = 1
    same_v = 2
    tradeoff = 3


# Outcome of a task; members are numbered from 1 in declaration order.
class Outcome(Enum):
    gain = 1
    loss = 2
    both = 3


# Lookup table: task ID -> (condition, outcome) pair describing the task.
_task_description = {
    0: (Condition.tradeoff, Outcome.both),
    1: (Condition.same_p, Outcome.both),
    2: (Condition.same_p, Outcome.gain),
    3: (Condition.same_p, Outcome.loss),
    4: (Condition.same_v, Outcome.gain),
    5: (Condition.same_v, Outcome.loss),
    6: (Condition.tradeoff, Outcome.gain),
    7: (Condition.tradeoff, Outcome.loss),
}

def get_task_description(task_id):
    """
    Look up the condition and outcome that describe a task.

    Parameters:
    -----------

    task_id : integer
      Identification of the task (a key of _task_description)

    Return:
    -------

      (condition, outcome) tuple registered for task_id.
      An unknown task_id raises KeyError.
    """

    condition, outcome = _task_description[task_id]
    return condition, outcome


def get_task_id(condition, outcome):
    """
    Find the task that matches a given condition and outcome.

    Parameters:
    -----------

    condition: Condition
      One of Condition.same_p, Condition.same_v or Condition.tradeoff

    outcome : Outcome
      One of Outcome.gain, Outcome.loss or Outcome.both


    Return:
    -------

      ID of the task with given condition and outcome, or None when no
      task is registered for that pair
    """

    wanted = (condition, outcome)
    matching_ids = (
        candidate_id
        for candidate_id, description in _task_description.items()
        if description == wanted
    )
    # First match wins; None is the fallback when nothing matches.
    return next(matching_ids, None)


## Convert left/right trials to risky/safe trials

In [5]:
def convert_trials(data, subject_id=None, task_id=None):
    """
    Convert trials from left/right to risky/safe for given individual(s) (subject_id)
    and task(s) (task_id). subject_id must be a subgroup of subject_ids, task_id must
    be a subgroup of task_ids.

    For each trial, the option with the strictly lower P1 is labelled "risky" and
    the other one "safe"; when both P1 are equal, the left option is the one labelled
    "risky". Only the P1_*/V1_* columns are converted.

    Parameters:
    -----------

    data : dataframe
      Database with 'subject_id', 'response', 'P1_left', 'V1_left', 'P1_right'
      and 'V1_right' columns (plus 'task_id' when task_id is given)

    subject_id: string or list
      subjects to be selected (ID)

    task_id: int or list
      tasks to be selected (ID)

    Return:
    -------

    A converted and renamed copy of the selected trials (left/right replaced with
    risky/safe):
      - P_risky, V_risky, P_safe, V_safe replace P1_left, V1_left, P1_right, V1_right,
      - response is kept when the right option is the risky one and replaced by
        1 - response otherwise,
      - bias is the per-subject right bias (fraction of response == 1 minus 0.5),
        kept when the right option is the risky one and negated otherwise.
    """

    trials = select_trials(data, subject_id, task_id).copy()
    trials["bias"] = 0.0

    # We compute and store the left/right bias since it can be later used
    # for fitting and thus need to be transformed according to the risky/safe
    # paradigm. The bias is computed per subject, over all the tasks of that
    # subject (i.e. not restricted to the task_ids being converted).
    for sid in trials["subject_id"].unique():
        # Trials of this subject only (the loop variable, not the subject_id
        # argument, which may be None or hold several subjects). The ID is
        # wrapped in a list so that it works whatever its type.
        T = select_trials(data, [sid])

        # Right bias of this subject over all tasks
        right_bias = len(T.loc[(T['response'] == 1)]) / len(T) - 0.5
        trials.loc[(trials['subject_id'] == sid), "bias"] = right_bias

        # Alternative (disabled): use a precomputed per-subject bias taken
        # from the module-level subjects_bias dictionary.

    P1_left, V1_left = trials['P1_left'], trials['V1_left']
    P1_right, V1_right = trials['P1_right'], trials['V1_right']

    # True where the right option is the risky one (strictly lower P1)
    I = P1_right < P1_left
    P_risky = np.where(I, P1_right, P1_left)
    V_risky = np.where(I, V1_right, V1_left)
    P_safe = np.where(I, P1_left, P1_right)
    V_safe = np.where(I, V1_left, V1_right)
    R = np.where(I, trials['response'], 1 - trials['response'])
    B = np.where(I, trials['bias'], -trials['bias'])

    # Columns are renamed in place (keeping their position in the dataframe)
    # and then overwritten with the converted values.
    trials = trials.rename(columns={"P1_left": "P_risky",
                                    "V1_left": "V_risky",
                                    "P1_right": "P_safe",
                                    "V1_right": "V_safe"})
    trials["P_risky"] = P_risky
    trials["V_risky"] = V_risky
    trials["P_safe"] = P_safe
    trials["V_safe"] = V_safe

    trials["response"] = R
    trials["bias"] = B

    return trials