# Utility functions for the data analysis

**Copyright 2023 (c) Naomi Chaix-Echel & Nicolas P Rougier**  
Released under a BSD 2-clause license

This notebook gathers common functions used in the other notebooks. It can be used by typing:

> `%run "00-common.ipynb"`

at the top of a notebook.

## Import packages

In [None]:
import json                          # JSON operations
import datetime                      # Time operations
import numpy as np                   # Array operations
import pandas as pd                  # Database operations
import matplotlib.pyplot as plt      # Figures
from tqdm.notebook import tqdm       # Progress bar
from scipy.optimize import curve_fit # Curve fit
from scipy.optimize import minimize  # Minimize function

## Select subject[s] and task[s]

In [None]:
def select_trials(data, subject_id=None, task_id=None):
    """
    Select all the trials for given individual(s) (subject_id) and task(s) (task_id).
    subject_id must be a subgroup of subject_ids, task_id must be a subgroup of task_ids.

    Parameters:
    -----------

    data : dataframe
      Database (must hold a 'subject_id' column when subject_id is given
      and a 'task_id' column when task_id is given)

    subject_id: string or list
      subjects to be selected (ID), None selects every subject

    task_id: int (Python or NumPy integer) or list
      tasks to be selected (ID), None selects every task

    Return:
    -------

    A dataframe restricted to the rows matching subject_id(s) and task_id(s).
    When both are None, `data` itself is returned (not a copy).
    """

    # A lone string is a single subject ID, not a sequence of characters
    if isinstance(subject_id, str):
        subject_id = [subject_id]

    # NumPy integers (e.g. taken from data['task_id'].unique()) are not
    # instances of `int`; without wrapping them `isin` raises a TypeError
    if isinstance(task_id, (int, np.integer)):
        task_id = [task_id]

    # Build the row mask from whichever criteria were supplied
    mask = None
    if subject_id is not None:
        mask = data['subject_id'].isin(subject_id)
    if task_id is not None:
        task_mask = data['task_id'].isin(task_id)
        mask = task_mask if mask is None else mask & task_mask

    if mask is None:
        return data
    return data.loc[mask]


## Filter subjects based on their bias

In [None]:
# Per-subject bias filled in by filter_subjects: share of trials whose
# response is 1, minus 0.5, keyed by subject ID
subjects_bias = {}

def filter_subjects(data, bias=0.4):
    """
    Split subjects into valid and rejected ones according to how unevenly
    their responses are spread between 0 and 1 (left/right bias).

    As a side effect, the module-level `subjects_bias` dictionary is updated
    with, for each subject, the share of response==1 trials minus 0.5.

    Parameters:
    -----------

    data : dataframe
      Database (with 'subject_id' and 'response' columns)

    bias: float
      maximum accepted absolute difference between the share of
      response==0 trials and the share of response==1 trials

    Return:
    -------

    A tuple (valid IDs, rejected IDs), each a list in order of first
    appearance in data
    """

    accepted, rejected = [], []

    for sid in data['subject_id'].unique():
        subject_trials = select_trials(data, [sid])
        responses = subject_trials['response']
        total = len(subject_trials)

        # Share of each response over all the trials of this subject
        share_0 = int((responses == 0).sum()) / total
        share_1 = int((responses == 1).sum()) / total
        subjects_bias[sid] = share_1 - 0.5

        # A subject is rejected only when strictly above the threshold
        bucket = rejected if abs(share_0 - share_1) > bias else accepted
        bucket.append(sid)

    return accepted, rejected


## Task identification by condition & outcome

In [None]:
from enum import Enum

# Experimental condition of a task
class Condition(Enum):
    same_p = 1
    same_v = 2
    tradeoff = 3


# Kind of outcome involved in a task
class Outcome(Enum):
    gain = 1
    loss = 2
    both = 3


# Task ID -> (condition, outcome)
_task_description = {
    0: (Condition.tradeoff, Outcome.both),
    1: (Condition.same_p, Outcome.both),
    2: (Condition.same_p, Outcome.gain),
    3: (Condition.same_p, Outcome.loss),
    4: (Condition.same_v, Outcome.gain),
    5: (Condition.same_v, Outcome.loss),
    6: (Condition.tradeoff, Outcome.gain),
    7: (Condition.tradeoff, Outcome.loss),
}

def get_task_description(task_id):
    """
    Look up the condition and outcome that define a task.

    Parameters:
    -----------

    task_id : integer
      Identification of the task (a key of _task_description)

    Return:
    -------

      A (condition, outcome) tuple. An unknown task_id raises a KeyError.
    """

    condition, outcome = _task_description[task_id]
    return condition, outcome


def get_task_id(condition, outcome):
    """
    Find the task that corresponds to a given condition and outcome.

    Parameters:
    -----------

    condition: Condition
      One of Condition.same_p, Condition.same_v or Condition.tradeoff

    outcome : Outcome
      One of Outcome.gain, Outcome.loss or Outcome.both


    Return:
    -------

      ID of the first task described by (condition, outcome),
      or None when no task matches
    """

    wanted = (condition, outcome)
    matching_ids = (tid for tid, description in _task_description.items()
                    if description == wanted)
    return next(matching_ids, None)


## Convert left/right trials to risky/safe trials

In [None]:
def convert_trials(data, subject_id=None, task_id=None):
    """
    Convert trials from left/right to risky/safe for given individual(s) (subject_id)
    and task(s) (task_id). subject_id must be a subgroup of subject_ids, task_id must
    be a subgroup of task_ids.

    For each trial, the option with the strictly lower probability is the
    risky one; when both probabilities are equal, the left option is taken
    as risky.

    Parameters:
    -----------

    data : dataframe
      Database (with 'subject_id', 'response', 'P_left', 'V_left',
      'P_right' and 'V_right' columns)

    subject_id: string or list
      subjects to be selected (ID)

    task_id: int or list
      tasks to be selected (ID)

    Return:
    -------

    A converted and renamed copy of the selected trials where:
     - P_left/V_left/P_right/V_right are replaced with
       P_risky/V_risky/P_safe/V_safe
     - 'response' is kept when the right option is the risky one and
       flipped (1 - response) otherwise
     - 'bias' holds the subject's right bias (share of response==1 over
       all of the subject's trials in data, minus 0.5), with its sign
       flipped whenever the response is flipped
    """

    trials = select_trials(data, subject_id, task_id).copy()
    trials["bias"] = 0.0

    # We compute and store the left/right bias since it can be later used
    # for fitting and thus needs to be transformed according to the risky/safe
    # paradigm. The bias is computed per subject, over all the tasks (i.e.
    # not restricted to the task_ids being converted).
    for sid in trials["subject_id"].unique():
        # Select on the current subject only (not on the subject_id argument,
        # which would pool every requested subject into a single bias)
        T = select_trials(data, [sid])

        # Right bias of this subject over all of their trials; this is the
        # same quantity filter_subjects stores in subjects_bias[sid]
        B = len(T.loc[(T['response'] == 1)]) / len(T) - 0.5
        trials.loc[(trials['subject_id'] == sid), "bias"] = B

    P_left, V_left = trials['P_left'], trials['V_left']
    P_right, V_right = trials['P_right'], trials['V_right']

    # True where the right option is the risky (lower probability) one
    I = P_right < P_left
    P_risky = np.where(I, P_right, P_left)
    V_risky = np.where(I, V_right, V_left)
    P_safe = np.where(I, P_left, P_right)
    V_safe = np.where(I, V_left, V_right)

    # Response and bias are expressed relative to the right option: keep them
    # when right is risky, mirror them when left is risky
    R = np.where(I, trials['response'], 1 - trials['response'])
    B = np.where(I, trials['bias'], -trials['bias'])

    # Rename first so that the new columns keep the position of the old ones
    trials = trials.rename(columns={"P_left": "P_risky",
                                    "V_left": "V_risky",
                                    "P_right": "P_safe",
                                    "V_right": "V_safe"})
    trials["P_risky"] = P_risky
    trials["V_risky"] = V_risky
    trials["P_safe"] = P_safe
    trials["V_safe"] = V_safe
    trials["response"] = R
    trials["bias"] = B

    return trials