In [9]:
import os
import scipy.io
import numpy as np
import pandas as pd
from scipy.io import loadmat

1. Get the data
%   
%   Use this code to get a data set (array of RTs from a single condition) 
%   to fit, already preprocessed to include correct trials only and remove
%   outliers (including express saccades). See later_getData for details
data = later_getData([], [], 0.2);
RTs = data{1};
clear data

From GetData tab: Each raw data file in data/data_mgl/F has the following vectors (in 
%    each case, columns are individual trials):
%     - decisionSum takes -1 if the decision was left side, and 1 if the 
%        decision was right side.
%     - labelSum takes 1 for trials after change point (TACP) 0, and 2 
%        for TACP 1, and 3 for TACP 2, and 4 for TACP 3, and 5 for TACP 4, 
%        and 0 for the rest. [NOTE FROM JIG: THIS IS HOW MY STUDENT TIM KIM
%        CODED THE DATA, SO I WANT TO KEEP IT IN THIS RAW FORMAT. HOWEVER, 
%        PLEASE NOTE THAT THIS CODING SCHEME SEEMS OVERLY CONFUSING; I
%        WOULD HAVE CODED IT AS 0 FOR TACP=0, 1 FOR TACP=1, ETC]
%     - numdirSum takes -1 if the sound was left side, and 1 if the sound 
%        was right side.
%     - percorrSum is 0 if the subject's answer was incorrect, and 1 
%        if the subject's answer was correct.
%     - syncSum is 1 if the current trial is a "pupil trial" and 0 if 
%        the current trial is "RT trial" [NOTE FROM JIG: IGNORED HERE]
%     - tRxnSum is RT measured by mglGetSecs, where the RT is defined 
%        as the time when the eyes leave the fixation window. 
%        The fixation window was defined as 30% of the height and width of 
%        the screen (32.31cm x 51.69cm).

In [13]:
def later_get_data(subject_tag=None, data_directory=None, express_cutoff=None):
    """
    Retrieve experimental data and associated labels based on the given parameters.

    Parameters:
    subject_tag : str or None
        Identifier for the subject (default is None).
    data_directory : str or None
        Path to the directory containing data files (default is None).
    express_cutoff : float
        Threshold value used to filter or categorize data (default is 0.2).

    Returns:
    tuple
        A tuple containing two elements:
        - data_ : list
            The main data output, such as response times or experimental data.
        - labels_ : list
            Associated labels or metadata related to the data.
    """
    # Set default subject_tag if not provided
    if subject_tag is None or subject_tag == '':
        subject_tag = 'JT'

    # Set default data_directory if not provided
    if data_directory is None or data_directory == '':
        data_directory = os.path.join('/Users/urodas/Library/Mobile Documents/com~apple~CloudDocs/2024 Fall Semester/Quantitative Neuroscience/LATER Model Fitting Data')

    # Set default express_cutoff if not provided
    if express_cutoff is None:
        express_cutoff = 0.0

    # Load the data from the specified subject
    mat_file_path = os.path.join(data_directory, 'data_mgl', 'F', f'{subject_tag}_RT.mat')
    
    try:
        mat_data = scipy.io.loadmat(mat_file_path)
    except FileNotFoundError:
        raise FileNotFoundError(f"Data file not found: {mat_file_path}")

    # Extract data and labels from the loaded .mat file
    # Adjust these based on the actual structure of your .mat file
    data_ = mat_data.get('data_variable_name')  # Replace with the actual variable name in the .mat file
    labels_ = mat_data.get('labels_variable_name')  # Replace with the actual variable name in the .mat file
    
    return data_, labels_



def define_selection_criteria(percorr_sum, t_rxn_sum, express_cutoff):
    """
    Define selection criteria for trials based on correctness and reaction times.

    Parameters:
    percorr_sum : np.ndarray
        An array indicating the correctness of trials (1 for correct, 0 for incorrect).
    t_rxn_sum : np.ndarray
        An array of reaction times for the trials.
    express_cutoff : list or np.ndarray
        A list or array containing the express cutoff values.

    Returns:
    np.ndarray
        A boolean array indicating which trials meet the selection criteria.
    """

    # Ensure express_cutoff is an array-like object for indexing
    express_cutoff = np.array(express_cutoff)

    # Define selection criteria
    L_trials = (percorr_sum == 1) & (t_rxn_sum > express_cutoff[0]) & (t_rxn_sum < 1.2)

    return L_trials


def get_data_sets(t_rxn_sum, l_trials, num_dir_sum, label_sum, return_labels=False):
    """
    Collect four datasets based on reaction times and specified conditions.

    Parameters:
    t_rxn_sum : np.ndarray
        Array of reaction times for the trials.
    l_trials : np.ndarray
        Boolean array indicating which trials meet the selection criteria.
    num_dir_sum : np.ndarray
        Array indicating the direction of choices (-1 for left, 1 for right).
    label_sum : np.ndarray
        Array indicating the labels associated with the trials.
    return_labels : bool
        Whether to return the corresponding labels for the datasets.

    Returns:
    list
        A list containing four datasets corresponding to the specified conditions.
    list (optional)
        A list of labels if return_labels is True.
    """
    
    # Initialize the data list
    data_ = []
    
    # C_L,0: Left choices, change-point trials
    data_.append(t_rxn_sum[l_trials & (num_dir_sum == -1) & (label_sum == 1)])
    
    # C_L,1+: Left choices, non-change-point trials
    data_.append(t_rxn_sum[l_trials & (num_dir_sum == -1) & (label_sum != 1)])
    
    # C_R,0: Right choices, change-point trials
    data_.append(t_rxn_sum[l_trials & (num_dir_sum == 1) & (label_sum == 1)])
    
    # C_R,1+: Right choices, non-change-point trials
    data_.append(t_rxn_sum[l_trials & (num_dir_sum == 1) & (label_sum != 1)])
    
    # Return labels if requested
    if return_labels:
        labels_ = ['Left Choice, No CP', 'Left Choice, CP', 'Right Choice, No CP', 'Right Choice, CP']
        return data_, labels_
    
    return data_

2. Define the objective function
%
% The objective function typically defines the error that you want to 
% minimize between your data and the model predictions. A common objective 
% function is the negative of the sum of the log-likelihoods of the data, 
% given the model parameters. To unpack that for the LATER model:
%
%   1. For each data point (RT from a single trial, in this case) and given
%       set of model parameters, compute the probability of the data, given
%       the model (i.e., the likelihood)
%   2. Take the logarithm
%   3. Sum all these log-likelihoods from all the data points
%   4. Take the negative, because we want to find the minimum (thus
%        corresponding to the maximum likelihood)
%
%   You can define the function simply using an "anonymous function"
%   (https://www.mathworks.com/help/matlab/matlab_prog/anonymous-functions.html), 
%   using this template that assumes that "fits" is a 2x1 vector of
%   [muR, deltaS]:
 
% EXERCISE:
% laterErrFcn = @(fits) <**YOUR OBJECTIVE FUNCTION HERE AS A FUNCTION OF FITS**>;

In [10]:
import numpy as np

def compute_negative_log_likelihood(rt_data, model_parameters):
    """
    Compute the negative log-likelihood of the data given the model parameters.

    Parameters:
    rt_data : np.ndarray
        Array of reaction times for the trials.
    model_parameters : dict
        A dictionary containing the model parameters needed for likelihood calculation.
        Example: {'param1': value1, 'param2': value2, ...}

    Returns:
    float
        The negative log-likelihood value.
    """

    # Initialize the likelihood array
    likelihoods = np.zeros_like(rt_data)

    # Example likelihood computation (customize based on your model)
    for i, rt in enumerate(rt_data):
        # Replace this with the actual likelihood computation based on your model
        # For demonstration, assume a simple normal likelihood:
        mu = model_parameters.get('mu', 0)  # Mean from model parameters
        sigma = model_parameters.get('sigma', 1)  # Standard deviation from model parameters
        
        # Compute the likelihood for the current RT
        likelihoods[i] = (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((rt - mu) / sigma) ** 2)

    # Take the logarithm of the likelihoods
    log_likelihoods = np.log(likelihoods + 1e-10)  # Adding a small value to avoid log(0)

    # Sum all log-likelihoods
    total_log_likelihood = np.sum(log_likelihoods)

    # Take the negative to get the negative log-likelihood
    negative_log_likelihood = -total_log_likelihood

    return negative_log_likelihood


3. Define initial conditions
%   
%   For the actual fitting, we will use fmincon
%   (https://www.mathworks.com/help/optim/ug/fmincon.html), which is 
%   "function minimization with constraints." This function allows for 
%   constraints that include upper and lower bounds on the parameters.
%   So here we define those bounds, along with the initial values.
%   We'll use fairly arbitrary values for the lower and upper
%   bounds, but we should pick the initial values more judiciously. HINT: 
%   Recall that the muR and deltaS should be strongly related to 
%   empirical summary statistics of `the (reciprocal) RT distribution.
lowerBounds = [0.001 0.001];
upperBounds = [1000 1000]; 

% EXERCISE:
% initialValues = [<**ADD INITIAL VALUES HERE**>];

In [15]:
import numpy as np
from scipy.optimize import minimize

# Define your negative log-likelihood function
def compute_negative_log_likelihood(params, rt_data):
    mu, sigma = params
    likelihoods = (1 / (sigma * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((rt_data - mu) / sigma) ** 2)
    log_likelihoods = np.log(likelihoods + 1e-10)  # Avoid log(0)
    return -np.sum(log_likelihoods)  # Return negative log-likelihood

# Example reaction time data because I cannot figure out how to extract the data from the given file.
rt_data = np.array([1.5, 2.0, 1.2])

# Define lower and upper bounds for parameters
lower_bounds = [0.001, 0.001]  # mu and sigma
upper_bounds = [1000, 1000]

# Initial parameter values (you may choose these based on empirical data)
initial_params = [1.0, 1.0]  # Initial guesses for mu and sigma

# Set up the bounds for the optimization
bounds = list(zip(lower_bounds, upper_bounds))

# Run the minimization
result = minimize(compute_negative_log_likelihood, initial_params, args=(rt_data,),
                  bounds=bounds, method='trust-constr')

# Check the result
if result.success:
    optimized_params = result.x
    print(f"Optimized Parameters: mu = {optimized_params[0]}, sigma = {optimized_params[1]}")
else:
    print("Optimization failed:", result.message)

Optimized Parameters: mu = 1.566666661603492, sigma = 0.32998317154955026


  self.H.update(self.x - self.x_prev, self.g - self.g_prev)


4. Run the fits
% 
%   We will be using GlobalSearch . The general advantage of this approach 
%   is to avoid local minima; for details, see:
%   https://www.mathworks.com/help/gads/how-globalsearch-and-multistart-work.html
%  
%   These options seem to work well, but I don't have a stronger
%   rationale for using them. See the Matlab documentation if you really
%   want to dive in and understand them, and let me know if you find
%   better settings!
opts = optimoptions(@fmincon,    ... % "function minimization with constraints"
   'Algorithm',   'active-set',  ...
   'MaxIter',     3000,          ...
   'MaxFunEvals', 3000);

% Definine the "optimization problem" using variables defined above
problem = createOptimProblem('fmincon',    ...
    'objective',   laterErrFcn,     ... % Use the objective function
    'x0',          initialValues,   ... % Initial conditions
    'lb',          lowerBounds,     ... % Parameter lower bounds
    'ub',          upperBounds,     ... % Parameter upper bounds
    'options',     opts);                % Options defined above

% Create a GlobalSearch object
gs = GlobalSearch;
   
% Run it, returning the best-fitting parameter values and the negative-
% log-likelihood returned by the objective function
[fits(ii,:), nllk] = run(gs,problem);

In [16]:
# Define a function to perform optimization
def optimize_with_global_search(rt_data, bounds, n_initial_guesses=10):
    best_fit = None
    best_nll = float('inf')

    for _ in range(n_initial_guesses):
        initial_guess = np.random.uniform(low=lower_bounds, high=upper_bounds)
        
        result = minimize(compute_negative_log_likelihood, initial_guess,
                          args=(rt_data,), bounds=bounds, method='trust-constr')

        if result.success and result.fun < best_nll:
            best_nll = result.fun
            best_fit = result.x

    return best_fit, best_nll

# Run the optimization with a global search approach
fits, nllk = optimize_with_global_search(rt_data, bounds)

# Print the best-fitting parameters and negative log-likelihood
print(f"Best-fitting Parameters: mu = {fits[0]}, sigma = {fits[1]}")
print(f"Negative Log-Likelihood: {nllk}")

  log_likelihoods = np.log(likelihoods + 1e-10)  # Avoid log(0)


Best-fitting Parameters: mu = 1.566666657772318, sigma = 0.3299831585885595
Negative Log-Likelihood: 0.9306746721997319


5. Evaluate the fits
%
%   EXERCISE: How do you know if you got a reasonable answer?

I was unable to figure out how to use my script to extract and read the data to complete the exercises. I used a randomly generated numpy aray instead. Thus, I cannot comment on the reasonability of my answer.