In [273]:
import numpy as np
import math
from numpy import random
from scipy.stats import norm, shapiro
import pandas as pd
import matplotlib.pyplot as plt

In [13]:
# Colab-only cell: google.colab is not available outside Google Colab.
# files.upload() opens an interactive upload prompt; the recorded cell output
# shows SIMULATED_DATA.csv being saved under the same name.
# NOTE(review): the loading cell below also reads four other CSV files; they
# presumably have to be uploaded the same way -- confirm.
from google.colab import files
uploaded = files.upload()

Saving SIMULATED_DATA.csv to SIMULATED_DATA.csv


In [354]:
# read data
# Every CSV is read from the current working directory. The DataFrames created
# here (magnotti_df, param_df, ob_data, ms_output_df, pm_output_df) are used as
# notebook-level globals by the plotting and refit cells further down.

magnotti_data = pd.read_csv("TEST-BCI-ASYNC-DATA.csv")
# pd.read_csv already returns a DataFrame; the pd.DataFrame(...) wrappers in
# this cell only re-wrap it under a second name.
magnotti_df = pd.DataFrame(magnotti_data)

params = pd.read_csv("RAW PARAM OUTPUT.csv")
param_df = pd.DataFrame(params)
# NOTE(review): this head() is not the last expression of the cell, so nothing
# is displayed for it.
param_df.head(5)
#print(param_df)

data = pd.read_csv("SIMULATED_DATA.csv")
ob_data = pd.DataFrame(data)
# keep only the columns whose name does not start with "Unnamed"
ob_data = ob_data.loc[:, ~ob_data.columns.str.contains('^Unnamed')]
#print(ob_data)

# preds df
# Frames that refit_models() appends one row per subject to.

ms_data = pd.read_csv("MODEL SELECTION OUTPUT.csv")
ms_output_df = pd.DataFrame(ms_data)
pm_data = pd.read_csv("PROBABILITY MATCHING OUTPUT.csv")
pm_output_df = pd.DataFrame(pm_data)



In [None]:
# PLOT OF OUR DATA

def get_descriptives(df):

  """Column-wise mean and standard deviation of a DataFrame.

  Args:
    df: DataFrame whose columns are summarised one by one.

  Returns:
    (means, stds): two lists with one entry per column, in column order.
    Each mean is the column sum divided by the number of rows; each
    standard deviation is np.std of the column.
  """

  column_names = list(df)

  means = [df[name].sum() / len(df[name]) for name in column_names]
  stds = [np.std(df[name]) for name in column_names]

  return means, stds

# Per-condition means and standard deviations of the simulated data and of the
# Magnotti et al. data. mean_mag / std_mag are reused by get_group_plot().
mean_sim, std_sim = get_descriptives(ob_data)
mean_mag, std_mag = get_descriptives(magnotti_df)

plt.figure(figsize=(8,6), dpi = 500)
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.bottom'] = True
plt.rcParams['axes.spines.left'] = True
# savefig.format expects a plain string; the previous list value ['svg'] does
# not pass matplotlib's rcParams validation.
plt.rcParams['savefig.format'] = 'svg'
plt.xlabel("Temporal Asynchrony")
plt.ylabel("Observed Times reported Synchronous over 12 Trials")
plt.ylim([0,12])
plt.xlim([-300, 500])

# NOTE: async_conditions is defined in the "MODEL REFIT FUNCTIONS" cell further
# down; that cell has to be run before this one on a fresh kernel.
x = async_conditions
y1 = mean_sim
y2 = mean_mag
# error bars extend half a standard deviation on either side of the mean
err_sim = [std / 2 for std in std_sim]
err_mag = [std / 2 for std in std_mag]

# Only vertical error bars are drawn: the spread is measured on the response
# counts (y axis). The x positions are fixed experimental conditions, so the
# same numbers must not be reused as horizontal error bars.
plt.errorbar(x, y1, yerr = err_sim, label = "Simulated Data", linestyle = '-', color = "#453781FF", marker = 'o')
plt.errorbar(x, y2, yerr = err_mag, label = "Magnotti et al., (2013)", linestyle = '-', color = "#1F968BFF", marker = 'o')

plt.legend()
plt.show()

In [17]:
#@title MODEL REFIT FUNCTIONS
# EXPERIMENTAL DESIGN
# Asynchrony conditions (the x axis of the "Temporal Asynchrony" plots).
async_conditions = [
  -300, -267, -200, -133, -100, -67,
  0,
  67, 100, 133, 200, 267, 300, 400, 500,
]
n_trials = 180 # NEEDS TO BE CHANGED FOR OUR EXPERIMENT
trials_per_cond = n_trials // len(async_conditions) # whole number of trials per condition

# DEFAULT MODEL PARAMETERS
# sigma1 / sigma2 are standard deviations (calc_posterior squares them into
# variances); sens_noise is halved before it is used as a standard deviation.
pc1, sens_noise = 0.58, 100
mu1, sigma1 = 0.0, 50
mu2, sigma2 = -48, 123

# Bundle handed to pred_MS / pred_PM as their exp_params argument.
exp_params = [async_conditions, n_trials, trials_per_cond]
#model_params = [pc1, sens_noise, mu1, sigma1, mu2, sigma2]

def calc_posterior(x, cond, model_params = None):

  """Probability mass of N(x, noise) that lies inside the model's response window.

  The window [lower, upper] is derived from the model parameters; the value
  returned is norm.cdf(upper) - norm.cdf(lower) for a normal distribution
  centred on x with standard deviation noise = sens_noise / 2.

  Previously the model_params argument was ignored and the notebook-level
  defaults were always used, so subject-specific parameters passed in by the
  refit code had no effect. They are now honoured.

  Args:
    x: measured asynchrony (scalar or NumPy array).
    cond: asynchrony condition; only checked to be float-convertible, it does
      not enter the computation.
    model_params: sequence [pc1, sens_noise, mu1, sigma1, mu2, sigma2].
      When None, the notebook-level defaults with those names are used.

  Returns:
    The probability, with the same shape as x.

  Raises:
    ValueError: if model_params does not hold exactly six values.
  """

  if model_params is None:
    # Backward-compatible fallback to the notebook-level default parameters.
    model_params = [pc1, sens_noise, mu1, sigma1, mu2, sigma2]

  if len(model_params) != 6:
    raise ValueError("model_params must be [pc1, sens_noise, mu1, sigma1, mu2, sigma2], got %s values" %(len(model_params)))

  # Local names deliberately differ from the globals so the fallback above can
  # still read them. mu1 is part of the parameter layout but unused here.
  p_common, sensory_noise, _mu1, sd1, mean2, sd2 = model_params

  noise = sensory_noise / 2

  cond = float(cond) # make sure the condition is a float
  var1 = sd1**2 # transform into variance for the cdf function
  var2 = sd2**2
  varx = noise**2

  lprior = 2 * np.log(p_common / (1-p_common))
  b = np.log((varx + var2) / (varx + var1)) +  (mean2**2 / (var2 - var1))
  c = (1 / (varx + var1)) - (1 / (varx + var2))

  # Clamp so the square root below never receives a negative argument.
  if lprior < -b:
    lprior = -b

  bound = np.sqrt((lprior+b)/c) # half-width of the window
  middle = abs(mean2) * (varx+var1)/(var2-var1) # centre of the window
  upper = middle + bound
  lower = middle - bound

  one = norm.cdf(x = upper, loc = x, scale = noise)
  two = norm.cdf(x = lower, loc = x, scale = noise)

  posterior = one - two

  return posterior

def model_selection(c1_posterior):

  """Deterministic decision rule.

  Args:
    c1_posterior (float): value to compare against the 0.5 criterion.

  Returns:
    1 when c1_posterior is strictly greater than 0.5, otherwise 0.
  """

  return 1 if c1_posterior > 0.5 else 0

def probability_matching(c1_posterior): 

  """Stochastic decision rule.

  Draws a criterion uniformly from [0, 1) with a fresh NumPy Generator on
  every call and compares the input against it.

  Args:
    c1_posterior (float): value to compare against the random criterion.

  Returns:
    1 when c1_posterior is strictly greater than the drawn criterion,
    otherwise 0.
  """

  criterion = np.random.default_rng().uniform(low = 0, high = 1)

  return 1 if c1_posterior > criterion else 0

def pred_MS(model_params, exp_params): 

  """Simulate model-selection responses for every asynchrony condition.

  For each entry of the notebook-level async_conditions, trials_per_cond noisy
  measurements are drawn around the condition, turned into a probability by
  calc_posterior and into a 0/1 response by model_selection.

  The measurement noise now comes from model_params instead of the
  notebook-level sens_noise, so subject-specific fits are actually simulated
  with their own noise level.

  Args:
    model_params: sequence [pc1, sens_noise, mu1, sigma1, mu2, sigma2];
      when None the notebook-level sens_noise is used for sampling.
    exp_params: currently unused. The design is read from the notebook-level
      async_conditions / trials_per_cond because one existing caller passes
      subject data in this position.

  Returns:
    List with one integer per condition: the number of simulated trials on
    which model_selection returned 1.
  """

  sensory_noise = sens_noise if model_params is None else model_params[1]
  noise = sensory_noise / 2

  MS_fpreds = []

  for cond in async_conditions:

    MS_preds = []

    for _ in range(trials_per_cond):

      x = np.random.normal(loc = cond, scale = noise, size = 1)
      posterior = calc_posterior(x, cond, model_params)
      MS_preds.append(model_selection(posterior)) # make prediction of synchrony

    MS_fpreds.append(sum(MS_preds)) # synchronous responses for this condition

  return MS_fpreds

def pred_PM(model_params, exp_params):

  """Simulate probability-matching responses for every asynchrony condition.

  For each entry of the notebook-level async_conditions, trials_per_cond noisy
  measurements are drawn around the condition, turned into a probability by
  calc_posterior and into a 0/1 response by probability_matching.

  The measurement noise now comes from model_params instead of the
  notebook-level sens_noise, so subject-specific fits are actually simulated
  with their own noise level.

  Args:
    model_params: sequence [pc1, sens_noise, mu1, sigma1, mu2, sigma2];
      when None the notebook-level sens_noise is used for sampling.
    exp_params: currently unused. The design is read from the notebook-level
      async_conditions / trials_per_cond because one existing caller passes
      subject data in this position.

  Returns:
    List with one integer per condition: the number of simulated trials on
    which probability_matching returned 1.
  """

  sensory_noise = sens_noise if model_params is None else model_params[1]
  noise = sensory_noise / 2

  PM_fpreds = []

  for cond in async_conditions:

    PM_preds = []

    for _ in range(trials_per_cond):

      x = np.random.normal(loc = cond, scale = noise, size = 1)
      posterior = calc_posterior(x, cond, model_params)
      PM_preds.append(probability_matching(posterior)) # make prediction of synchrony

    PM_fpreds.append(sum(PM_preds)) # synchronous responses for this condition

  return PM_fpreds

In [89]:
# Write the parameter table to disk. With the default to_csv settings the
# row index is written as an extra first column.
param_df.to_csv("param output processed.csv")

In [None]:
# Keep only the columns whose name does not start with "Unnamed".
ms_output_df = ms_output_df.loc[:, ~ms_output_df.columns.str.contains('^Unnamed')]
pm_output_df = pm_output_df.loc[:, ~pm_output_df.columns.str.contains('^Unnamed')]

# NOTE(review): DataFrame.drop returns a new frame and neither result is
# assigned, so both frames still contain "R2" after this cell (only the second
# call's result is shown as cell output). Do not turn these into assignments
# without updating the code below: refit_models appends rows that include an
# R2 value, get_group_stats_ms/_pm read the "R2" column, and plot_all_preds
# removes the value at position 15.
ms_output_df.drop(columns = "R2")
pm_output_df.drop(columns = "R2")

In [426]:
def r2(x, y):

  """Pearson correlation between two equally long numeric sequences.

  NOTE: despite the name, the value returned is the correlation coefficient
  r taken from np.corrcoef (range -1..1), not a squared / coefficient-of-
  determination score. Callers compare and report this value as "R2".

  The inputs are converted to flat float arrays. The earlier map(float, ...)
  calls were lazy and their results discarded, so no conversion took place.

  Args:
    x, y: sequences of numbers (nested single-row lists and numeric strings
      are accepted).

  Returns:
    The correlation between x and y; NaN when either input is constant.
  """

  x_values = np.asarray(x, dtype = float).ravel()
  y_values = np.asarray(y, dtype = float).ravel()

  r2_array = np.corrcoef(x_values, y_values)

  return r2_array[0,1] # the off-diagonal entry is the x-y correlation

def model_refit_MS(fitted_params, subject_data):

  """Repeatedly simulate model-selection predictions and keep the best match.

  One initial simulation plus refit_attempts further simulations are run with
  pred_MS; the one whose r2() score against subject_data is highest is kept.

  Fixes relative to the earlier version:
    * the initial simulation receives exp_params (it used to be handed
      subject_data in that position);
    * a NaN score (constant predictions) no longer blocks every later
      candidate, because any valid score now replaces a NaN best score;
    * the best score is stored instead of being recomputed on every pass.

  Args:
    fitted_params: subject-specific parameters in the model_params layout
      [pc1, sens_noise, mu1, sigma1, mu2, sigma2].
    subject_data: list of observed per-condition response counts.

  Returns:
    ([best_preds], [best_score]): each wrapped in a one-element list, the
    shape refit_models expects.
  """

  refit_attempts = 100

  best_preds = pred_MS(fitted_params, exp_params)
  best_score = r2(subject_data, best_preds)

  for _ in range(refit_attempts):

    new_preds = pred_MS(fitted_params, exp_params)
    r2_new = r2(new_preds, subject_data)

    if np.isnan(r2_new):
      continue # constant predictions carry no information about the fit

    if np.isnan(best_score) or r2_new > best_score:

      best_preds, best_score = new_preds, r2_new
      print("Better predictions found with R2 %s" %(r2_new))

  return [best_preds], [best_score]

def model_refit_PM(fitted_params, subject_data):

  """Repeatedly simulate probability-matching predictions and keep the best match.

  One initial simulation plus refit_attempts further simulations are run with
  pred_PM; the one whose r2() score against subject_data is highest is kept.

  Fixes relative to the earlier version:
    * the initial simulation receives exp_params (it used to be handed
      subject_data in that position);
    * a NaN score (constant predictions) no longer blocks every later
      candidate, because any valid score now replaces a NaN best score;
    * the best score is stored instead of being recomputed on every pass.

  Args:
    fitted_params: subject-specific parameters in the model_params layout
      [pc1, sens_noise, mu1, sigma1, mu2, sigma2].
    subject_data: list of observed per-condition response counts.

  Returns:
    ([best_preds], [best_score]): each wrapped in a one-element list, the
    shape refit_models expects.
  """

  refit_attempts = 100

  best_preds = pred_PM(fitted_params, exp_params)
  best_score = r2(subject_data, best_preds)

  for _ in range(refit_attempts):

    new_preds = pred_PM(fitted_params, exp_params)
    r2_new = r2(new_preds, subject_data)

    if np.isnan(r2_new):
      continue # constant predictions carry no information about the fit

    if np.isnan(best_score) or r2_new > best_score:

      best_preds, best_score = new_preds, r2_new
      print("Better predictions found with R2 %s" %(r2_new))

  return [best_preds], [best_score]

def write_csv(df, name = str):

  """Write df to the CSV file called name.

  The previous body called pd.df.to_csv(""), which fails because pandas has
  no attribute df, and it ignored both arguments.

  Args:
    df: object with a to_csv method (a pandas DataFrame).
    name: target file name. The historical default (the str type itself) is
      kept for signature compatibility but is not a usable file name.

  Raises:
    ValueError: if name is not a non-empty string.
  """

  if not isinstance(name, str) or not name:
    raise ValueError("write_csv needs a non-empty file name, e.g. write_csv(df, 'output.csv')")

  df.to_csv(name)

def refit_models(data_df, param_df, participants = 50):

  """Refit both decision models for every subject and store the predictions.

  For each subject the observed data row and the fitted MS / PM parameters
  are looked up, model_refit_MS and model_refit_PM are run, and one row
  (per-condition predictions followed by the score) is appended to the
  notebook-level ms_output_df and pm_output_df.

  Fixes relative to the earlier version:
    * subject i now reads row i. Before, row i - 1 was used, so the first
      subject received the last row and every other subject was shifted by one;
    * the data_df argument is used instead of the notebook-level ob_data;
    * the closing message uses the same 1-based subject number as the
      opening message.

  Args:
    data_df: observed data, one row per subject.
    param_df: fitted parameters, one row per subject.
      NOTE(review): assumes the slices "PC1_MS":"SIGMA2_MS" and
      "PC1_PM":"SIGMA2_PM" each select exactly PC1, SENS_NOISE, SIGMA1, MU2,
      SIGMA2 in that order -- confirm against the CSV.
    participants: number of subjects (rows) to process; defaults to the
      previously hard-coded 50.
  """

  mu1 = 0.0 # fixed; inserted so the list matches the model_params layout

  # column selection does not depend on the subject, so do it once
  ms_params_cols = param_df.loc[:,"PC1_MS":"SIGMA2_MS"]
  pm_params_cols = param_df.loc[:,"PC1_PM":"SIGMA2_PM"]

  for subject in range(participants):

    print("Refitting models for subject %s" %(subject + 1))

    subject_data = list(data_df.iloc[subject]) # get subject data
    print(subject_data)

    ms_params_list = list(ms_params_cols.iloc[subject])
    pm_params_list = list(pm_params_cols.iloc[subject])
    ms_params_list.insert(2, mu1)
    pm_params_list.insert(2, mu1) # now [pc1, sens_noise, mu1, sigma1, mu2, sigma2]

    ms_best_preds, ms_r2 = model_refit_MS(ms_params_list, subject_data)
    pm_best_preds, pm_r2 = model_refit_PM(pm_params_list, subject_data)

    # unpack from list of lists
    ms_fin_list = [item for sublist in ms_best_preds for item in sublist]
    pm_fin_list = [item for sublist in pm_best_preds for item in sublist]

    # the score goes at position 15, after the per-condition predictions
    ms_fin_list.insert(15, ms_r2[0])
    pm_fin_list.insert(15, pm_r2[0])

    # append as a new last row of the notebook-level output frames
    ms_output_df.loc[len(ms_output_df)] = ms_fin_list
    pm_output_df.loc[len(pm_output_df)] = pm_fin_list

    print("Finished refitting subject %s" %(subject + 1))

def get_all_descriptives(ob_df, ms_df, pm_df):

  """Means and standard deviations for the observed data and both model outputs.

  Previously all three result pairs were computed from ob_df, so the MS and
  PM values were copies of the observed ones. Each frame is now summarised
  separately.

  Args:
    ob_df: observed data.
    ms_df: model-selection predictions.
    pm_df: probability-matching predictions.
    Every column of each frame is summarised, so pass only the columns that
    should appear in the result.

  Returns:
    (ob_m, ob_std, ms_m, ms_std, pm_m, pm_std): lists as returned by
    get_descriptives for the three frames.
  """

  ob_m, ob_std = get_descriptives(ob_df)
  ms_m, ms_std = get_descriptives(ms_df)
  pm_m, pm_std = get_descriptives(pm_df)

  return ob_m, ob_std, ms_m, ms_std, pm_m, pm_std

def get_group_plot(ob_df, ms_df, pm_df):

  """Plot group means of observed data, both model predictions and the Magnotti data.

  Fixes relative to the earlier version:
    * the three arguments are used (the notebook-level ob_data, ms_output_df
      and pm_output_df used to be read regardless of what was passed);
    * savefig.format is set to the string 'svg' rather than a list;
    * only vertical error bars are drawn. The spread is measured on the
      response counts, so it must not be reused along the asynchrony axis.

  Args:
    ob_df: observed data, one column per asynchrony condition.
    ms_df: model-selection output; columns "-300" through "+500" are used.
    pm_df: probability-matching output; columns "-300" through "+500" are used.

  Relies on the notebook-level async_conditions, mean_mag and std_mag.
  """

  ob_m, ob_std = get_descriptives(ob_df)
  ms_m, ms_std = get_descriptives(ms_df.loc[:,"-300":"+500"])
  pm_m, pm_std = get_descriptives(pm_df.loc[:,"-300":"+500"])

  plt.figure(figsize=(8,6), dpi = 500)
  plt.rcParams['axes.spines.top'] = False
  plt.rcParams['axes.spines.right'] = False
  plt.rcParams['axes.spines.bottom'] = True
  plt.rcParams['axes.spines.left'] = True
  plt.rcParams['savefig.format'] = 'svg' # must be a string, not a list
  plt.xlabel("Temporal Asynchrony")
  plt.ylabel("Times reported Synchronous over 12 Trials")

  plt.ylim([0,12])
  plt.xlim([-300, 500])

  # error bar formatting

  linewidth = 1

  x = async_conditions
  # error bars extend half a standard deviation on either side of the mean
  err_ob = [std / 2 for std in ob_std]
  err_ms = [std / 2 for std in ms_std]
  err_pm = [std / 2 for std in pm_std]
  err_mag = [std / 2 for std in std_mag]

  plt.errorbar(x, ob_m, yerr = err_ob, label = "Observed Data", linestyle = '-', color = "k", marker = 'o', 
               elinewidth = linewidth, snap = True)
  plt.errorbar(x, ms_m, yerr = err_ms, label = "CIMS-MS", linestyle = '-', color = "tab:blue", marker = 'o', 
               elinewidth = linewidth, snap = True)
  plt.errorbar(x, pm_m, yerr = err_pm, label = "CIMS-PM", linestyle = '-', color = "tab:green", marker = 'o', 
               elinewidth = linewidth, snap = True)
  plt.errorbar(x, mean_mag, yerr = err_mag, label = "Magnotti et al., (2013)", linestyle = ':', color = "tab:red", marker = 'o', 
               elinewidth = linewidth, snap = True)

  plt.legend(loc = 'upper right')
  plt.tight_layout()
  plt.show()

  #ax2 plots

def get_par_avg_diffs(param_df):

  """Print mean and standard deviation of |MS - PM| for every fitted parameter.

  The earlier version looped over the rows but recomputed and appended the
  whole-column difference on every pass, building an n-by-n table whose
  mean and standard deviation equal those of a single column. The
  difference is now computed once per parameter.

  Args:
    param_df: DataFrame holding the columns <PARAM>_MS and <PARAM>_PM for
      PC1, SENS_NOISE, SIGMA1, MU2 and SIGMA2.

  Raises:
    KeyError: if one of those columns is missing (raised before anything is
      printed).
  """

  # (label used in the printed report, column-name stem in param_df)
  parameters = [
    ("PC1", "PC1"),
    ("SN", "SENS_NOISE"),
    ("SIGMA1", "SIGMA1"),
    ("MU2", "MU2"),
    ("SIGMA2", "SIGMA2"),
  ]

  summaries = []

  for label, stem in parameters:

    ms_values = param_df[stem + "_MS"].to_numpy(dtype = float)
    pm_values = param_df[stem + "_PM"].to_numpy(dtype = float)
    diffs = np.abs(ms_values - pm_values)

    summaries.append((label, np.mean(diffs), np.std(diffs)))

  # print only after every statistic has been computed
  for label, avg_diff, avg_std in summaries:

    print("The mean difference for %s was %s, with standard deviation %s" %(label, avg_diff, avg_std))

def get_par_avgs(param_df):

  """Print the mean and standard deviation of every fitted parameter.

  Five lines are printed for the CIMS-MS columns (<PARAM>_MS) followed by
  five lines for the CIMS-PM columns (<PARAM>_PM). All statistics are
  computed before the first line is printed.

  Args:
    param_df: DataFrame holding the columns <PARAM>_MS and <PARAM>_PM for
      PC1, SENS_NOISE, SIGMA1, MU2 and SIGMA2.
  """

  # (label used in the printed report, column-name stem in param_df)
  parameters = [
    ("PC1", "PC1"),
    ("SN", "SENS_NOISE"),
    ("S1", "SIGMA1"),
    ("MU2", "MU2"),
    ("S2", "SIGMA2"),
  ]

  report = []

  for strategy in ("MS", "PM"):
    for label, stem in parameters:
      values = param_df[stem + "_" + strategy]
      report.append((label, strategy, np.mean(values), np.std(values)))

  for label, strategy, avg, std in report:
    print("The average %s estimate for CIMS-%s: M = %s, STD = %s" %(label, strategy, avg, std))

def get_group_stats_ms(ms_output_df):

  """Group averages for the model-selection (MS) fits.

  Args:
    ms_output_df: DataFrame with an "R2" column.

  Returns:
    (group_r2, group_nll): the average of ms_output_df["R2"] and the average
    of the "NLL_MS" column of the notebook-level param_df.
  """

  r2_scores = ms_output_df["R2"]
  nll_scores = param_df["NLL_MS"]

  return sum(r2_scores) / len(r2_scores), sum(nll_scores) / len(nll_scores)

def get_group_stats_pm(pm_output_df):

  """Group averages for the probability-matching (PM) fits.

  Args:
    pm_output_df: DataFrame with an "R2" column.

  Returns:
    (group_r2, group_nll): the average of pm_output_df["R2"] and the average
    of the "NLL_PM" column of the notebook-level param_df.
  """

  r2_scores = pm_output_df["R2"]
  nll_scores = param_df["NLL_PM"]

  return sum(r2_scores) / len(r2_scores), sum(nll_scores) / len(nll_scores)

def mpl_scatterplots(params_df):

  """Strip plots of every fitted parameter, MS estimates next to PM estimates.

  One panel per parameter in a 2 x 3 grid (the sixth panel is removed).
  MS estimates are drawn at x = 1 in blue, PM estimates at x = 0.9 in green.

  Fixes relative to the earlier version:
    * the params_df argument is used (the body read the notebook-level
      param_df, so the argument had no effect);
    * the five copy-pasted panel sections are driven by one table.

  Args:
    params_df: DataFrame holding the columns <PARAM>_MS and <PARAM>_PM for
      PC1, SENS_NOISE, SIGMA1, MU2 and SIGMA2.
  """

  # MUST SORT THE PARAMETER BY COMPUTATIONAL STRATEGY IF NEED BE

  plt.rcParams['axes.spines.top'] = False
  plt.rcParams['axes.spines.right'] = False
  plt.rcParams['axes.spines.bottom'] = True
  plt.rcParams['axes.spines.left'] = True

  # FIG SETUP

  fig, axes = plt.subplots(2, 3, figsize=(8,6), dpi = 500, sharex=False, sharey=False)
  panel_axes = axes.flatten()

  # (panel title, column stem, legend-label stem, y limits, explicit y ticks or None)
  panels = [
    ("PC1", "PC1", "CIMS", (0, 1), None),
    ("Sensory Noise", "SENS_NOISE", "SN", (0, 500), None),
    ("Sigma C = 1", "SIGMA1", "Sigma1", (0, 500), None),
    ("Mu C = 2", "MU2", "Mu2", (-500, 500), [-500, -400, -300, -200, -100, 0, 100, 200, 300, 400, 500]),
    ("Sigma C = 2", "SIGMA2", "Sigma2", (0, 500), None),
  ]

  for ax, (title, column_stem, label_stem, y_limits, y_ticks) in zip(panel_axes, panels):

    ms_values = params_df[column_stem + "_MS"]
    pm_values = params_df[column_stem + "_PM"]

    ax.set_title(title, fontsize = 10)
    ax.set_xlim(0.8, 1.1)
    ax.set_ylim(*y_limits)

    if y_ticks is not None:
      ax.set_yticks(y_ticks)

    ax.set_xticks([]) # the x position only separates the two strategies

    ax.plot(len(ms_values) * [1], ms_values, label = label_stem + "-MS", color = "tab:blue", marker = 'o', ls = 'none', markeredgecolor = 'k')
    ax.plot(len(pm_values) * [0.9], pm_values, label = label_stem + "-PM", color = "tab:green", marker = 'o', ls = 'none', markeredgecolor = 'k')

  # five parameters, six grid cells: drop the unused last panel
  fig.delaxes(panel_axes[-1])

  plt.tight_layout()
  plt.show()

def plot_all_preds(ob_data, ms_output_data, pm_output_data):

  """Grid of per-subject plots: observed data against MS and PM predictions.

  A 10 x 5 grid is created; panel k shows row k of each frame.

  Fixes relative to the earlier version:
    * ms_output_data and pm_output_data are used (the body read the
      notebook-level ms_output_df / pm_output_df instead);
    * when fewer rows than panels are available the surplus panels are
      hidden instead of raising IndexError.

  Args:
    ob_data: observed data, one row per subject, one column per condition.
    ms_output_data: MS output, one row per subject. The value at the
      position right after the per-condition predictions (the score that
      refit_models inserts) is removed before plotting.
    pm_output_data: PM output in the same layout.

  Relies on the notebook-level async_conditions for the x axis.
  """

  fig, axes = plt.subplots(nrows=10, ncols=5, figsize = (16, 16), dpi = 500)
  fig.subplots_adjust(hspace=0.5)

  n_subjects = min(len(ob_data), len(ms_output_data), len(pm_output_data))
  score_position = len(async_conditions) # index of the score within an output row

  last_ax = None
  handles = None

  for loc, ax in enumerate(axes.flatten()):

      if loc >= n_subjects:
        ax.set_visible(False) # no data for this panel
        continue

      ob_list = ob_data.iloc[loc].values
      ms_preds = np.delete(ms_output_data.iloc[loc].values, score_position)
      pm_preds = np.delete(pm_output_data.iloc[loc].values, score_position)

      l1,= ax.plot(async_conditions, ob_list, label = "Observed Data", linestyle = '-', color = 'k', marker = "None")
      l2,= ax.plot(async_conditions, ms_preds, label = "CIMS-MS", linestyle = '-', color = 'tab:blue', marker = "None")
      l3,= ax.plot(async_conditions, pm_preds, label = "CIMS-PM", linestyle = '-', color = 'tab:green', marker = "None")

      last_ax = ax
      handles = (l1, l2, l3)

  # One shared legend, attached to the last panel that was drawn and anchored
  # just outside its right edge.
  if last_ax is not None:
    last_ax.legend(handles, ("Observed Data", "CIMS-MS", "CIMS-PM"), bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

In [None]:
# Draw the per-subject grid of observed data against MS / PM predictions.
plot_all_preds(ob_data, ms_output_df, pm_output_df)

In [None]:
# Draw the per-parameter strip plots of the fitted MS and PM estimates.
mpl_scatterplots(param_df)

In [281]:
# Per-column means / standard deviations of the observed data.
ob_m, ob_std = get_descriptives(ob_data)

# Shapiro-Wilk test applied to the list of column means (not to the raw
# responses); the cell output is the (statistic, p-value) pair.
shapiro(ob_m)

(0.8791519999504089, 0.04608379676938057)