# RSA between change in hormone concentrations and behavioral measures
### model: behavior_delta - positive and negative mood, depressive symptoms and emotion recognition performance, hormones_delta - estrogens, progestogens
### Note: code cannot be run as a whole - the desired variables to be compared have to be selected manually by running the respective cells
#### import modules & correct read-in (+split groups)

In [None]:
#import os
#from os import system as oss
import pandas as pd
import numpy as np
import seaborn as sns
#from matplotlib import pyplot as plt

from sklearn.metrics import pairwise_distances
#from sklearn import metrics
import scipy
from scipy import stats
from scipy.stats import permutation_test
import math
import json

## Modelling - creating dataframes
### Hormone Delta dataframes
#### Load Estrogen Data

In [None]:
#### Change in Estrogens Dataframe
# Read the 'Hormones_Delta' sheet (first column becomes the index) and remove the
# grouping column plus the progestogen and testosterone columns, so that only the
# remaining (presumably estrogen: Estradiol_delta, EE_delta) columns are kept.
df_hormonesdelta_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='Hormones_Delta',
    index_col=0,
).drop(columns=['group_noONC', 'Progesterone_delta', 'Gestagen_delta', 'Testosterone_delta'])

#### Load Progestogen Data

In [None]:
#### Change in Progestogens Dataframe
# Read the 'Hormones_Delta' sheet (first column becomes the index) and remove the
# grouping column plus the estrogen and testosterone columns, so that only the
# remaining (presumably progestogen: Progesterone_delta, Gestagen_delta) columns are kept.
df_hormonesdelta_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='Hormones_Delta',
    index_col=0,
).drop(columns=['group_noONC', 'Estradiol_delta', 'EE_delta', 'Testosterone_delta'])

### Run one dataframe to be compared (e.g. positive mood or negative mood or depression or emotion recognition) with one hormonal dataframe
#### Load Itemwise positive mood data

In [None]:
## read in behav data - Positive Mood
# sheet 'PANASpos_DeltaperItem'; the first column is used as the row index
df_behav_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='PANASpos_DeltaperItem',
    index_col=0,
)

#### Load Itemwise negative mood data 

In [None]:
## read in behav data - Negative Mood
# sheet 'PANASneg_DeltaperItem'; the first column is used as the row index
df_behav_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='PANASneg_DeltaperItem',
    index_col=0,
)

#### Load Itemwise depression score

In [None]:
## read in behav data - Depressive Symptoms
# sheet 'BDI_DeltaperItem'; the first column is used as the row index
df_behav_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='BDI_DeltaperItem',
    index_col=0,
)

#### Load Itemwise emotion recognition accuracy

In [None]:
## read in behav data - Emotion Recognition Accuracy
# sheet 'EmoRecogAcc_DeltaperItem'; the first column is used as the row index
df_behav_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='EmoRecogAcc_DeltaperItem',
    index_col=0,
)

#### Load Itemwise emotion recognition response times 

In [None]:
# read in behav data - Emotion Recognition Response Times
# sheet 'EmoRecogRT_DeltaperEmotion'; the first column is used as the row index
df_behav_neworder = pd.read_excel(
    'HormonesBehaviorDelta_anonymized.xlsx',
    sheet_name='EmoRecogRT_DeltaperEmotion',
    index_col=0,
)

### Drop participants with missing data in either dataframe from both dataframes

In [None]:
# Delete subjects with missing data so that both dataframes hold the same participants.
# `axis=1` has to be passed by keyword: DataFrame.any() only accepts keyword
# arguments in pandas >= 2.0, so the positional form `.any(1)` raises a TypeError there.
SubjectswithNAN_behav = df_behav_neworder.loc[df_behav_neworder.isna().any(axis=1), :].index
SubjectswithNAN = df_hormonesdelta_neworder.loc[df_hormonesdelta_neworder.isna().any(axis=1), :].index

# Every participant with a missing value in EITHER dataframe is removed from BOTH.
subjects_to_drop = SubjectswithNAN_behav.union(SubjectswithNAN)

# errors='ignore': a label that is absent from one dataframe must not abort the
# cell with a KeyError (dropping in two separate steps did exactly that whenever a
# participant had missing values in both dataframes, because the label was already gone).
df_behavFIN = df_behav_neworder.drop(index=subjects_to_drop, errors='ignore')
df_hormonesdeltaFIN = df_hormonesdelta_neworder.drop(index=subjects_to_drop, errors='ignore')

# compare whether both dataframes include the same participants in the same order;
# the RDMs built later are compared position by position, so this must hold
if df_behavFIN.index.equals(df_hormonesdeltaFIN.index):
    print("dataframes have the same order and number of participants")
else:
    print("needs checking")
#len(df_hormonesdeltaFIN)

In [None]:
# show the cleaned behavioral dataframe as the cell output (visual sanity check)
df_behavFIN

In [None]:
# show the cleaned hormonal dataframe as the cell output (visual sanity check)
df_hormonesdeltaFIN
#len(df_hormonesdeltaFIN)

## RSA approach - creating and comparing RDMs

### Create RDM for behavioral change 

In [None]:
### Behavioral RDM: pairwise standardized Euclidean distance between the rows of df_behavFIN
rdm_behav = pairwise_distances(df_behavFIN.to_numpy(), metric='seuclidean')
# DataFrame copy of the square matrix (used for plotting and permutation testing)
DF_rdm_behav = pd.DataFrame(rdm_behav)
# vectorize: keep only the entries strictly above the diagonal (each pair once, no self-distances)
rdm_behav_vec = rdm_behav[np.triu_indices_from(rdm_behav, k=1)]

###  Visualize behavioral RDM

In [None]:
# Boolean mask that hides the upper triangle (diagonal included), so the heatmap
# only draws the lower triangle of the symmetric matrix.
mask = np.triu(np.ones(DF_rdm_behav.shape, dtype=bool))

# Heatmap without tick labels or colorbar; cells are drawn square.
ax = sns.heatmap(
    DF_rdm_behav,
    mask=mask,
    square=True,
    cbar=False,
    xticklabels=False,
    yticklabels=False,
)
# make the axes background transparent
ax.patch.set_alpha(0)

# matrix size
print(DF_rdm_behav.shape)

# save RDM figure, if needed
#plt.savefig('C:/Users/UKPP/Documents/HormonesRestingStateRSA/ManuscriptRSA_posmoodRDM_standEuclidean.jpg', bbox_inches='tight', dpi=300)

# largest and smallest non-zero distance (zeros, e.g. the diagonal, are excluded)
x = DF_rdm_behav.to_numpy()
nonzero_distances = x[x != 0]
print(np.max(nonzero_distances))
print(np.min(nonzero_distances))

### Create RDM for hormonal change 

In [None]:
### Hormonal RDM: pairwise standardized Euclidean distance between the rows of df_hormonesdeltaFIN
rdm_modelFull = pairwise_distances(df_hormonesdeltaFIN.to_numpy(), metric='seuclidean')
# DataFrame copy of the square matrix (used for plotting and permutation testing)
DF_rdm_modelFull = pd.DataFrame(rdm_modelFull)
# vectorize: keep only the entries strictly above the diagonal (each pair once, no self-distances)
rdm_modelFull_vec = rdm_modelFull[np.triu_indices_from(rdm_modelFull, k=1)]

### Visualize hormonal RDM 

In [None]:
# Boolean mask that hides the upper triangle (diagonal included), so the heatmap
# only draws the lower triangle of the symmetric matrix.
mask = np.triu(np.ones(DF_rdm_modelFull.shape, dtype=bool))

# Heatmap without tick labels or colorbar; cells are drawn square.
ax = sns.heatmap(
    DF_rdm_modelFull,
    mask=mask,
    square=True,
    cbar=False,
    xticklabels=False,
    yticklabels=False,
)
# make the axes background transparent
ax.patch.set_alpha(0)

# matrix size
print(DF_rdm_modelFull.shape)

# save RDM figure, if needed
#plt.savefig('C:/Users/UKPP/Documents/HormonesRestingStateRSA/ManuscriptRSA_EstrogenRDM_standEuclidean.jpg', bbox_inches='tight', dpi=300)

# largest and smallest non-zero distance (zeros, e.g. the diagonal, are excluded)
x = DF_rdm_modelFull.to_numpy()
nonzero_distances = x[x != 0]
print(np.max(nonzero_distances))
print(np.min(nonzero_distances))

## RSA - with permutation testing (10'000 permutations)

**Permutation Testing**

Using permutation testing to evaluate the significance of RSA analyses (Spearman correlations) via Family-Wise Error (FWE) correction

(1) subject labels (i.e., rows and columns) are randomly reordered/permuted for one of the two similarity matrices (behavioral RDM) a large number of times (in this case, 10'000)

(2) the correlation between the two matrices (permuted behavioral RDM and original brain/hormone RDM) is calculated - this is done 10'000 times - and this forms a null distribution of 10'000 surrogate correlation values

(3) the true correlation value (between the original behavioral RDM and the original hormone RDM) is calculated, and this observed correlation coefficient is then compared to the family-wise null distribution to obtain a new p-value (assessed at a 0.05 significance threshold, as the FWE correction is done by assessing the true correlation against a family-wise null distribution)

In [None]:
 ### define function to get null distribution of permutated behavior RDM
def permutation_nulldistribution(rdm_hormone, rdm_behav, perm_num, *, seed=None):
    """Build a permutation null distribution of Spearman correlations for RSA.

    For each of ``perm_num`` iterations the rows and columns of the square
    matrix ``rdm_behav`` are reordered with the same random permutation, the
    strict upper triangle of the reordered matrix is vectorized, and its
    Spearman correlation with the fixed vector ``rdm_hormone`` is recorded.

    Parameters
    ----------
    rdm_hormone : array-like
        Vectorized strict upper triangle (``k=1``) of the RDM that is kept
        fixed; must hold n*(n-1)/2 values for an n x n ``rdm_behav``.
    rdm_behav : pandas.DataFrame or 2-D array-like
        Square n x n RDM whose rows and columns are permuted together.
    perm_num : int
        Number of permutations, i.e. length of the returned list.
    seed : int or None, keyword-only, optional
        If given, permutations are drawn from ``np.random.default_rng(seed)``
        so the result is reproducible. If None (default), the global
        ``np.random`` state is used.

    Returns
    -------
    list of float
        One Spearman correlation coefficient per permutation.

    Raises
    ------
    ValueError
        If ``rdm_behav`` is not square or ``rdm_hormone`` does not have
        n*(n-1)/2 elements.
    """
    square_rdm = np.asarray(rdm_behav)
    if square_rdm.ndim != 2 or square_rdm.shape[0] != square_rdm.shape[1]:
        raise ValueError("rdm_behav must be a square (n x n) matrix")
    n_items = square_rdm.shape[0]

    # indices of the strict upper triangle; identical for every permutation
    upper = np.triu_indices(n_items, 1)
    if np.size(rdm_hormone) != upper[0].size:
        raise ValueError(
            "rdm_hormone must contain n*(n-1)/2 values for an n x n rdm_behav"
        )

    rng = np.random if seed is None else np.random.default_rng(seed)

    family_wise_null_distr_behav = []
    for _ in range(perm_num):
        # Reordering rows and columns with the same index permutation is what
        # P @ RDM @ P^-1 does for a permutation matrix P, without building or
        # inverting an n x n matrix in every iteration.
        order = rng.permutation(n_items)
        permuted_vec = square_rdm[np.ix_(order, order)][upper]

        # RSA: correlation between the permuted RDM (vector) and the fixed RDM (vector)
        corr_behav = scipy.stats.spearmanr(permuted_vec, rdm_hormone)
        family_wise_null_distr_behav.append(corr_behav[0])

    # With several conditions one would store the maximum coefficient per
    # permutation instead; with a single comparison every coefficient is kept.
    return family_wise_null_distr_behav

  
### define function to compute the p-value of an observed value against a null distribution
def calc_pvalue(null_distr, value):
    """Return the one-sided (upper-tail) p-value of ``value`` on a null distribution.

    The percentile rank of ``value`` within ``null_distr`` is computed with
    ``scipy.stats.percentileofscore(..., kind='rank')`` and converted to
    ``p = (100 - percentile) / 100``. Large values of ``value`` therefore give
    small p-values; a strongly negative value gives a p-value close to 1.

    Parameters
    ----------
    null_distr : list or array-like of float
        Scores that make up the self-made null distribution.
    value : float
        Single observed score. When it comes from a correlation, pass only
        the coefficient, not the (coefficient, p-value) pair.

    Returns
    -------
    float
        p-value in [0, 1].

    Raises
    ------
    ValueError
        If ``null_distr`` is empty.

    Notes
    -----
    NOTE(review): the result is exactly 0.0 when ``value`` exceeds every entry
    of ``null_distr``. Consider reporting such cases as p < 1/len(null_distr).
    """
    null_scores = np.asarray(null_distr)
    # An empty null distribution cannot yield a meaningful p-value; fail loudly
    # instead of passing a misleading number on to the results file.
    if null_scores.size == 0:
        raise ValueError("null_distr must contain at least one value")

    # percentile rank of the observed score relative to the null scores
    percentile = scipy.stats.percentileofscore(null_scores, value, kind='rank')

    # the corresponding upper-tail p-value
    p_value = (100 - percentile) / 100
    return p_value

In [None]:
### statistic: observed RSA value = Spearman correlation between the two vectorized RDMs
rsa_behavAndHormones = scipy.stats.spearmanr(rdm_behav_vec, rdm_modelFull_vec)

# permutation testing: the behavioral RDM is permuted and the hormonal RDM vector is
# kept fixed. Arguments are passed by keyword so that each RDM lands in the parameter
# named after it (the positional call fed the behavioral vector into `rdm_hormone`
# and the hormonal matrix into `rdm_behav`).
family_wise_null_distr = permutation_nulldistribution(
    rdm_hormone=rdm_modelFull_vec,
    rdm_behav=DF_rdm_behav,
    perm_num=10000,
)

# collect the observed coefficient and its permutation-based p-value
RSA_correlation_res = {
    'corr_coef': rsa_behavAndHormones[0],
    'p-value': calc_pvalue(family_wise_null_distr, rsa_behavAndHormones[0]),
}
#print(df_modelFull.index.values)

#plt.imshow(DF_rdm_modelFull.to_numpy())
#plt.show()
print(RSA_correlation_res)
#
##############################

## Save results in separate text files 

In [None]:
# Write the current RSA_correlation_res as indented JSON (estrogens x positive mood);
# run only after the matching dataframes were analysed above
with open("RSA_EstrogensPANASpos_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (progestogens x positive mood);
# run only after the matching dataframes were analysed above
with open("RSA_ProgestogensPANASpos_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (estrogens x negative mood);
# run only after the matching dataframes were analysed above
with open("RSA_EstrogensPANASneg_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (progestogens x negative mood);
# run only after the matching dataframes were analysed above
with open("RSA_ProgestogensPANASneg_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (estrogens x BDI);
# run only after the matching dataframes were analysed above
with open("RSA_EstrogensBDI_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (progestogens x BDI);
# run only after the matching dataframes were analysed above
with open("RSA_ProgestogensBDI_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (estrogens x emotion recognition accuracy);
# run only after the matching dataframes were analysed above
with open("RSA_EstrogensEmoRecACC_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (progestogens x emotion recognition accuracy);
# run only after the matching dataframes were analysed above
with open("RSA_ProgestogensEmoRecACC_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (estrogens x emotion recognition response times);
# run only after the matching dataframes were analysed above
with open("RSA_EstrogensEmoRecRT_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

In [None]:
# Write the current RSA_correlation_res as indented JSON (progestogens x emotion recognition response times);
# run only after the matching dataframes were analysed above
with open("RSA_ProgestogensEmoRecRT_seuclidean_FWEcorrected.txt", "w") as results_file:
    results_file.write(json.dumps(RSA_correlation_res, indent=4))

## Create histogram of null distribution 

In [None]:
# histogram of the null distribution of the surrogate Spearman correlation values
# pyplot is imported here because the import in the first cell is commented out;
# without it every `plt.` call below raises a NameError.
from matplotlib import pyplot as plt

plt.hist(family_wise_null_distr, bins=50, alpha = 0.25)
plt.title('Family-wise null distribution of surrogate Spearman correlation values')
plt.grid()
plt.show()

# Summary statistics of the null distribution. `family_wise_null_distr` is the
# notebook-level result of permutation_nulldistribution();
# `family_wise_null_distr_behav` only exists inside that function.
print("Mean:  ", round(np.mean(family_wise_null_distr),3))
print("Median:", round(np.median(family_wise_null_distr),3))
print("Minimum:", round(np.min(family_wise_null_distr),3))
print("Maximum:", round(np.max(family_wise_null_distr),3))
