# Overview

DATA : stress ratings

CODE : analyze & visualize

APPLY : behavioral, fMRI

# Imports

In [None]:
import pandas as pd; import os; import warnings
import matplotlib as mpl
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
%matplotlib notebook
import hypertools; import seaborn as sb; 

In [None]:
# Select the interactive "notebook" matplotlib backend, then show which
# backend is active as the cell's output.
%matplotlib notebook
mpl.get_backend() 

# Compile data

In [None]:
#unzip our behavioral data into the hypertools folder
# NOTE(review): both paths are absolute to the JupyterHub home directory, so
# this cell only works where that exact folder layout exists.
!unzip /home/jovyan/psych60_jhub/hypertools/ProcessedSuperlabData.zip -d /home/jovyan/psych60_jhub/hypertools/

In [None]:
#install a package so we can read excel files
!pip install xlrd

Make sure that you have placed the behavioral data in the hypertools folder first; otherwise you will get a "No objects to concatenate" error!

In [None]:
# Collect every CSV file in the working directory. Sorting makes the row order
# of the combined frame reproducible, because os.listdir returns entries in
# arbitrary order. endswith('.csv') only matches a real ".csv" extension.
csv_files = sorted(name for name in os.listdir('.') if name.endswith('.csv'))

# Fail early with an actionable message instead of the bare pandas error.
if not csv_files:
    raise ValueError(
        "No objects to concatenate: no .csv files were found in "
        + os.path.abspath('.')
        + ". Place (or unzip) the behavioral data in this folder first."
    )

data_list = [pd.read_csv(name) for name in csv_files]

# pd.concat keeps each file's own row labels (ignore_index is not set).
data   = pd.concat(data_list)
#onsets = pd.read_csv('Onsets.csv')

In [None]:
# Load the onsets workbook from the working directory; its 'Order N' columns
# are used further down to fill in the question for each rating row.
onsets = pd.read_excel('Onsets.xlsx')

In [None]:
# Display the combined behavioral data frame.
data

In [None]:
# Display the onsets table.
onsets

# Check it out ...

In [None]:
# How many distinct values does the 'Name' column hold, and what are they?
names = data['Name'].unique()
print(len(names))
print()
print(names)

# Copy 'Name.2' to a column name that is usable with attribute access.
data['Name2'] = data['Name.2']

In [None]:
# Split 'Name2' at its first space into the order label and the remainder.
# n=1 caps the result at two pieces, matching the two target columns; with n=2
# a value containing two or more spaces would produce a third column and make
# the assignment fail.
data[['new_order', 'new_trial']] = data['Name2'].str.split(' ', n=1, expand=True)

In [None]:
# Display the data frame again, now including the new_order / new_trial columns.
data

In [None]:
# Drop the two excluded subject IDs. Rows with a missing subject are kept,
# exactly as with the original != comparisons. .copy() gives later cells an
# independent frame to add columns to.
excluded_subjects = ['A1548', 'SID001548']
data = data[~data['subject'].isin(excluded_subjects)].copy()

# Mean of every numeric column per subject and order. numeric_only=True is
# required on pandas >= 2.0, where averaging the text columns raises a
# TypeError instead of silently dropping them.
data.groupby(['subject','new_order']).mean(numeric_only=True)

In [None]:
# for each subject
# Fill the 'question' column one (subject, order) chunk at a time.
for subject in data['subject'].unique():

    # skip float entries (presumably NaN for rows with no subject ID)
    if type(subject) == float:
        continue

    for order in data['new_order'].unique():

        # same guard for the order label
        if type(order) == float:
            continue

        # order[-2] is the second-to-last character of the label (the digit in
        # labels shaped like 'Order1,'); it selects the matching 'Order N'
        # column of the onsets table. The assigned Series is aligned to the
        # selected rows by index label.
        chunk = (data['subject'] == subject) & (data['new_order'] == order)
        data.loc[chunk, 'question'] = onsets['Order ' + str(order[-2])]


# Let's make some plots, eh?

In [None]:
# Break 'Response' on spaces into at most three parts: button, button_num, color.
response_parts = data['Response'].str.split(' ', n=2, expand=True)
data[['button','button_num','color']] = response_parts

In [None]:
# data['Name3']=data['Name.3']


# new_stim = []

# for x in list(data.Name3.str.split(', ')):
    
#     if type(x)!=float and len(x)>1:
#         new_stim.append(x[1])
#     else:
#         new_stim.append(str(x)[0])
    
# data['new_stim'] = new_stim

In [None]:
# One row per (Name, question, button_num) combination, holding the mean of the
# numeric columns. numeric_only=True is required on pandas >= 2.0, where
# averaging text columns raises a TypeError.
plot_data = data.groupby(['Name','question','button_num'], as_index=False).mean(numeric_only=True)

# button_num came out of a string split, so convert it to numbers for plotting.
plot_data['button_num'] = pd.to_numeric(plot_data.button_num)

In [None]:
# Display the aggregated table used for the bar plots.
plot_data

# Initial Look

### Some subjects have higher / lower average stress scores

In [None]:
# Open a fresh figure: with the interactive notebook backend, a plot call with
# no explicit figure is drawn into whichever earlier figure is still active.
fig, ax = plt.subplots()

# Bar height is the mean button_num per Name (seaborn's default estimator).
sb.barplot(x='Name', y='button_num', data=plot_data, ax=ax)
ax.set(xlabel='Name', ylabel='Mean button_num', title='Average rating by Name')
ax.tick_params(axis='x', labelrotation=90)  # keep the x labels readable
fig.tight_layout();

### Some questions also yield higher / lower average scores

In [None]:
# Open a fresh figure: with the interactive notebook backend, a plot call with
# no explicit figure is drawn into whichever earlier figure is still active.
fig, ax = plt.subplots()

# Bar height is the mean button_num per question (seaborn's default estimator).
sb.barplot(x='question', y='button_num', data=plot_data, ax=ax)
ax.set(xlabel='question', ylabel='Mean button_num', title='Average rating by question')
ax.tick_params(axis='x', labelrotation=90)  # question texts are long
fig.tight_layout();

# STUDENT QUESTIONS

# QUESTION 1: 

## Do people get less stressed over time / acclimate to the stressors ? 

In [None]:
# Ratings came out of a string split; make them numeric for the analyses below.
data['button_num'] = pd.to_numeric(data.button_num)

# (The mid-cell data.head() was removed: only a cell's last expression is
# displayed, so it had no effect.)
data['Name2'] = data['Name.2']

# OrderType is the part of 'Name2' before the first comma.
data['OrderType'] = data.Name2.str.split(',', n=2, expand=True)[0]
data['OrderType'].unique()

### Sure doesn't look like it.. ;)

In [None]:
# sb.lineplot(x='event', y='button_num', data=data)
# sb.regplot('event', 'button_num', data=data, x_ci='ci', scatter=False, fit_reg=True, marker='o')



### Gosh, that grey line looks interesting...

In [None]:
# sb.lineplot(x='event', y='button_num', data=data)
# sb.regplot('event', 'button_num', data=data[data['OrderType']=='Order1'], x_ci='ci', scatter=False, fit_reg=True,  color='white', marker='o')
# sb.regplot('event', 'button_num', data=data[data['OrderType']=='Order2'], x_ci='ci', scatter=False, fit_reg=True,  color='gray' , marker='o')
# sb.regplot('event', 'button_num', data=data[data['OrderType']=='Order3'], x_ci='ci', scatter=False, fit_reg=True,  color='black', marker='o')

In [None]:
# Question for students: what kind of stats will we want for the above trendline(s)?

# Are there other things you might want to look at or explore here?

In [None]:
# List the distinct subject IDs present in the data.
data['subject'].unique()

In [None]:
# Display only the rows whose OrderType is 'Order1'.
data[data['OrderType']=='Order1']

In [None]:
# # response time over time seems not too exciting...
# sb.lineplot(x='event', y='Time', data=data)

In [None]:
# Display the data frame with all derived columns added so far.
data

In [None]:


# One single-column ('question') frame per order label; note the labels keep
# the trailing comma left over from splitting 'Name2' on spaces.
order_labels = ['Order1,', 'Order2,', 'Order3,']
question_sets = [data[data['new_order'] == label][['question']] for label in order_labels]

# Animated hypertools plot with one trace per order.
hypertools.plot(question_sets, frame_rate=200, animate=True)

### Other things to think about

In [None]:
import matplotlib as mpl

In [None]:
# Show which matplotlib backend is currently active.
mpl.get_backend()

We can also reduce and plot high dimensional data (like fMRI) over time.... Hmm


In [None]:
# Embed the example animation from the hypertools documentation site
# (loaded by URL, so displaying it needs network access).
from IPython.display import Image
Image(url='https://hypertools.readthedocs.io/en/latest/_images/hypertools.gif')  

# QUESTION 2:

## Are the stressors unique or different from each other?

In [None]:
# List the distinct question texts (rows that never received a question show
# up here as a missing value).
data['question'].unique()

In [None]:
# see how questions cluster, based on content ! 

# text vectorized (scikit-learn CountVectorizer) --> estimate topics (LatentDirichletAllocation)

# Distinct question texts with missing values removed explicitly. The previous
# unique()[:-1] assumed the missing value was always the last entry; unique()
# returns values in order of appearance, so that could drop a real question
# and leave the missing value in.
questions = data['question'].dropna().unique()
labels = questions

# Plot the question texts, colored by 4 k-means clusters.
geo    = hypertools.plot(questions, 'o', size=[8, 6], n_clusters=4)

In [None]:
# Distinct question texts with missing values removed explicitly (rather than
# unique()[:-1], which assumed the missing value was always the last entry).
questions = data['question'].dropna().unique()

# Cluster the questions and pair each one with its cluster label.
labels = hypertools.cluster(questions, n_clusters=4)
dictionary = {'questions': questions, 'labels': labels}
df = pd.DataFrame(dictionary)

# Show the questions that landed in cluster 3.
df[df['labels']==3]

# look at the different clusters of questions...
# do they appear to have anything in common?

# what happens if we use more or fewer clusters? 

In [None]:
# List the column names currently in the data frame.
data.columns

In [None]:
# Mean of every numeric column per question (one row per question, default
# integer index). numeric_only=True is required on pandas >= 2.0, where
# averaging text columns raises a TypeError.
cluster_button = data.groupby(['question'], as_index=False).mean(numeric_only=True)

In [None]:
# List the column names currently in the data frame.
data.columns

In [None]:
# data['Response.1']

In [None]:
#labels = data['question'].unique()[:-1]
# Per-question table: the question text plus its mean button_num.
question_scores = cluster_button[['question', 'button_num']]

# Plot with 5 k-means clusters, then fetch cluster labels for the same input.
geo    = hypertools.plot(question_scores, '*', size=[8, 6], n_clusters=5)
labels = hypertools.cluster(question_scores, n_clusters=5)
plt.show()

# Pair each question with its cluster label and show cluster 2.
dictionary = {'questions': cluster_button['question'].unique(), 'labels': labels}
df = pd.DataFrame(dictionary)
df[df['labels'] == 2]

In [None]:
# Cluster on the mean button_num alone (no question text).
score_only = cluster_button[['button_num']]
geo    = hypertools.plot(score_only, 'o', size=[8, 6], n_clusters=5)
labels = hypertools.cluster(score_only, n_clusters=5)
plt.show()

# Row positions whose cluster label is 4 ...
k = [position for position, label in enumerate(labels) if label == 4]

# ... and the question text stored at each of those positions.
for a in k:
    print(cluster_button['question'][a])


In [None]:
# Mean of the numeric columns per question. numeric_only=True is required on
# pandas >= 2.0, where averaging text columns raises a TypeError.
grp = data.groupby(['question'], as_index=False).mean(numeric_only=True)

# Show both questions in one table. Previously the first lookup's result was
# discarded, because a cell only displays its last expression.
compare_questions = ['Seeing your ex at a party',
                     'Walking into an interview for your dream job']
grp.loc[grp['question'].isin(compare_questions), ['question', 'button_num']]

In [None]:
# Display each question next to its mean button_num.
cluster_button[['question', 'button_num']]

Question for students: If I cluster the questions by the scores students give them, how will that relate to the structure of the Order 3 stimulus set?

In [None]:
# Determining the number of clusters

In [None]:
# helpful resource here :
# https://learn.scientificprogramming.io/python-k-means-data-clustering-and-finding-of-the-best-k-485f66297c06

In [None]:
# Display screenshot.png from the working directory (the file must sit next to
# the notebook).
from IPython.display import Image
Image("screenshot.png")

# QUESTION 3: 

## Does stress from one stimulus influence the response to the next? 

## Does some stress "carry over"?

In [None]:
# Question for students: what are different ways we might look at this?

In [None]:
# Question for students: what other things might we want to look at / visualize / think about?