# 0: Sentences Defining Event Boundaries
# I: All Participants
# II: Social Primed Participants
# III: Location Primed Participants
# IV: Graphing
# V: Printing


In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.plotly as py
import plotly
import os
import glob
import math
import statistics
import matplotlib.pyplot as plt
import xlrd
import pickle
import seaborn as sb

%autosave 5

#import Alex-scored free recall sheets for story 43
xls43 = pd.ExcelFile('/Users/alexreblando/Documents/Baldassano Lab/43_Alex.xlsx')

#get sheet names
xls = xlrd.open_workbook(r'/Users/alexreblando/Documents/Baldassano Lab/43_Alex.xlsx', on_demand=True)
sheet_names = xls.sheet_names()

#import story stats in order to get story lengths
pickle_in = open("story_stats.pickle","rb")
story_stats = pickle.load(pickle_in)

#import story boundaries to get putative event boundaries
pickle_in = open("story_boundaries.pickle","rb")
story_boundaries = pickle.load(pickle_in)

#import subj_schemas matrix so that for each story for each subject you can know if they are
#social or location primed
pickle_in = open("subj_schemas.pickle","rb")
subj_schemas = pickle.load(pickle_in)

Autosaving every 5 seconds


# 0: Sentences defining event boundaries
## Step 1: Social Events
## Step 2: Location Events

In [2]:
# Step 1: Determining the story sentences that mark the boundaries of each social event
soc_event = {}

#make the dictionary of all the events
for i in range(1,5):
    soc_event[i] = np.zeros((1,3))

count = 1
for j in range(len(story_boundaries['43'])):
    if story_boundaries['43'][j, 1] == 1:
        if count == 1:
            soc_event[count][0,0] = 1
            count = 2
        else:
            soc_event[count-1][0,1] = j
            soc_event[count][0,0] = j+1
            count += 1
        
soc_event[4][0,1] = len(story_boundaries['43']) 

#length of each event
for i in range(1,5):
    soc_event[i][0,2] = soc_event[i][0,1] - soc_event[i][0,0] + 1

In [3]:
# Step 2: Determining the story sentences that define the location events
loc_event = {}

#make the dictionary of all the events
for i in range(1,5):
    loc_event[i] = np.zeros((1,3))

count = 1
for j in range(len(story_boundaries['43'])):
    if story_boundaries['43'][j, 0] == 1:
        if count == 1:
            loc_event[count][0,0] = 1
            count = 2
        else:
            loc_event[count-1][0,1] = j
            loc_event[count][0,0] = j+1
            count += 1
            
        
loc_event[4][0,1] = len(story_boundaries['43']) 

#length of each event
for i in range(1,5):
    loc_event[i][0,2] = loc_event[i][0,1] - loc_event[i][0,0] + 1

# I: All participants (no differentiation based on priming)
## Step 1: Dictionary of scored sheets of individual participants
## Step 2: Participant x Story Sentence matrix
## Step 3: Participant x Event matrix
### Part 1: Social Events
### Part 2: Location Events
## Step 4: Average Participant x Event matrix

In [4]:
# Step 1: Dictionary of scored sheets of individual participants
rs_dict = {}

for name in sheet_names:
    rs_dict[name] = pd.read_excel(xls43, name)

In [5]:
# Step 2: Participant x Story sentence matrix with '1' values for sentences that that 
#participant mentioned in their free recall

#find the number of sentences in story 43
n_sent = story_stats['43'][3]
n_participants = len(sheet_names)
subj_sent_m = np.zeros((n_participants, n_sent))
count = 0

for name in sheet_names:
    for i in range(n_sent):
        val = np.sum(rs_dict[name][i+1])
        if val > 0:
            val = 1
        subj_sent_m[count, i] = val
    count += 1

In [6]:
# Step 3: Participant x Event matrix
# Part 1: Social Events

soc_partic_event = np.zeros((n_participants, 4))

for k in range(0, n_participants):
    #events
    for i in range(1,5):
        start = int(soc_event[i][0,0])
        stop = int(soc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        soc_partic_event[k][i-1] = count/soc_event[i][0,2]

In [7]:
soc_partic_event

array([[0.4       , 0.25      , 0.25      , 0.5       ],
       [0.2       , 0.375     , 0.41666667, 0.        ],
       [0.4       , 0.5       , 0.25      , 0.        ],
       [0.2       , 0.5       , 0.33333333, 0.25      ],
       [0.2       , 0.125     , 0.16666667, 0.25      ],
       [0.2       , 0.25      , 0.16666667, 0.25      ],
       [0.        , 0.375     , 0.41666667, 0.5       ],
       [0.6       , 0.875     , 0.16666667, 0.75      ],
       [0.2       , 0.625     , 0.41666667, 0.25      ],
       [0.4       , 0.125     , 0.08333333, 0.        ],
       [0.6       , 0.75      , 0.5       , 0.        ],
       [0.2       , 0.625     , 0.25      , 0.25      ],
       [0.4       , 0.375     , 0.41666667, 0.25      ]])

In [8]:
# Part 2: Location Events

loc_partic_event = np.zeros((n_participants, 4))

for k in range(0, n_participants):
    #events
    for i in range(1,5):
        start = int(loc_event[i][0,0])
        stop = int(loc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        loc_partic_event[k][i-1] = count/loc_event[i][0,2]

In [9]:
# Step 4: Average Participant x Event Matrix

avg_soc_part_event = np.mean(soc_partic_event, axis = 0)
avg_loc_part_event = np.mean(loc_partic_event, axis = 0)

# II: Social Primed Participants
## Step 1: Participant x Story Sentence matrix
## Step 3: Participant x Event matrix
### Part 1: Social Events
### Part 2: Location Events
## Step 4: Average Participant x Event matrix

In [10]:
#Part 1: make a participant x story sentence matrix with '1' values for sentences that that 
#participant mentioned in their free recall for social primed participants

#find the number of sentences in story 43
n_sent = story_stats['43'][3]

#the number of social primed participants
n_social = (sum( x == 'Social' for x in subj_schemas['43'].values()))

#make empty participant x story sentences matrix
soc_subj_sent_m = np.zeros((n_social, n_sent))
count = 0
for name in sheet_names:
    if subj_schemas['43'][name] == 'Social':
        for i in range(n_sent):
            val = np.sum(rs_dict[name][i+1])
            if val > 0:
                val = 1
            soc_subj_sent_m[count, i] = val
        count += 1

In [11]:
# Step 2: Participant x Event matrix
# Part 1: Social Events

SP_soc_partic_event = np.zeros((n_social, 4))

for k in range(0, n_social):
    #events
    for i in range(1,5):
        start = int(soc_event[i][0,0])
        stop = int(soc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        SP_soc_partic_event[k][i-1] = count/soc_event[i][0,2]

In [12]:
# Part 2: Location Events

SP_loc_partic_event = np.zeros((n_social, 4))

for k in range(0, n_social):
    #events
    for i in range(1,5):
        start = int(loc_event[i][0,0])
        stop = int(loc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        SP_loc_partic_event[k][i-1] = count/loc_event[i][0,2]

In [13]:
# Step 3: Average Participant x Event matrix

avg_SP_soc_part_event = np.mean(SP_soc_partic_event, axis = 0)
avg_SP_loc_part_event = np.mean(SP_loc_partic_event, axis = 0)

# II: Location Primed Participants
## Step 1: Participant x Story Sentence matrix
## Step 3: Participant x Event matrix
### Part 1: Social Events
### Part 2: Location Events
## Step 4: Average Participant x Event matrix

In [14]:
#Part 1: make a participant x story sentence matrix with '1' values for sentences that that 
#participant mentioned in their free recall for social primed participants

#find the number of sentences in story 43
n_sent = story_stats['43'][3]

#the number of social primed participants
n_location = (sum( x == 'Location' for x in subj_schemas['43'].values()))

#make empty participant x story sentences matrix
loc_subj_sent_m = np.zeros((n_location, n_sent))
count = 0
for name in sheet_names:
    if subj_schemas['43'][name] == 'Location':
        for i in range(n_sent):
            val = np.sum(rs_dict[name][i+1])
            if val > 0:
                val = 1
            loc_subj_sent_m[count, i] = val
        count += 1

In [15]:
# Step 2: Participant x Event matrix
# Part 1: Social Events

LP_soc_partic_event = np.zeros((n_location, 4))

for k in range(0, n_location):
    #events
    for i in range(1,5):
        start = int(soc_event[i][0,0])
        stop = int(soc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        LP_soc_partic_event[k][i-1] = count/soc_event[i][0,2]

In [16]:
# Part 2: Location Events

LP_loc_partic_event = np.zeros((n_location, 4))

for k in range(0, n_location):
    #events
    for i in range(1,5):
        start = int(loc_event[i][0,0])
        stop = int(loc_event[i][0,1])
        count = 0
        for j in range(start, stop):
            if subj_sent_m[k][j] == 1:
                count += 1
        LP_loc_partic_event[k][i-1] = count/loc_event[i][0,2]

In [17]:
# Step 3: Average Participant x Event matrix

avg_LP_soc_part_event = np.mean(LP_soc_partic_event, axis = 0)
avg_LP_loc_part_event = np.mean(LP_loc_partic_event, axis = 0)

In [18]:
print(avg_LP_loc_part_event)
np.mean(avg_LP_loc_part_event)

[0.1        0.425      0.18571429 0.39090909]


0.2754058441558441

In [20]:
print(avg_LP_soc_part_event)
np.mean(avg_LP_soc_part_event)

[0.28       0.4        0.26666667 0.275     ]


0.30541666666666667

In [22]:
print(avg_SP_loc_part_event)
np.mean(avg_SP_loc_part_event)

[0.08333333 0.5        0.25       0.36363636]


0.29924242424242425

In [23]:
print(avg_SP_soc_part_event)
np.mean(avg_SP_soc_part_event)

[0.3     0.40625 0.3125  0.1875 ]


0.3015625

In [24]:
LP_soc_partic_event

array([[0.4       , 0.25      , 0.25      , 0.5       ],
       [0.2       , 0.375     , 0.41666667, 0.        ],
       [0.4       , 0.5       , 0.25      , 0.        ],
       [0.2       , 0.5       , 0.33333333, 0.25      ],
       [0.2       , 0.125     , 0.16666667, 0.25      ],
       [0.2       , 0.25      , 0.16666667, 0.25      ],
       [0.        , 0.375     , 0.41666667, 0.5       ],
       [0.6       , 0.875     , 0.16666667, 0.75      ],
       [0.2       , 0.625     , 0.41666667, 0.25      ],
       [0.4       , 0.125     , 0.08333333, 0.        ]])