## FaceNet single-predictor models
Set of models investigating face perception:
- single-predictor model with binary regressor coding for presence of any face (**any_faces**)
- **any_faces** + regressor coding for the first appearance of a face (**first_time_face**)
- **any_faces** + log of time since last appearance of detected face (mean across faces if more than one are present in the frame, **log_mean_time_since**) 
- **any_faces** + log of time since last appearance of detected face (max across multiple faces, **log_max_time_since**) 
- **any_faces** + log of cumulative time the detected face has been on screen (mean across faces, **log_mean_face_time_cum**)
- **any_faces** + log of cumulative time the detected face has been on screen (max across faces, **log_max_face_time_cum**)

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to imported code are picked up.
%load_ext autoreload
%autoreload 2

In [2]:
from tools.create import create_set_models
from tools.utils import dump_collection, load_collection
from tools.viz import (plot_regressor, plot_regressor,
                       plot_metrics, plot_contrast_by_dataset, plot_contrast_by_analysis,
                       plot_analysis_grid)
from pyns import Neuroscout
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path
%matplotlib inline

In [3]:
# pyns client object; queried further down via `api.runs.get(...)`.
api = Neuroscout()

## Define predictors and confounds

In [106]:
# Each entry of `predictors` is the predictor set of one model: `any_faces`
# alone first, then `any_faces` paired with each additional predictor.
other = [
    'first_time_face',
    'log_mean_time_since',
    'log_max_time_since',
    'log_mean_face_time_cum',
    'log_max_face_time_cum',
]
predictors = [['any_faces']] + [['any_faces', extra] for extra in other]

In [107]:
# Confound regressor names: `a_comp_cor_00` .. `a_comp_cor_05`, followed by
# the translation and then rotation parameters along x, y and z.
confounds = [f'a_comp_cor_{idx:02d}' for idx in range(6)]
confounds += [f'{motion}_{axis}' for motion in ('trans', 'rot') for axis in 'xyz']

## Create models

In [108]:
# mdict = {}
# for pset in predictors:
#    mdict['+'.join(pset)] = create_set_models(pset, confounds, name='+'.join(pset), datasets=['NaturalisticNeuroimagingDatabase'])

In [7]:
# Location of the serialized model collection (models/facenet.json).
filename = Path('models', 'facenet.json')

In [8]:
# Load the saved model collection from `filename`; the cells below index it by
# model name (e.g. mdict['any_faces+face_switch']) and iterate over the entries.
mdict = load_collection(filename)

In [124]:
# dump_collection(mdict, filename)

# FaceNet -- face switch

In [91]:
# Input predictors for the face-switch model.
preds = [
    'first_time_face',
    'log_max_time_since',
    'any_faces',
]

In [92]:
# Ordered transformation steps that derive `face_switch` and convolve the
# regressors of interest.
transformations = [
    # 1. ToDense on both inputs with SamplingRate 2.
    {'Input': ['log_max_time_since', 'first_time_face'], 'Name': 'ToDense', 'SamplingRate': 2},
    # 2. Threshold/binarize the time-since variable at 0.5 into a temporary
    #    output, leaving the original variable untouched.
    {
        'Input': ['log_max_time_since'],
        'Name': 'Threshold',
        'Threshold': 0.5,
        'Binarize': True,
        'Output': ['time_since_temp'],
    },
    # 3. face_switch = time_since_temp OR first_time_face.
    {'Input': ['time_since_temp', 'first_time_face'], 'Output': 'face_switch', 'Name': 'Or'},
    # 4. Convolve the two regressors of interest.
    {'Input': ['any_faces', 'face_switch'], 'Name': 'Convolve'},
]

In [None]:
# mdict['any_faces+face_switch'] = create_set_models(predictors=preds, confounds=confounds, name='any_faces+face_switch', transformations=transformations)

In [33]:
# Rewrite the first step of every 'any_faces+face_switch' analysis: design
# matrix columns, dummy-contrast conditions and transformations, then push().
for analysis in mdict['any_faces+face_switch']:
    an = analysis['analysis']
    first_step = an.model['Steps'][0]
    # Regressors of interest followed by the confound columns.
    first_step['Model']['X'] = ['face_switch', 'any_faces'] + [
        'a_comp_cor_00',
        'a_comp_cor_01',
        'a_comp_cor_02',
        'a_comp_cor_03',
        'a_comp_cor_04',
        'a_comp_cor_05',
        'trans_x',
        'trans_y',
        'trans_z',
        'rot_x',
        'rot_y',
        'rot_z',
    ]
    first_step['DummyContrasts']['Conditions'] = ['face_switch', 'any_faces']
    first_step['Transformations'] = transformations
    an.push()

In [67]:
# For NaturalisticNeuroimagingDatabase analyses only, reduce the design matrix
# to the two regressors of interest (no confound columns) and push().
for analysis in mdict['any_faces+face_switch']:
    if analysis['dataset'] != 'NaturalisticNeuroimagingDatabase':
        continue
    an = analysis['analysis']
    an.model['Steps'][0]['Model']['X'] = ['face_switch', 'any_faces']
    an.push()

# All Model

In [577]:
# Key under which the combined ("all") model is stored in `mdict`.
all_name = 'any_faces+face_switch+log_mean_time_since+first_time_face+log_mean_face_time_cum'

# Predictors requested as inputs; `face_switch` is not among them because it
# is produced by the transformations below.
all_preds = ['first_time_face', 'log_mean_time_since', 'any_faces', 'log_mean_face_time_cum']

transformations = [
    # ToDense on both inputs with SamplingRate 2.
    {'Input': ['log_mean_time_since', 'first_time_face'], 'Name': 'ToDense', 'SamplingRate': 2},
    # Threshold/binarize at 0.5 into a temporary output, so that
    # `log_mean_time_since` itself remains available for the Convolve step.
    {
        'Input': ['log_mean_time_since'],
        'Name': 'Threshold',
        'Threshold': 0.5,
        'Binarize': True,
        'Output': ['time_since_temp'],
    },
    # face_switch = time_since_temp OR first_time_face.
    {'Input': ['time_since_temp', 'first_time_face'], 'Output': 'face_switch', 'Name': 'Or'},
    # Convolve every regressor of interest.
    {
        'Input': ['any_faces', 'face_switch', 'first_time_face', 'log_mean_time_since', 'log_mean_face_time_cum'],
        'Name': 'Convolve',
    },
]

# mdict[all_name] = create_set_models(
#     predictors=all_preds, confounds=confounds, name=all_name, transformations=transformations)

In [227]:
# Update the first step of every analysis of the combined model and push().
# Confound columns are added for every dataset except
# NaturalisticNeuroimagingDatabase.
for analysis in mdict[all_name]:
    an = analysis['analysis']
    # The dataset name lives on the collection entry; the bare name `dataset`
    # that was used here before is not defined anywhere in this notebook.
    if analysis['dataset'] != 'NaturalisticNeuroimagingDatabase':
        all_inputs = all_preds + ['face_switch'] + confounds
    else:
        all_inputs = all_preds + ['face_switch']
    an.model['Steps'][0]['Model']['X'] = all_inputs
    # NOTE(review): for non-NNDB datasets this also lists the confounds as
    # dummy-contrast conditions, unlike the other models in this notebook,
    # which only list the regressors of interest -- confirm this is intended.
    an.model['Steps'][0]['DummyContrasts']['Conditions'] = all_inputs
    an.model['Steps'][0]['Transformations'] = transformations
    an.push()

### face_switch + time_cum + speech + shot_change

In [582]:
# Regressors of interest for this model. They must be defined before the model
# name is derived from them; otherwise the name is built from whatever
# `hrf_vars` an earlier cell left behind (or fails on a fresh kernel).
hrf_vars = ['speech', 'shot_change','face_switch', 'log_mean_face_time_cum']
all_speech_shot_name = '+'.join(hrf_vars)
transformations = [
    {
        'Input': ['log_mean_time_since', 'first_time_face'],
        'Name': 'ToDense',
        'SamplingRate': 2
        
    },
    {
        # No 'Output' key here; the Or step below takes `log_mean_time_since`
        # as its input.
        'Input': ['log_mean_time_since'],
        'Name': 'Threshold',
        'Threshold': 0.5,
        'Binarize': True,        
    },
    {
        'Input': ['log_mean_time_since', 'first_time_face'],
        'Output': 'face_switch',
        'Name': 'Or',
    },
    {
        'Input': hrf_vars,
        'Name': 'Convolve'
    }
]

# Predictors requested as inputs (`face_switch` is derived by the steps above).
input_preds = ['first_time_face', 'log_mean_time_since', 'log_mean_face_time_cum', 'shot_change', 'speech']
# mdict[all_speech_shot_name] = create_set_models(
#     predictors=input_preds, confounds=confounds, name=all_speech_shot_name, transformations=transformations)

In [583]:
# Apply the design columns, contrast conditions and transformations to the
# first step of every analysis of this model, then push().
for analysis in mdict[all_speech_shot_name]:
    an = analysis['analysis']
    first_step = an.model['Steps'][0]
    first_step['Model']['X'] = hrf_vars + confounds
    first_step['DummyContrasts']['Conditions'] = hrf_vars
    first_step['Transformations'] = transformations
    an.push()

### any_faces + above

In [584]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
    'shot_change',
    'face_switch',
    'log_mean_face_time_cum',
]
name = '+'.join(hrf_vars)

transformations = [
    # ToDense on both inputs with SamplingRate 2.
    {'Input': ['log_mean_time_since', 'first_time_face'], 'Name': 'ToDense', 'SamplingRate': 2},
    # Threshold/binarize at 0.5; no 'Output' key is given.
    {'Input': ['log_mean_time_since'], 'Name': 'Threshold', 'Threshold': 0.5, 'Binarize': True},
    # face_switch = log_mean_time_since OR first_time_face.
    {'Input': ['log_mean_time_since', 'first_time_face'], 'Output': 'face_switch', 'Name': 'Or'},
    # Convolve all regressors of interest.
    {'Input': hrf_vars, 'Name': 'Convolve'},
]

# Predictors requested as inputs (`face_switch` is derived by the steps above).
input_preds = [
    'any_faces',
    'first_time_face',
    'log_mean_time_since',
    'log_mean_face_time_cum',
    'shot_change',
    'speech',
]
# mdict[name] = create_set_models(
#     predictors=input_preds, confounds=confounds, name=name, transformations=transformations)

# Apply the design columns, contrast conditions and transformations to the
# first step of every analysis stored under `name`, then push().
for analysis in mdict[name]:
    an = analysis['analysis']
    first_step = an.model['Steps'][0]
    first_step['Model']['X'] = hrf_vars + confounds
    first_step['DummyContrasts']['Conditions'] = hrf_vars
    first_step['Transformations'] = transformations
    an.push()

### Notes:

Model w/ too many correlated predictors was less consistent across datasets


Models to run:


- "back up" plan: pairwise models including speech, and face_switch + shot change

- face_switch + time_since + first_time_face (lower priority)


## any_faces + speech + shot_change

In [585]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
    'shot_change',
]
name = '+'.join(hrf_vars)

# mdict[name] = create_set_models(
#     predictors=hrf_vars, confounds=confounds, name=name)

## any_faces + speech + shot_change + face_switch

In [589]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
    'shot_change',
    'face_switch',
]
name = '+'.join(hrf_vars)

transformations = [
    # ToDense on both inputs with SamplingRate 2.
    {'Input': ['log_mean_time_since', 'first_time_face'], 'Name': 'ToDense', 'SamplingRate': 2},
    # Threshold/binarize at 0.5; no 'Output' key is given.
    {'Input': ['log_mean_time_since'], 'Name': 'Threshold', 'Threshold': 0.5, 'Binarize': True},
    # face_switch = log_mean_time_since OR first_time_face.
    {'Input': ['log_mean_time_since', 'first_time_face'], 'Output': 'face_switch', 'Name': 'Or'},
    # Convolve all regressors of interest.
    {'Input': hrf_vars, 'Name': 'Convolve'},
]

# Predictors requested as inputs (`face_switch` is derived by the steps above).
input_preds = [
    'any_faces',
    'first_time_face',
    'log_mean_time_since',
    'shot_change',
    'speech',
]
# mdict[name] = create_set_models(
#     predictors=input_preds, confounds=confounds, name=name, transformations=transformations)

# Apply the design columns, contrast conditions and transformations to the
# first step of every analysis stored under `name`, then push().
for analysis in mdict[name]:
    an = analysis['analysis']
    first_step = an.model['Steps'][0]
    first_step['Model']['X'] = hrf_vars + confounds
    first_step['DummyContrasts']['Conditions'] = hrf_vars
    first_step['Transformations'] = transformations
    an.push()

### any_faces + speech

In [4]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
]
name = '+'.join(hrf_vars)

# mdict[name] = create_set_models(predictors=hrf_vars, confounds=confounds, name=name)

In [72]:
# For each analysis, take the subject of its first run and request a report
# restricted to that subject's runs which belong to the analysis.
for di in mdict[name]:
    current = di['analysis']
    sub_id = api.runs.get(current.runs[0])['subject']
    run_ids = [
        run['id']
        for run in api.runs.get(subject=sub_id)
        if run['id'] in current.runs
    ]
    current.generate_report(run_id=run_ids)

In [73]:
# pandas is needed for read_csv below but is not imported anywhere else in
# this notebook, so import it here to keep the cell runnable on a fresh kernel.
import pandas as pd

# Collect the correlation between the `any_faces` and `speech` columns of
# every design matrix listed in each analysis' report.
corrs = []
for di in mdict[name]:
    dms = di['analysis'].get_report()['result']['design_matrix']
    for dm in dms:
        # Each entry is handed straight to read_csv (presumably a URL or path
        # to a delimited design-matrix file -- confirm against the report API).
        df = pd.read_csv(dm)
        corrs.append(df['any_faces'].corr(df['speech']))

In [75]:
# Smallest any_faces/speech correlation across the collected design matrices.
min(corrs)

-0.5569070716274516

In [76]:
# Largest any_faces/speech correlation across the collected design matrices.
max(corrs)

0.5726311513890121

In [78]:
import numpy as np

In [79]:
# Mean any_faces/speech correlation across the collected design matrices.
np.mean(corrs)

0.1891768047871348

In [115]:
# Compile every analysis that is still a draft; for all others print
# dataset, task, status and hash id.
for analysis_dict in mdict[name]:
    analysis = analysis_dict['analysis']
    # Exact comparison: `status in 'DRAFT'` is a substring test and would also
    # match '' or any fragment of 'DRAFT'.
    if analysis.get_status()['status'] == 'DRAFT':
        analysis.compile()
    else:
        print(f"{analysis_dict['dataset']}, {analysis_dict['task']}, {analysis.status}, {analysis.hash_id}")

Raiders, raiders, PENDING, spcpc
SchematicNarrative, perception, PENDING, n7v25
SherlockMerlin, MerlinMovie, PASSED, bju9h
SherlockMerlin, SherlockMovie, PASSED, 9rup3
Sherlock, sherlockPart1, PASSED, hvdjn
LearningTemporalStructure, movie, PENDING, fcwn8
Budapest, movie, PENDING, 4p9ps
NaturalisticNeuroimagingDatabase, 12yearsaslave, PASSED, 83x35
NaturalisticNeuroimagingDatabase, 500daysofsummer, PASSED, pr9jk
NaturalisticNeuroimagingDatabase, backtothefuture, PASSED, 54hrh
NaturalisticNeuroimagingDatabase, citizenfour, PASSED, j4tmp
NaturalisticNeuroimagingDatabase, littlemisssunshine, PASSED, r64jf
NaturalisticNeuroimagingDatabase, pulpfiction, PASSED, ay2pw
NaturalisticNeuroimagingDatabase, split, PASSED, fmkdi
NaturalisticNeuroimagingDatabase, theprestige, PASSED, zy9sp
NaturalisticNeuroimagingDatabase, theshawshankredemption, PASSED, qsevk
NaturalisticNeuroimagingDatabase, theusualsuspects, PASSED, q58ai


In [122]:
# Space-separated hash ids of all analyses outside NaturalisticNeuroimagingDatabase.
' '.join(
    a['hash_id']
    for a in mdict[name]
    if a['dataset'] != 'NaturalisticNeuroimagingDatabase'
)

'spcpc n7v25 bju9h 9rup3 hvdjn fcwn8 4p9ps'

In [123]:
# Space-separated hash ids of the NaturalisticNeuroimagingDatabase analyses.
' '.join(
    a['hash_id']
    for a in mdict[name]
    if a['dataset'] == 'NaturalisticNeuroimagingDatabase'
)

'83x35 pr9jk 54hrh j4tmp r64jf ay2pw fmkdi zy9sp qsevk q58ai'

### any_faces + speech + face_time_cum (includes NNDB)

In [32]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
    'log_mean_face_time_cum',
]
name = '+'.join(hrf_vars)

# mdict[name] = create_set_models(predictors=hrf_vars, confounds=confounds, name=name)

In [51]:
# Compile every analysis that is still a draft; for all others print
# dataset, task, status and hash id.
for analysis_dict in mdict[name]:
    analysis = analysis_dict['analysis']
    # Exact comparison: `status in 'DRAFT'` is a substring test and would also
    # match '' or any fragment of 'DRAFT'.
    if analysis.get_status()['status'] == 'DRAFT':
        analysis.compile()
    else:
        print(f"{analysis_dict['dataset']}, {analysis_dict['task']}, {analysis.status}, {analysis.hash_id}")

Raiders, raiders, PASSED, a9b39
SchematicNarrative, perception, PASSED, s4iks
SherlockMerlin, MerlinMovie, PASSED, 85tf7
SherlockMerlin, SherlockMovie, PASSED, h4bs4
Sherlock, sherlockPart1, PASSED, 5gneb
LearningTemporalStructure, movie, PASSED, 3i8ro
Budapest, movie, PASSED, osho8
NaturalisticNeuroimagingDatabase, 12yearsaslave, PASSED, ci2ha
NaturalisticNeuroimagingDatabase, 500daysofsummer, PASSED, 978ux
NaturalisticNeuroimagingDatabase, backtothefuture, PASSED, 327hp
NaturalisticNeuroimagingDatabase, citizenfour, PASSED, 4b8bj
NaturalisticNeuroimagingDatabase, littlemisssunshine, PASSED, qpzch
NaturalisticNeuroimagingDatabase, pulpfiction, PASSED, 8c62p
NaturalisticNeuroimagingDatabase, split, PASSED, 6quaw
NaturalisticNeuroimagingDatabase, theprestige, PASSED, 6bvcp
NaturalisticNeuroimagingDatabase, theshawshankredemption, PASSED, npcgr
NaturalisticNeuroimagingDatabase, theusualsuspects, PASSED, 4y66v


In [79]:
# Hash ids of PASSED analyses whose get_uploads() result is empty and that
# have more than 30 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and len(an['analysis'].runs) > 30
)

''

In [83]:
# Hash ids of PASSED NaturalisticNeuroimagingDatabase analyses whose
# get_uploads() result is empty and that have fewer than 7 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and an['dataset'] == 'NaturalisticNeuroimagingDatabase'
    and len(an['analysis'].runs) < 7
)

'6quaw 4y66v'

In [84]:
# Hash ids of PASSED NaturalisticNeuroimagingDatabase analyses whose
# get_uploads() result is empty and that have more than 16 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and an['dataset'] == 'NaturalisticNeuroimagingDatabase'
    and len(an['analysis'].runs) > 16
)

''

In [82]:
# Hash ids of PASSED analyses outside NaturalisticNeuroimagingDatabase whose
# get_uploads() result is empty and that have fewer than 30 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and len(an['analysis'].runs) < 30
    and an['dataset'] != 'NaturalisticNeuroimagingDatabase'
)

''

### any_faces + shot_change + speech + time_cum

In [27]:
# Regressors of interest; the model name is their '+'-joined list.
hrf_vars = [
    'any_faces',
    'speech',
    'shot_change',
    'log_mean_face_time_cum',
]
name = '+'.join(hrf_vars)

# mdict[name] = create_set_models(
#     predictors=hrf_vars, confounds=confounds, name=name)

In [602]:
# Compile every analysis that is still a draft; for all others print
# dataset, task, status and hash id.
for analysis_dict in mdict[name]:
    analysis = analysis_dict['analysis']
    # Exact comparison: `status in 'DRAFT'` is a substring test and would also
    # match '' or any fragment of 'DRAFT'.
    if analysis.get_status()['status'] == 'DRAFT':
        analysis.compile()
    else:
        print(f"{analysis_dict['dataset']}, {analysis_dict['task']}, {analysis.status}, {analysis.hash_id}")

Budapest, PASSED, h57v2
LearningTemporalStructure, PASSED, oyndv
Raiders, PASSED, ggic7
SchematicNarrative, PASSED, n3pj7
Sherlock, PASSED, nyb95
SherlockMerlin, PASSED, dci4u
SherlockMerlin, PASSED, ovbpo


In [603]:
# Hash ids of PASSED analyses whose get_uploads() result is empty and that
# have more than 30 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and len(an['analysis'].runs) > 30
)

''

In [604]:
# Hash ids of PASSED analyses whose get_uploads() result is empty and that
# have fewer than 30 runs.
' '.join(
    an['hash_id']
    for an in mdict[name]
    if an['analysis'].get_status()['status'] == 'PASSED'
    and not an['analysis'].get_uploads()
    and len(an['analysis'].runs) < 30
)

''