In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set("paper", "white")
from pyns import Neuroscout
import math

api = Neuroscout()

In [102]:
datasets = api.datasets.get()
print(f'dataset count = {len(api.datasets.get())}\n')
print('Dataset Name, ID, n_tasks, n_runs:\n')
dataset_ids = []
dataset_names = []
dataset_n_runs = []
dataset_tasks = []
for i in datasets:
    dataset_ids.append(i['id'])
    dataset_names.append(i['name'])
    dataset_tasks.append(len(i['tasks']))
    dataset_n_runs.append(i['tasks'][0]['n_runs_subject'])
    print(i['name'], i['id'], len(i['tasks']), i['tasks'][0]['n_runs_subject'])

dataset count = 12

Dataset Name, ID, n_tasks, n_runs:

Budapest 27 1 5
HealthyBrainNetwork 8 1 1
SchematicNarrative 20 1 4
studyforrest 11 1 8
Raiders 10 1 8
Life 9 1 4
ParanoiaStory 18 1 3
Sherlock 21 1 1
SherlockMerlin 5 2 1
LearningTemporalStructure 19 1 3
ReadingBrainProject 29 1 5
NaturalisticNeuroimagingDatabase 28 10 1


In [103]:
dataset_tasks

[1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 10]

In [7]:
def get_predictors(run_id):
    # input: a neuroscout run_id 
    # outputs:
    # - a pandas dataframe of predictors
    # - list of ids
    # - list of names
    # - list of modality
    predictors=api.predictors.get(run_id=run_id)
    predictor_ids = []
    predictor_names = []
    predictor_modality = []
    for i in predictors:
        if not i['source'] == 'fmriprep' and not i['mean'] == None and str(i['name']).find("bert") < 0:
            predictor_ids.append(i['id'])
            predictor_names.append(i['name'])
            try:
                predictor_modality.append(i['extracted_feature']['modality'])
            except:
                predictor_modality.append(None)
                
    df_predictors=pd.DataFrame(data= np.array([predictor_ids,predictor_modality,predictor_names]).T , columns=['id','modality','names'])
    df_predictors = df_predictors.sort_values(by=['id','names','modality'])
    predictor_ids= df_predictors['id'].to_numpy()
    predictor_names= df_predictors['names'].to_numpy()
    predictor_modality= df_predictors['modality'].to_numpy()



    return(df_predictors, predictor_ids, predictor_names, predictor_modality)

## which predictors are common to all av clips? (except NND)

In [104]:
#dataset_ids = [27,28,8,20,11,10,9,21,5]
#dataset_names = ['Budapest','NaturalisticNeuroimagingDatabase','HealthyBrainNetwork','SchematicNarrative','studyforrest','Raiders','Life','Sherlock','SherlockMerlin']
dataset_ids = [27,8,20,11,10,9,21,5]
dataset_names = ['Budapest','HealthyBrainNetwork','SchematicNarrative','studyforrest','Raiders','Life','Sherlock','SherlockMerlin']
dataset_tasks = [1, 1, 1, 1, 1, 1, 1, 2]
dataset_n_runs = [5, 1, 4, 8, 8, 4, 1, 1]

In [9]:
predictor_list_list = []
predictor_names_list = []
for i,d in enumerate(dataset_ids):
    subject = api.runs.get(dataset_id=d)[0]['subject']
    #print(subject)
    run_id=api.runs.get(dataset_id=d, subject=subject)[0]['id']
    #print(run_id)
    run_duration=api.runs.get(dataset_id=d, subject=subject)[0]['duration']
    
    df_predictors, predictor_ids, predictor_names, predictor_modality = get_predictors(run_id)
    predictor_list_list.append(predictor_ids)
    predictor_names_list.append(predictor_names)

In [11]:
predictor_names_intersect = predictor_names_list[0].copy()

for i in predictor_names_list[1:]:
    predictor_names_intersect = np.intersect1d(predictor_names_intersect,i)
   # print(predictor_names_intersect)
predictor_names_intersect

array(['abstract', 'action', 'alphabet', 'animal', 'architecture', 'art',
       'as-Alarm', 'as-Animal', 'as-Engine', 'as-Explosion', 'as-Fire',
       'as-Glass', 'as-Hands', 'as-Liquid', 'as-Mechanisms', 'as-Music',
       'as-Musical instrument', 'as-Noise', 'as-Silence', 'as-Speech',
       'as-Thunderstorm', 'as-Tools', 'as-Vehicle', 'as-Water',
       'as-Whistling', 'as-Wild animals', 'as-Wind', 'as-Wood', 'blur',
       'brightness', 'building', 'business', 'car', 'child',
       'chroma_cqt_0', 'chroma_cqt_1', 'chroma_cqt_10', 'chroma_cqt_11',
       'chroma_cqt_2', 'chroma_cqt_3', 'chroma_cqt_4', 'chroma_cqt_5',
       'chroma_cqt_6', 'chroma_cqt_7', 'chroma_cqt_8', 'chroma_cqt_9',
       'city', 'color', 'competition', 'creativity', 'dark', 'daylight',
       'design', 'desktop', 'empty', 'equipment', 'face', 'family',
       'fashion', 'furniture', 'girl', 'hand', 'home', 'horizontal',
       'illustration', 'image', 'indoors', 'landscape', 'light', 'man',
       'mel_0', 

## get these features from all datasets

In [124]:
api.datasets.get(8)['tasks'][0]['id']


7

In [109]:
t=1
datasets[8]['tasks'][t]['id']

45

In [128]:
for i,ds_id in enumerate(dataset_ids):
    ds_name = dataset_names[i]
    ds_n_runs = dataset_n_runs[i]
    ds_n_tasks = dataset_tasks[i]
    subject = api.runs.get(dataset_id = dataset_ids[0])[0]['subject']
    
    #loop through tasks (eg sherlock merlin)
    #if ds_n_tasks > 1:
    for t in np.arange(ds_n_tasks):
        task_id = api.datasets.get(ds_id)['tasks'][t]['id']

        #loop through runs (eg budapest)
        if ds_n_runs>1:
            for n in np.arange(1,ds_n_runs+1):    
                print(ds_name,n)
                run_id=api.runs.get(dataset_id=ds_id, subject=subject,task_id=task_id,number=n)[0]['id']
                run_duration=api.runs.get(dataset_id=ds_id, subject=subject,number=n)[0]['duration']
                print(ds_name,t,n)
        else:
            run_id=api.runs.get(dataset_id=ds_id, subject=subject,task_id=task_id)[0]['id']
            run_duration=api.runs.get(dataset_id=ds_id, subject=subject)[0]['duration']
            print(ds_name,t,0)

#     #
#     else:
#         runs = api.runs.get(dataset_id=ds_id, subject=subject)

        
        
#         task_list = []
#         task_names = []
#         run_ids = []
#         for r in runs:
#             task_list.append(r['task'])
#             task_names.append(r['task_name'])
#             run_ids.append(r['id'])
#         task_list,ind = np.unique(np.asarray(task_list),return_index=True)

#         task_name_list = []
#         run_id_list = []
#         for i in ind:
#             task_name_list.append(task_names[i])
#             run_id_list.append(run_ids[i])
#         print(task_list)
#         print(task_name_list)
#         print(run_id_list)

Budapest 1
Budapest 0 1
Budapest 2
Budapest 0 2
Budapest 3
Budapest 0 3
Budapest 4
Budapest 0 4
Budapest 5
Budapest 0 5


IndexError: list index out of range

In [None]:
all_feats = []
for number in np.arange(5):
    number+=1
    run_id=api.runs.get(dataset_id=27, subject=subject,number=number)[0]['id']
    run_duration=api.runs.get(dataset_id=27, subject=subject,number=number)[0]['duration']
    df_predictors, predictor_ids, predictor_names, predictor_modality = get_predictors(run_id)
    feats = get_timeseries(predictor_ids,run_id,run_duration)
    all_feats.append(feats)