In [5]:
"""
first step in forward modeling analysis - getting the data from NeuroVault,
    labeled with cognitive concepts from the Cognitive Atlas

"""

from cognitiveatlas.api import get_task, get_concept

from pybraincompare.compare.maths import TtoZ
from pybraincompare.mr.datasets import get_standard_brain
from nilearn.image import resample_img
from pyneurovault import api

from glob import glob
from utils import (
   get_base, get_pwd, make_dirs
)

import nibabel
import numpy
import os
import pandas
import shutil
import sys

if sys.version_info < (3, 0):
    from exceptions import ValueError

# Resolve the project base directory and the directory this analysis runs from
base = get_base()
here = get_pwd()

# Downloaded data goes under <here>/data, derived outputs under <here>/results
data_directory = os.path.abspath("%s/data" % here)
results_directory = os.path.abspath("%s/results" % here)

# Create both folders up front
folders = [data_directory, results_directory]
make_dirs(folders)

# Pull the metadata of every NeuroVault collection
collections = api.get_collections()

# Keep only the collections that have a DOI
has_doi = collections.DOI.notnull()
collections = collections[has_doi]

# Not used by later steps, but saved for reference
collections_tsv = "%s/collections_with_dois.tsv" %(results_directory)
collections.to_csv(collections_tsv,encoding="utf-8",sep="\t")

# Image metadata for the retained collections
collection_ids = collections.collection_id.tolist()
images = api.get_images(collection_pks=collection_ids)

# Keep only the image IDs on the curated inclusion list (curated by
# Poldracklab); the CSV has no header row and a single column of IDs
curated_csv = '%s/included_images.csv' %(data_directory)
curated_images = pandas.read_csv(curated_csv,header=None,names=['image_id'])
is_curated = images['image_id'].isin(curated_images['image_id'])
images = images.loc[is_curated]

## The following filters won't really have any effect, since the image set
## is already fixed by the explicit curated list applied above. They are
## kept to make it clear how that list was initially derived. If any of
## these criteria change in the NeuroVault database (modality, MNI,
## thresholded description, Cognitive Atlas task), the list could be
## filtered further.

# Get rid of any not in MNI
# (the explicit "== False" comparisons are kept as written rather than
#  rewritten with "~", so the filter does not depend on these flag columns
#  having a strict boolean dtype)
images = images[images.not_mni == False]

# Get rid of thresholded images
images = images[images.is_thresholded == False]

# Remove single subject maps
images = images[images.analysis_level!='single-subject']
images = images[images.number_of_subjects!=1]

# Remove non fmri-BOLD
images = images[images.modality=='fMRI-BOLD']

# We can't use Rest or other/none: require a Cognitive Atlas task ID and
# drop the rest / unspecified paradigms
images = images[images.cognitive_paradigm_cogatlas_id.notnull()]
images = images[~images.cognitive_paradigm_cogatlas.isin(["None / Other","rest eyes closed","rest eyes open"])]

# Limit to Z and T maps (all are Z and T); any other map type would be
# dropped when the two subsets are recombined below
z = images[images.map_type == "Z map"]
t = images[images.map_type == "T map"]

# Remove T maps that do not have # subjects defined - the subject count is
# needed later to derive the degrees of freedom for the T-to-Z conversion
t = t[t.number_of_subjects.notnull()]

# Recombine Z maps followed by the usable T maps. DataFrame.append was
# removed in pandas 2.0; pandas.concat performs the same row-wise stacking.
images = pandas.concat([z,t])

# Download the selected images, using the 2mm MNI brain as the target
standard = os.path.abspath("%s/mr/MNI152_T1_2mm_brain.nii.gz" %(here))
api.download_images(dest_dir=data_directory,images_df=images,target=standard)

# NeuroVault outputs two folders - original and resampled. The T maps are
# read from the resampled one and converted to Z.
resampled_dir = "%s/resampled" %(data_directory)

# Paths of just the T maps, named by zero-padded image ID
tmaps = [ "%s/%06d.nii.gz" %(resampled_dir,tmap_id) for tmap_id in t.image_id.tolist()]

# Degrees of freedom for each T map (number of subjects - 2), in the same
# row order as tmaps; needed to convert properly to Zstat maps
dofs = [subject_row.number_of_subjects -2 for (row_label,subject_row) in t.iterrows()]

# Converted Z maps, and maps that are already Z, end up in a common folder
outfolder_z = "%s/resampled_z" %(data_directory)
make_dirs(outfolder_z)

for tmap,dof in zip(tmaps,dofs):
    zmap_new = "%s/%s" %(outfolder_z,os.path.basename(tmap))
    # Already converted on a previous run
    if os.path.exists(zmap_new):
        continue
    # Nothing to convert if the resampled T map is not on disk
    if not os.path.exists(tmap):
        print('skipping due to missing tmap:%s'%tmap)
        continue
    TtoZ(tmap,output_nii=zmap_new,dof=dof)

# Maps that are already Z maps are copied into the common folder unchanged
zmaps = [ "%s/%06d.nii.gz" %(resampled_dir,zmap_id) for zmap_id in z.image_id.tolist()]
for zmap in zmaps:
    if os.path.exists(zmap):
        zmap_new = "%s/%s" %(outfolder_z,os.path.basename(zmap))
        shutil.copyfile(zmap,zmap_new)
    else:
        print('skipping missing zmap: %s'%zmap)

# Every image in the metadata table must now have a file in the Z folder
n_zmaps_found = len(glob("%s/*.nii.gz" %(outfolder_z)))
if n_zmaps_found != images.shape[0]:
    raise ValueError("ERROR: not all images were found in final folder %s" %(outfolder_z))

# This table is read again by the later steps of the analysis
images.to_csv("%s/filtered_contrast_images.tsv" %(results_directory),encoding="utf-8",sep="\t")

# Warn when the image count differs from the 93 used in the original analysis
n_images = images.shape[0]
if n_images != 93:
    print("Warning, the original analysis had 93 images, and this number has changed to %s." %(n_images))

# Last step: write a 4mm-voxel copy of every Z map for the classification analysis
outfolder_z4mm = "%s/resampled_z_4mm" %(data_directory)
make_dirs(outfolder_z4mm)

maps = glob("%s/*.nii.gz" %(outfolder_z))
for mr in maps:
    image_name = os.path.basename(mr)
    print("Resampling %s to 4mm..." %(image_name))
    # A diagonal 3x3 target affine requests 4mm voxels along each axis
    nii_resamp = resample_img(nibabel.load(mr),target_affine=numpy.diag([4,4,4]))
    # Output keeps the input file name, in the 4mm folder
    resampled_path = "%s/%s" %(outfolder_z4mm,image_name)
    nibabel.save(nii_resamp,resampled_path)


BASE project directory is defined as -f
Creating directory /home/mjfang/forward-modeling-cognitive-concepts/results 
Extracting NeuroVault collections meta data...
http://neurovault.org/api/collections/?limit=100&format=json
Found 921 results.
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=100
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=200
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=300
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=400
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=500
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=600
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=700
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=800
Retrieving http://neurovault.org/api/collections/?format=json&limit=100&offset=900
Extractin

  "The behaviour of affine_transform with a one-dimensional "


Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0177.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0177.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/0177.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0178.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0178.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/0178.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0183.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0183.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/0183.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0135.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0135.nii.gz
Resampling /home/mjfang/forward-modeli



Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/2714.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/2714.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/2714.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/10924.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/10924.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/10924.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0003.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0003.nii.gz
Resampling /home/mjfang/forward-modeling-cognitive-concepts/data/original/0003.nii.gz
Downloading /home/mjfang/forward-modeling-cognitive-concepts/data/original/0004.nii.gz
Extrapolating /home/mjfang/forward-modeling-cognitive-concepts/data/original/0004.nii.gz
Resampling /home/mjfang/forward-mod

In [6]:
"""
second step in forward modeling analysis - generating data structures to look up associated Cognitive Atlas terms
    with the images downloaded in 0.neurovault_images.py

"""

from cognitiveatlas.api import get_concept

from glob import glob

import numpy
import os
import pandas
import pickle
import re
import sys

from utils import (
   get_base, get_pwd, make_dirs
)


# Resolve the project base directory and the directory this analysis runs from
base = get_base()
here = get_pwd()

# Data and results folders sit next to this analysis
data = os.path.abspath("%s/data" % here)
results = os.path.abspath("%s/results" % here)

# Load the filtered image metadata; the first column of the TSV is the index
images_tsv = "%s/filtered_contrast_images.tsv" %results
images = pandas.read_csv(images_tsv,sep="\t",index_col=0)

# There is a bug with getting contrasts for these two images, so their
# concept IDs were looked up manually (@vsoch) and are hard-coded here,
# keyed by image ID
manual_concepts = {
    109: ["trm_567982752ff4a","trm_4a3fd79d0afcf","trm_5534111a8bc96",
          "trm_557b48a224b95","trm_557b4a81a4a17","trm_4a3fd79d0b64e","trm_4a3fd79d0a33b",
          "trm_557b4a7315f1b","trm_4a3fd79d0af71","trm_557b4b56de455","trm_557b4add1837e"],
    118: ["trm_4a3fd79d0b642","trm_4a3fd79d0a33b","trm_557b4a7315f1b","trm_4a3fd79d0af71",
          "trm_557b4b56de455"],
}

# Map each image ID to the list of concept IDs associated with it
unique_concepts = dict()
for row in images.iterrows():
    idx = row[1].image_id
    if idx in manual_concepts:
        unique_concepts[idx] = manual_concepts[idx]
        continue
    # All other images: query the Cognitive Atlas by the image's contrast ID
    # and keep each returned concept ID once
    contrast = row[1].cognitive_contrast_cogatlas_id
    concept_result = get_concept(contrast_id=contrast)
    unique_concepts[idx] = numpy.unique(concept_result.pandas.id).tolist()

# Flat list of every concept ID, each listed once in first-seen order
all_concepts = []
for concept_ids in unique_concepts.values():
    for concept_id in concept_ids:
        if concept_id in all_concepts:
            continue
        all_concepts.append(concept_id)


# Bundle that is pickled once the image-by-concept data frame has been added
res = {"all_concepts":all_concepts,"unique_concepts":unique_concepts,"images":images}

## STEP 1: GENERATE IMAGE BY CONCEPT DATA FRAME
# Rows are image IDs, columns are concept IDs; a cell is 1 when the image is
# tagged with that concept and 0 otherwise
concept_df = pandas.DataFrame(0,columns=all_concepts,index=images.image_id.unique().tolist())
for image_id,concepts in unique_concepts.items():
    concept_df.loc[image_id,concepts] = 1

res["concept_df"] = concept_df

# Save the full bundle as a pickle; the context manager makes sure the file
# handle is flushed and closed
with open("%s/concepts.pkl" %results,"wb") as filey:
    pickle.dump(res,filey)

# The binary image-by-concept table is also saved on its own as a TSV
concept_df.to_csv("%s/concepts_binary_df.tsv" %results,sep="\t")

## STEP 2: Generate image lookup
# Maps each image ID to the path of its 4mm Z map
image_folder = "%s/resampled_z_4mm" %(data)
files = glob("%s/*.nii.gz" %image_folder)

if len(files) == 0:
    print("Error, did not find image files in %s. Did you generate them with 0.neurovault_images.py?" %(image_folder))
else:
    nifti_suffix = ".nii.gz"
    lookup = dict()
    for f in files:
        # Remove the extension as a suffix. str.strip(".nii.gz") would instead
        # strip any of the characters ".", "n", "i", "g", "z" from both ends
        # of the name, which is only correct by accident for all-digit names.
        # The glob pattern above guarantees every file ends with the suffix.
        image_id = int(os.path.basename(f)[:-len(nifti_suffix)])
        if image_id in concept_df.index:
            lookup[image_id] = f
        else:
            print("Cannot find image %s in concept data frame" %(image_id))

    # Context manager so the pickle file handle is flushed and closed
    with open("%s/image_nii_lookup.pkl" %results,"wb") as filey:
        pickle.dump(lookup,filey)


BASE project directory is defined as -f
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_553e7a5b2584d
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_5531474e117df
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_55314757a78a1
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_55314a1a5514c
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_55315226076d3
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_55315270d1e14
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?contrast_id=cnt_55315278ea6cc
Result Includes:<pandas:data frame><js

In [None]:
"""
permutation (third) step in forward modeling analysis - building a forward model, testing with 2 images held out. This is the submission script to run the analysis, submitting to a SLURM cluster. You MUST edit the last line of this script for your particular submission command. If you do not use this kind of cluster, you should edit the end of the script (where submission occurs) to fit your format.

  Classification framework
  for image1 in all images:
     for image2 in all images:
         if image1 != image2:
             hold out image 1 and image 2, generate regression parameter matrix using other images
             generate predicted image for image 1 [PR1]
             generate predicted image for image 2 [PR2]
             classify image 1 as fitting best to PR1 or PR2
             classify image 2 as fitting best to PR1 or PR2
"""

import os
import pandas
import sys

from utils import (
   get_base, get_pwd, make_dirs
)

# VARIABLES FOR SLURM
max_runtime="2-00:00"                    # Two days. Each script needs ~10-15 minutes, 30 is recommended for buffer
memory="32000"                           # 16000 might also work
submission_command="sbatch"                  # Your cluster submission command, eg sbatch, qsub
submission_args="-p russpold --qos russpold" # Does not need spaces to left and right


# Get the base and present working directory
base = get_base()
here = get_pwd()

# Data and results are resolved relative to `here`, matching the earlier
# steps of this notebook, which write filtered_contrast_images.tsv,
# concepts_binary_df.tsv and image_nii_lookup.pkl under <here>/results.
# NOTE(review): this previously used `base`; the logged output of the earlier
# steps shows `base` resolving to "-f" in the notebook - confirm `here` is
# also what you want when running this as a standalone script.
data = os.path.abspath("%s/data" %(here))
results = os.path.abspath("%s/results" %(here))
output_folder = "%s/permutations" %results

# Make the output directory
make_dirs(output_folder,reason="for permutation results.")

# Images by Concepts data frame (index: image IDs, columns: concept IDs)
labels_tsv = "%s/concepts_binary_df.tsv" %results
images = pandas.read_csv(labels_tsv,sep="\t",index_col=0)
image_lookup = "%s/image_nii_lookup.pkl" %results

# We will need these folders to exist for job and output files
log_folders = ["%s/.out" %here,"%s/.job" %here]
make_dirs(log_folders)

# Image metadata with number of subjects included
contrast_file = "%s/filtered_contrast_images.tsv" %results

# Hold-out pairs whose result file does not exist yet, i.e. the jobs that
# still have to be run. Each entry is (image1_holdout, image2_holdout, output_file).
pending_jobs = []

image_ids = images.index.tolist()
for image1_holdout in image_ids:
    # print() call form works on both Python 2 and 3 for a single argument
    print("Parsing %s" %(image1_holdout))
    for image2_holdout in image_ids:
        # Strict "<" visits every unordered pair exactly once and also
        # excludes pairing an image with itself
        if image1_holdout < image2_holdout:
            output_file = "%s/%s_%s_perform.pkl" %(output_folder,image1_holdout,image2_holdout)
            if not os.path.exists(output_file):
                pending_jobs.append((image1_holdout,image2_holdout,output_file))

                # Job submission is disabled. The template below writes one
                # SLURM job file per pending pair and submits it; uncomment
                # and adapt the final command for your cluster to enable it.
                # The submission command is now a single format string: the
                # earlier form ("%s %s " + "..." % (...)) applied "%" to the
                # second string only and would fail with four arguments.

#                 job_id = "%s_%s" %(image1_holdout,image2_holdout)
#                 filey = "%s/.job/class_%s.job" %(here,job_id)
#                 filey = open(filey,"w")
#                 filey.writelines("#!/bin/bash\n")
#                 filey.writelines("#SBATCH --job-name=%s\n" %(job_id))
#                 filey.writelines("#SBATCH --output=%s/.out/%s.out\n" %(here,job_id))
#                 filey.writelines("#SBATCH --error=%s/.out/%s.err\n" %(here,job_id))
#                 filey.writelines("#SBATCH --time=%s\n" %(max_runtime))
#                 filey.writelines("#SBATCH --mem=%s\n" %(memory))
#                 filey.writelines("python %s/2.encoding_regression_performance.py %s %s %s %s %s %s" %(here,
#                                                                                                       image1_holdout,
#                                                                                                       image2_holdout,
#                                                                                                       output_file,
#                                                                                                       labels_tsv,
#                                                                                                       image_lookup,
#                                                                                                       contrast_file))
#                 filey.close()
#                 os.system("%s %s %s/.job/class_%s.job" %(submission_command,
#                                                          submission_args,
#                                                          here,
#                                                          job_id))
