In [1]:
#Goal: Given fMRI scan, predict concepts associated. 
#Train a binary classifier to predict whether a certain concept is
#associated w/ the scan, for each concept. (Leave-one-out?)

from pybraincompare.compare.maths import calculate_correlation
from pybraincompare.compare.mrutils import get_images_df
from pybraincompare.mr.datasets import get_standard_mask
from pybraincompare.mr.transformation import *
import pandas
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import os
import cPickle as pickle
from sklearn.svm import SVC
from utils import (
   get_base, get_pwd, make_dirs
)
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.linear_model import LogisticRegression
import numpy as np
from cognitiveatlas.api import get_concept
from sklearn.metrics import hamming_loss

# NOTE(review): hard-coded absolute path -- adjust for the machine running this notebook.
base = "/home/mjfang/forward-modeling-cognitive-concepts"
results = os.path.abspath("%s/results" %(base))

labels_tsv = "%s/concepts_binary_df.tsv" %results
contrast_file = "%s/filtered_contrast_images.tsv" %results
image_lookup = "%s/image_nii_lookup.pkl" %results

# Y: label table read from concepts_binary_df.tsv (first column used as index).
# image_df: contrast-image metadata, re-indexed by its image_id column.
Y = pandas.read_csv(labels_tsv,sep="\t",index_col=0)
image_df = pandas.read_csv(contrast_file,sep="\t",index_col=0)
image_df.index = image_df.image_id

# We should standardize cognitive concepts before modeling
# http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
# scaled = pandas.DataFrame(StandardScaler().fit_transform(X))
# scaled.columns = X.columns
# scaled.index = X.index
# X = scaled


# Dictionary to look up image files (4mm)
# NOTE(review): pickle.load can execute arbitrary code; only load lookup files you trust.
with open(image_lookup,"rb") as lookup_file:
    lookup = pickle.load(lookup_file)

# Get standard mask, 4mm
standard_mask=get_standard_mask(4)

# We will save data to dictionary
result = dict()

concepts = Y.columns.tolist()

# One row per image; the image id is the file name up to the first "."
image_paths = list(lookup.values())
mr = get_images_df(file_paths=image_paths,mask=standard_mask)
image_ids = [int(os.path.basename(x).split(".")[0]) for x in image_paths]
mr.index = image_ids

# Fail loudly if an image has no metadata row (a per-row .loc lookup would raise too).
missing_ids = [image_id for image_id in image_ids if image_id not in image_df.index]
if missing_ids:
    raise KeyError("image ids missing from %s: %s" % (contrast_file, missing_ids))

# Normalize the image data by number of subjects
#V* = V/sqrt(S)
# Done as one vectorised division instead of growing `norm` row by row.
number_of_subjects = image_df.loc[image_ids, "number_of_subjects"].values.astype(float)
if number_of_subjects.shape[0] != mr.shape[0]:
    raise ValueError("expected exactly one number_of_subjects value per image")
norm = pandas.DataFrame(mr.values / np.sqrt(number_of_subjects)[:, np.newaxis],
                        index=mr.index, columns=mr.columns)
del mr

#maybe split this for SLURM

# for concept in concepts:
#     print concept




In [2]:
conceptToName = dict()
for concept in concepts:
    try:
        conceptToName[concept] = get_concept(concept).json[0]['name']
    except:
        print "invalid: ", concept

http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0af71
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4817db34d
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b48f22ba99
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4a7315f1b
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4a913f8cc
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4b47d994a
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_55ef153d47bc0
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b495cdde57
Result In

In [48]:
# Show the concept-id -> name mapping held in conceptToName.
print conceptToName

{'trm_557b4b9ccdc4a': u'visual word recognition', 'trm_56798c5f25b0c': u'context representation', 'trm_4a3fd79d0b5a7': u'working memory', 'trm_55ef273a77a86': u'auditory tone perception', 'trm_557b493e4203a': u'positive feedback processing', 'trm_4a3fd79d0afcf': u'risk', 'trm_557b4b27dfd5e': u'visual form discrimination', 'trm_557b495cdde57': u'potential monetary reward', 'trm_557b471bc6cd8': u'animacy decision', 'trm_557b476527a27': u'auditory tone discrimination', 'trm_557b4b7e68727': u'visual place recognition', 'trm_557b493133416': u'place maintenance', 'trm_559f09a5cdca9': u'subjective food value', 'trm_557b4abe521af': u'spatial selective attention', 'trm_557b4b56de455': u'visual object detection', 'trm_558c7b707f606': u'left hand response execution', 'trm_558c7b14c076e': u'auditory sentence recognition', 'trm_5678a999f1c19': u'numerical comparison', 'trm_557b4817db34d': u'economic value processing', 'trm_4a3fd79d0b1b2': u'spatial attention', 'trm_4a3fd79d09c28': u'categorization'

In [47]:
names = []
for concept in concepts:
    if concept in conceptToName.keys():
        names.append(conceptToName[concept])
    else:
        names.append(concept)
#count rows
import pandas as pd
numPos = np.sum(Y.loc[image_ids, concepts].values, axis = 0) #num positive by concept

#co-occurrence matrix
cooccur = np.zeros((len(concepts), len(concepts)))
for img in image_ids:
    img_Y = Y.loc[img, :].values
    positives = np.where(img_Y == 1)[0]
#     print positives
    for i in range(len(positives)):
        for j in range(len(positives)):
            if i != j:
                cooccur[positives[i], positives[j]] += 1
print cooccur




pd.DataFrame(data=cooccur, index=names, columns=names).to_csv("cooccur.csv")


[[ 0.  1.  3. ...,  1.  0.  0.]
 [ 1.  0.  1. ...,  0.  0.  0.]
 [ 3.  1.  0. ...,  0.  0.  0.]
 ..., 
 [ 1.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  1.]
 [ 0.  0.  0. ...,  0.  1.  0.]]


In [11]:
# NOTE(review): `result` is assigned an empty dict in the first cell, and a dict has no
# `.json` attribute. The output shown presumably came from a `get_concept(...)` return
# value bound to `result` in a cell that is no longer present -- confirm before re-running.
result.json[0]

{u'alias': u'action selection, motor execution',
 u'concept_class': u'',
 u'def_event_stamp': u'2015-11-08 23:32:53',
 u'def_id': u'def_563fdba594d80',
 u'def_id_user': u'usr_51bc76ae9636c',
 u'definition_text': u'The selection of one action from a limited set of possible actions.',
 u'event_stamp': u'2009-06-22 19:12:56',
 u'id': u'trm_4a3fd79d0af71',
 u'id_concept_class': u'ctp_C7',
 u'id_user': u'usr_0000000000',
 u'name': u'response selection',
 u'relationships': [{u'direction': u'child',
   u'id': u'trm_559f0a129c136',
   u'relationship': u'part of'},
  {u'direction': u'parent',
   u'id': u'trm_4a3fd79d09735',
   u'relationship': u'part of'},
  {u'direction': u'parent',
   u'id': u'trm_4a3fd79d0a038',
   u'relationship': u'part of'}],
 u'type': u'concept'}

In [5]:
#Task description: we have 93? labels, we want to specify which labels are a part of which other ones 
#try 1: we'll just try to link each node with immediate relations to the others. Keep track of singletons that result. 
# Fetch the Cognitive Atlas record for every concept id.
# conceptJSONs stays aligned with `concepts`: a failed lookup stores None at that
# position and records the failing id in `invalid`.
conceptSet = set(concepts)
conceptJSONs = []
invalid = []
for concept in concepts:
    try:
        conceptJSONs.append(get_concept(id=concept))
    except Exception:
        conceptJSONs.append(None)
        # Record the single failing id (not the whole `concepts` list).
        invalid.append(concept)
        

http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0af71
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4817db34d
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b48f22ba99
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4a7315f1b
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4a913f8cc
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b4b47d994a
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_55ef153d47bc0
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_557b495cdde57
Result In

In [23]:
numConnected = 0
singletons = 0
for concept, conceptJSON in zip(concepts, conceptJSONs):
    isConnected = False
    res = conceptJSON.json[0]
    if res.has_key('relationships'):
        for relation in res['relationships']:
            if relation['id'] in conceptSet:
                isConnected = True
        if isConnected:
            numConnected += 1
    else:
        singletons += 1
print numConnected, len(concepts), singletons

36 92 1


In [3]:
# save json, 
# list of ids of parents. 

class HierarchyNode:
    """A concept record plus the ids of its nearest ancestors inside a concept set.

    Attributes:
        json: the concept record (a dict with at least an 'id' key and, optionally,
            a 'relationships' list of dicts carrying 'direction' and 'id').
        parentsList: None until getRelations() runs; afterwards a list of ancestor ids.
    """

    def __init__(self, json):
        self.json = json
        self.parentsList = None

    def getRelations(self, conceptSet, cache):
        """Breadth-first search up the 'parent' relations and store the result.

        Each branch of the search stops at the first ancestor whose id is in
        conceptSet; that id is recorded and the search does not continue past it.
        Ancestors outside conceptSet are expanded further, using `cache` for
        their records and fetching missing ones with get_concept().

        Args:
            conceptSet: container of concept ids that count as valid parents.
            cache: dict mapping concept id -> concept record. It is updated in
                place with every record fetched here. An entry whose value is
                None is treated as an unusable record and is not expanded.

        Side effects:
            Sets self.parentsList to the ancestor ids in discovery order, without
            duplicates. Prints a message for every id whose fetch fails.
        """
        from collections import deque

        parentsList = []          # ancestor ids found in conceptSet, in discovery order
        foundSet = set()          # the same ids, for O(1) duplicate checks
        seenSet = set([self.json['id']])
        parentQueue = deque([self.json])

        while parentQueue:
            currJson = parentQueue.popleft()
            for relation in (currJson.get('relationships') or []):
                if relation['direction'] != 'parent':
                    continue
                parentId = relation['id']

                if parentId in conceptSet:
                    # The same ancestor can be reached through several paths;
                    # record it only once.
                    if parentId not in foundSet:
                        foundSet.add(parentId)
                        parentsList.append(parentId)
                    continue

                if parentId in seenSet:
                    continue

                if parentId not in cache:
                    try:
                        cache[parentId] = get_concept(id=parentId).json[0]
                    except Exception:
                        # Output matches the original `print "invalid id: ", id`.
                        print("invalid id:  %s" % (parentId,))
                        continue

                parentJson = cache[parentId]
                if parentJson is None:
                    # Cached as unusable; nothing to expand.
                    continue
                seenSet.add(parentId)
                parentQueue.append(parentJson)

        self.parentsList = parentsList
        

In [10]:
# Seed the record cache with every concept fetched so far (None marks a failed lookup).
cache = dict()
for concept, conceptJSON in zip(concepts, conceptJSONs):
    cache[concept] = None if conceptJSON is None else conceptJSON.json[0]

# Build one HierarchyNode per concept with a usable record and resolve its ancestors.
nodes = []
conceptToNodes = {}

for concept in concepts:
    conceptRecord = cache[concept]
    if conceptRecord is None:
        continue
    node = HierarchyNode(conceptRecord)
    node.getRelations(conceptSet, cache)
    nodes.append(node)
    conceptToNodes[concept] = node

http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d09735
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0a038
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0b607
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0b613
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4aae62e4ad209
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_558c736199abd
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_4a3fd79d0a723
Result Includes:<pandas:data frame><json:dict><txt:str><url:str>
http://cognitiveatlas.org/api/v-alpha/concept?id=trm_5595bd89d77c6
Result In

In [24]:
#repair tree
# Propagate positive labels up the hierarchy: an image labelled with a concept is
# also labelled with that concept's ancestors.
# parentsList only holds the nearest in-set ancestors, so a single pass over
# `concepts` depends on iteration order: a parent switched on late never pushes the
# label on to its own parents. Repeat until a full pass changes nothing.
Y_new = Y.copy()
changed = True
while changed:
    changed = False
    for concept in concepts:
        node = conceptToNodes.get(concept)
        if node is None or not node.parentsList:
            continue
        for img in image_ids:
            if Y_new.at[img, concept] != 1:
                continue
            for parentConcept in node.parentsList:
                if Y_new.at[img, parentConcept] != 1:
                    # .at replaces the deprecated DataFrame.set_value
                    Y_new.at[img, parentConcept] = 1
                    changed = True


In [25]:
print np.sum(Y.loc[:, :].values)
print np.sum(Y_new.loc[:, :].values)

320
522


In [10]:
import networkx as nx
import matplotlib.pyplot as plt

In [33]:
#construct tree
# Directed graph with an edge parent -> child for every resolved hierarchy relation.
# Each node's 'name' attribute is "<concept name> <sum of that concept's column in Y>".
G = nx.DiGraph()
rootNodes = []
for node in nodes:
    nodeId = node.json['id']
    nodeLabel = node.json['name'] + " " + str(np.sum(Y.loc[:, nodeId]))
    # add_node on an existing node just updates its attributes, so this also names
    # nodes first inserted as unnamed parent placeholders.
    G.add_node(nodeId, name=nodeLabel)
    # Always add the parent edges, including for nodes that were already in the
    # graph as a placeholder.
    for parent in node.parentsList:
        if parent not in G:
            G.add_node(parent, name=None)
        G.add_edge(parent, nodeId)

In [37]:
from networkx.drawing.nx_agraph import graphviz_layout
# nx.write_dot(G,'test.dot')

# Set the figure size BEFORE drawing. Changing rcParams after the figure exists only
# affects figures created later, so the first run would save a default-sized image.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 50
fig_size[1] = 30
plt.rcParams["figure.figsize"] = fig_size
plt.gcf().set_size_inches(fig_size[0], fig_size[1])
plt.clf()

# same layout using matplotlib with no labels
plt.title("draw_networkx")

# graphviz 'dot' layout; the two coordinates are swapped, and labels are drawn
# y_off units away from their node along the second coordinate.
y_off = 10  # offset on the y axis
layout = graphviz_layout(G,prog='dot')
pos = {}
pos_higher = {}
for k, v in layout.items():
    pos[k] = (v[1], v[0])
    pos_higher[k] = (v[1], v[0] + y_off)

labels = nx.get_node_attributes(G, 'name')

nx.draw(G,pos, arrows=False)
# print pos_higher,"\n",  pos
nx.draw_networkx_labels(G, pos_higher, labels)

plt.draw()
# plt.show()
plt.savefig('fig.png')


'1.11'