In [1]:
import requests
import json

## Getting the results through a query

In [99]:
def send_query(executer_url, user, query, password='pass'):
    """POST `query` to the executer's ``/query`` endpoint and return the decoded reply.

    Parameters
    ----------
    executer_url : str
        Base URL of the query executer; ``/query`` is appended to it.
    user : str
        User name for HTTP basic authentication.
    query : str
        Query text, sent as the ``query`` field of a JSON request body.
    password : str, optional
        Password for HTTP basic authentication. Defaults to ``'pass'`` so
        existing three-argument calls keep working; pass the real value
        explicitly (e.g. read from the environment) instead of editing
        this function.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    Exception
        If the server answers with any status code other than 200. The
        message contains the status code and response text, and the
        response object is available as the ``response`` attribute.
    """
    url = '%s/query' % executer_url
    response = requests.post(url, json={'query': query}, auth=(user, password))
    if response.status_code != 200:
        error = Exception('query to %s failed with HTTP status %s: %s'
                          % (url, response.status_code, response.text))
        # keep the response reachable for callers that want to inspect it
        error.response = response
        raise error
    return json.loads(response.text)

In [332]:
# Connection settings; `executer` is reused by the later query cell.
executer='http://diascld10.epfl.ch:54321'
user = 'federation'
# One row per patient_id, with the (name, value) pairs of the six selected
# variables nested under `vars`; the rows are read from result['output'] below.
query = """
select distinct patient_id,( select variable_name as name, value from partition) as vars
    from exam_value
    where 
         variable_name = "DX_bl" or
         variable_name = "CDRSB_bl" or
         variable_name = "MMSE_bl" or
         variable_name = "AGE" or
         variable_name = "PTGENDER" or
         variable_name = "PTEDUCAT" 
    group by patient_id
"""
result = send_query(executer, user,query )

## Getting the list of different types of diagnosis (DX_bl)

In [333]:
# `user` is re-assigned here to the same value the first query cell used.
user = 'federation'
# Ask for the distinct values recorded for the DX_bl variable.
query = """
select distinct value
    from exam_value
    where  variable_name = "DX_bl"
"""

result2 = send_query(executer, user,query )
# Display the raw reply; the labels are under the 'output' key.
result2

{u'compilationTime': 286,
 u'executionTime': 216,
 u'output': [u'CN', u'LMCI', u'AD', u'EMCI', u'SMC']}

In [334]:
# Helper dictionary: diagnosis label -> float code.
# The code is simply the label's position in the query output.
dx = {}
for position, label in enumerate(result2['output']):
    dx[label] = float(position)
    print("%s -> %s" % (label, dx[label]))

CN -> 0.0
LMCI -> 1.0
AD -> 2.0
EMCI -> 3.0
SMC -> 4.0


## We will try the random forest classifier
This follows the scikit-learn example at:
http://scikit-learn.org/stable/auto_examples/ensemble/plot_forest_importances.html

In [335]:
import numpy as np
import sklearn as sk
import sklearn.datasets as skd
import sklearn.ensemble as ske
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline

### We need to transform all values to float for the algorithm

In [345]:
import random

def get_dx_value(value):
    """Return the float code that `dx` assigns to the diagnosis label `value`."""
    return dx[value]

def _patient_features(variables):
    """Convert one patient's list of {'name', 'value'} records to floats.

    Returns a pair (features, label):
      * features -- dict mapping variable name -> float. PTGENDER is encoded
        as 1.0 for 'Male' and 0.0 for any other value; every other variable
        goes through float().
      * label -- float code of the DX_bl value, or None when the patient has
        no DX_bl record.

    Re-raises the conversion error (after printing which variable failed)
    when a value cannot be turned into a float.
    """
    features = dict()
    label = None
    for record in variables:
        name = record['name']
        value = record['value']
        if name == 'DX_bl':
            # y value for training
            label = get_dx_value(value)
        elif name == 'PTGENDER':
            features[name] = 1.0 if value == 'Male' else 0.0
        else:
            try:
                features[name] = float(value)
            except (TypeError, ValueError):
                print("could not transform %s->'%s'" % (name, value))
                raise
    return features, label

# seed() without an argument seeds from the OS / current time, so the
# 'Random' column is different on every run
random.seed()
data = result['output']
trainX = []
trainY = []
# feature names in column order; fixed by the first patient and then enforced
keys = []
for p in data:
    values, y = _patient_features(p['vars'])
    if y is None:
        raise ValueError("patient %r has no DX_bl value" % (p.get('patient_id'),))

    # extra column of uniform noise in [0, 100) -- presumably a baseline to
    # compare the importances of the real features against
    values['Random'] = 100*random.random()
    # sorts the keys so that all values always in the same order
    row_keys = sorted(values)
    if not keys:
        keys = row_keys
    elif row_keys != keys:
        # a missing or extra variable would shift columns between rows
        raise ValueError("patient %r has variables %r, expected %r"
                         % (p.get('patient_id'), row_keys, keys))
    trainX.append([values[k] for k in keys])
    trainY.append(y)

In [346]:
# Feature names, in the column order used for the rows of trainX.
keys

[u'AGE', u'CDRSB_bl', u'MMSE_bl', u'PTEDUCAT', u'PTGENDER', 'Random']

In [347]:
# Fit an extra-trees ensemble, then list the features from the most to the
# least important as "rank,name,importance".
forest = ske.ExtraTreesClassifier()
forest.fit(trainX, trainY)

importances = forest.feature_importances_
fet_ind = np.argsort(importances)[::-1]   # column indices, descending importance
fet_imp = importances[fet_ind]            # importances in that same order

for n, (idx, imp) in enumerate(zip(fet_ind, fet_imp)):
    print("%d,%s,%f " % (n, keys[idx], imp))

0,CDRSB_bl,0.359581 
1,MMSE_bl,0.193137 
2,AGE,0.165960 
3,Random,0.153084 
4,PTEDUCAT,0.105578 
5,PTGENDER,0.022660 


In [348]:
# Unsorted importances of the fitted extra-trees model, one per trainX column.
forest.feature_importances_

array([ 0.16595984,  0.35958056,  0.19313734,  0.10557811,  0.02265982,
        0.15308432])

### This is using a random forest classifier

In [349]:
# Same ranking as above, this time with a random forest classifier.
reg = ske.RandomForestClassifier()
reg.fit(trainX, trainY)

importances = reg.feature_importances_
fet_ind = np.argsort(importances)[::-1]   # column indices, descending importance
fet_imp = importances[fet_ind]            # importances in that same order

for n, (idx, imp) in enumerate(zip(fet_ind, fet_imp)):
    print("%d,%s,%f " % (n, keys[idx], imp))

0,CDRSB_bl,0.350056 
1,MMSE_bl,0.187951 
2,AGE,0.169573 
3,Random,0.165663 
4,PTEDUCAT,0.102449 
5,PTGENDER,0.024308 


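### This is using a random forest regressor instead of an ExtraTreesClassifier
Note: the regressor treats the diagnosis codes (0.0 to 4.0) as an ordered numeric target, but those codes only reflect the order in which the labels came back from the query, so these importances should be read with caution.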

In [350]:
# Fit a random forest regressor on the float-coded diagnosis and print the
# features as "rank,name,importance", most important first.
reg = ske.RandomForestRegressor()
reg.fit(trainX, trainY)

importances = reg.feature_importances_
fet_ind = np.argsort(importances)[::-1]   # column indices, descending importance
fet_imp = importances[fet_ind]            # importances in that same order

for n, (idx, imp) in enumerate(zip(fet_ind, fet_imp)):
    print("%d,%s,%f " % (n, keys[idx], imp))

0,AGE,0.317125 
1,Random,0.248967 
2,CDRSB_bl,0.217214 
3,PTEDUCAT,0.104212 
4,MMSE_bl,0.084938 
5,PTGENDER,0.027544 


In [328]:
# Mean of the PTGENDER column, i.e. the fraction of rows encoded as 1.0 ('Male').
# The column is looked up by name so the result does not silently change if
# the set of features (and therefore the sorted column order) changes.
gender_col = keys.index('PTGENDER')
gender_values = [row[gender_col] for row in trainX]

print(sum(gender_values) / len(gender_values))

0.568221070812
