In [18]:
import requests
import json
from tabulate import tabulate

Our list of targets

In [70]:
# Identifiers of the targets whose associations we want to look up.
targets = [
    'ENSG00000069696',
    'ENSG00000144285',
]
# The same identifiers, each wrapped in double quotes and joined with ", ",
# ready to be spliced into a hand-written JSON array.
targets_string = ', '.join(map('"{0}"'.format, targets))

Make the API call with our list of targets to find the associations. Set facets to true.

In [71]:
# Association filter endpoint; the query is POSTed as a JSON document.
url = 'https://www.targetvalidation.org/api/latest/public/association/filter'
headers = {"Accept": "application/json"}
# Serialise the request body with json.dumps instead of concatenating strings,
# so the target identifiers are always quoted and escaped correctly.
# "facets": True asks the API to include the facets in the response.
data = json.dumps({"target": targets, "facets": True})

response = requests.post(url, headers=headers, data=data)
# Fail loudly on an HTTP error rather than trying to parse an error body as
# JSON and hitting a confusing exception in a later cell.
response.raise_for_status()
output = response.json()

Print out all the JSON returned, just for reference.

In [72]:
# Dump the whole response for reference. The call form of print produces the
# same output on Python 2 and, unlike the print statement, also runs on Python 3.
print(json.dumps(output, indent=2))

{
  "from": 0, 
  "facets": {
    "therapeutic_area": {
      "buckets": [
        {
          "unique_target_count": {
            "value": 2
          }, 
          "unique_disease_count": {
            "value": 285
          }, 
          "doc_count": 324, 
          "key": "efo_0000508", 
          "label": "genetic disorder"
        }, 
        {
          "unique_target_count": {
            "value": 2
          }, 
          "unique_disease_count": {
            "value": 115
          }, 
          "doc_count": 132, 
          "key": "efo_0000651", 
          "label": "phenotype"
        }, 
        {
          "unique_target_count": {
            "value": 2
          }, 
          "unique_disease_count": {
            "value": 86
          }, 
          "doc_count": 120, 
          "key": "efo_0000618", 
          "label": "nervous system disease"
        }, 
        {
          "unique_target_count": {
            "value": 2
          }, 
          "unique_disease_count": {
  

The therapeutic area facets look interesting - let's iterate through these and display them.

In [67]:
# Flatten every therapeutic-area facet bucket into a small record holding its
# unique target count, unique disease count, label and key.
area_buckets = output['facets']['therapeutic_area']['buckets']
therapeuticareas = [
    {
        'target_count': area_bucket['unique_target_count']['value'],
        'disease_count': area_bucket['unique_disease_count']['value'],
        'therapeutic_area': area_bucket['label'],
        'key': area_bucket['key'],
    }
    for area_bucket in area_buckets
]

Sort by target count and then disease count

In [68]:
def _by_target_then_disease_count(area):
    """Sort key: the record's target count, with its disease count as tie-breaker."""
    return (area['target_count'], area['disease_count'])


# Largest counts first.
therapeuticareas = sorted(therapeuticareas, key=_by_target_then_disease_count, reverse=True)

We use the Python [tabulate](https://pypi.python.org/pypi/tabulate) library to render a pretty table of our extracted therapeutic areas.
Note: You may need to run `pip install tabulate` in your python environment

In [69]:
# Render the records as a grid table, using the dict keys as column headers.
# print is called as a function so the cell runs on both Python 2 and Python 3.
print(tabulate(therapeuticareas, headers="keys", tablefmt="grid"))

+------------------------------+-----------------+-------------+----------------+
| therapeutic_area             |   disease_count | key         |   target_count |
| genetic disorder             |             285 | efo_0000508 |              2 |
+------------------------------+-----------------+-------------+----------------+
| phenotype                    |             115 | efo_0000651 |              2 |
+------------------------------+-----------------+-------------+----------------+
| nervous system disease       |              86 | efo_0000618 |              2 |
+------------------------------+-----------------+-------------+----------------+
| eye disease                  |              80 | efo_0003966 |              2 |
+------------------------------+-----------------+-------------+----------------+
| neoplasm                     |              49 | efo_0000616 |              2 |
+------------------------------+-----------------+-------------+----------------+
| metabolic dise

Let's consider just the top 5 therapeutic areas.

In [98]:
# Keep only the first five records of the (already sorted) list and show them.
therapeuticareas = therapeuticareas[:5]
# print is called as a function so the cell runs on both Python 2 and Python 3.
print(tabulate(therapeuticareas, headers="keys", tablefmt="grid"))

+------------------------+-----------------+-------------+----------------+
| therapeutic_area       |   disease_count | key         |   target_count |
| genetic disorder       |             285 | efo_0000508 |              2 |
+------------------------+-----------------+-------------+----------------+
| phenotype              |             115 | efo_0000651 |              2 |
+------------------------+-----------------+-------------+----------------+
| nervous system disease |              86 | efo_0000618 |              2 |
+------------------------+-----------------+-------------+----------------+
| eye disease            |              80 | efo_0003966 |              2 |
+------------------------+-----------------+-------------+----------------+
| neoplasm               |              49 | efo_0000616 |              2 |
+------------------------+-----------------+-------------+----------------+


Now, for each of those, identify the top 5 diseases. Unfortunately we don't get the disease names in the facets, just the codes. If this is the right approach, then perhaps an API change is needed?

In [102]:
# For each selected therapeutic area, repeat the association query restricted
# to that area and tabulate its five highest-ranked disease facet buckets.
for therapeuticarea in therapeuticareas:
    print("Therapeutic area: " + therapeuticarea['therapeutic_area'])
    # Loop-local names are used (instead of reusing `data`, `response` and
    # `output`) so the unfiltered result of the first query is not overwritten;
    # re-running an earlier cell therefore still sees the original response.
    # The body is serialised with json.dumps rather than string concatenation.
    area_data = json.dumps({
        "target": targets,
        "facets": True,
        "therapeutic_area": [therapeuticarea['key']],
    })
    area_response = requests.post(url, headers=headers, data=area_data)
    # Surface HTTP errors here rather than as a JSON/KeyError further down.
    area_response.raise_for_status()
    area_output = area_response.json()

    diseases = []

    for bucket in area_output['facets']['disease']['buckets']:
        diseases.append({
            'target_count' : bucket['unique_target_count']['value'],
            'doc_count' : bucket['doc_count'],
            'key' : bucket['key']
        })

    # Sort by target count, then document count (both descending); take top 5
    diseases = sorted(diseases, key=lambda k: (k['target_count'],k['doc_count']), reverse=True)
    diseases = diseases[:5]

    # print is called as a function so the cell runs on Python 2 and Python 3.
    print(tabulate(diseases, headers="keys", tablefmt="grid"))
    print("")

Therapeutic area: genetic disorder
+-------------+-----------------+----------------+
|   doc_count | key             |   target_count |
|           2 | Orphanet_101435 |              2 |
+-------------+-----------------+----------------+
|           2 | Orphanet_101953 |              2 |
+-------------+-----------------+----------------+
|           2 | Orphanet_139009 |              2 |
+-------------+-----------------+----------------+
|           2 | Orphanet_1478   |              2 |
+-------------+-----------------+----------------+
|           2 | Orphanet_156638 |              2 |
+-------------+-----------------+----------------+

Therapeutic area: phenotype
+-------------+-------------+----------------+
|   doc_count | key         |   target_count |
|           2 | EFO_0003108 |              2 |
+-------------+-------------+----------------+
|           2 | EFO_0003765 |              2 |
+-------------+-------------+----------------+
|           2 | EFO_0003843 |             