In [145]:
import openai

import numpy as np

import pandas as pd

import requests

import os

from numpy.linalg import norm

from openai.embeddings_utils import cosine_similarity

In [142]:
api_key = os.environ['OPENAI_API_KEY']
openai.api_key = api_key

In [19]:
data = 'Homo sapiens HepG2 genetically modified (insertion) using CRISPR targeting H. sapiens PIN1'

In [20]:
model = 'text-embedding-ada-002'

In [21]:
response = openai.Embedding.create(
    input=data,
    model=model,
)

In [28]:
search_embedding = np.array(response['data'][0]['embedding'])

In [29]:
response = openai.Embedding.create(
    input='chip-seq experiments in humans',
    model=model,
)

In [30]:
query_embedding = np.array(response['data'][0]['embedding'])

In [31]:
query_embedding

array([-0.02876394,  0.00399126,  0.00338419, ..., -0.02887126,
       -0.01529426,  0.0010498 ])

In [34]:
search_embedding.dot(query_embedding)

0.8424903302990354

In [42]:
np.dot(query_embedding, search_embedding) / norm(search_embedding) * norm(query_embedding)

0.8424903587550906

In [49]:
QE = np.random.random((5, len(query_embedding)))

In [54]:
QE.dot(search_embedding)

array([-0.61798462, -0.45261477, -0.57118642, -0.81987014, -0.51185065])

In [66]:
QE = np.array([query_embedding for i in range(10)])

In [67]:
QE.dot(search_embedding)

array([0.84249033, 0.84249033, 0.84249033, 0.84249033, 0.84249033,
       0.84249033, 0.84249033, 0.84249033, 0.84249033, 0.84249033])

In [85]:
def get_embedding(text, model='text-embedding-ada-002'):
    response = openai.Embedding.create(
        input=text,
        model=model,
    )
    return np.array(response['data'][0]['embedding'])

In [213]:
experiment_schema = requests.get('https://www.encodeproject.org/profiles/experiment?format=json')
experiment_boost_values = experiment_schema.json()['boost_values']

In [262]:
experiment_boost_values

{'accession': 20.0,
 '@type': 1.0,
 'alternate_accessions': 1.0,
 'assay_term_name': 20.0,
 'assay_term_id': 1.0,
 'assay_title': 20.0,
 'assay_slims': 5.0,
 'dbxrefs': 1.0,
 'aliases': 1.0,
 'biosample_ontology.term_id': 1.0,
 'biosample_ontology.term_name': 10.0,
 'biosample_ontology.classification': 10.0,
 'biosample_ontology.organ_slims': 1.0,
 'biosample_ontology.cell_slims': 1.0,
 'biosample_ontology.developmental_slims': 1.0,
 'category_slims': 1.0,
 'objective_slims': 1.0,
 'type_slims': 1.0,
 'assay_synonyms': 1.0,
 'biosample_ontology.synonyms': 1.0,
 'files.accession': 1.0,
 'files.alternate_accessions': 1.0,
 'files.file_format': 1.0,
 'files.output_type': 1.0,
 'files.md5sum': 1.0,
 'files.assembly': 5.0,
 'files.replicate.experiment.assay_term_name': 5.0,
 'files.replicate.experiment.assay_title': 5.0,
 'replicates.library.accession': 1.0,
 'replicates.library.alternate_accessions': 1.0,
 'replicates.library.aliases': 1.0,
 'replicates.library.biosample.accession': 1.0,
 

In [256]:
filtered_experiment_boost_values = {k: v for k, v in experiment_boost_values.items() if v > 1 and not k.startswith('files')}

In [257]:
filtered_experiment_boost_values

{'accession': 20.0,
 'assay_term_name': 20.0,
 'assay_title': 20.0,
 'assay_slims': 5.0,
 'biosample_ontology.term_name': 10.0,
 'biosample_ontology.classification': 10.0,
 'replicates.library.biosample.health_status': 2.0,
 'replicates.library.biosample.age': 15.0,
 'replicates.library.biosample.age_display': 5.0,
 'replicates.library.biosample.age_units': 2.0,
 'replicates.library.biosample.sex': 3.0,
 'replicates.library.biosample.life_stage': 2.0,
 'replicates.library.biosample.treatments.treatment_term_name': 3.0,
 'replicates.library.biosample.treatments.duration': 3.0,
 'replicates.library.biosample.phase': 2.0,
 'replicates.library.biosample.donor.organism.name': 10.0,
 'award.project': 8.0,
 'lab.title': 5.0,
 'target.aliases': 3.0,
 'target.genes.symbol': 10.0,
 'target.label': 10.0,
 'target.organism.name': 5.0,
 'target.organism.scientific_name': 5.0,
 'internal_tags': 5.0,
 'biosample_summary': 12.0,
 'description': 8.0}

In [274]:
experiment_fields = '&'.join([f'field={k}' for k, v in filtered_experiment_boost_values.items()])

In [275]:
experiment_fields

'field=accession&field=assay_term_name&field=assay_title&field=assay_slims&field=biosample_ontology.term_name&field=biosample_ontology.classification&field=replicates.library.biosample.health_status&field=replicates.library.biosample.age&field=replicates.library.biosample.age_display&field=replicates.library.biosample.age_units&field=replicates.library.biosample.sex&field=replicates.library.biosample.life_stage&field=replicates.library.biosample.treatments.treatment_term_name&field=replicates.library.biosample.treatments.duration&field=replicates.library.biosample.phase&field=replicates.library.biosample.donor.organism.name&field=award.project&field=lab.title&field=target.aliases&field=target.genes.symbol&field=target.label&field=target.organism.name&field=target.organism.scientific_name&field=internal_tags&field=biosample_summary&field=description'

In [276]:
url = 'https://www.encodeproject.org/search/?type=Experiment&format=json&' + experiment_fields

In [277]:
url

'https://www.encodeproject.org/search/?type=Experiment&format=json&field=accession&field=assay_term_name&field=assay_title&field=assay_slims&field=biosample_ontology.term_name&field=biosample_ontology.classification&field=replicates.library.biosample.health_status&field=replicates.library.biosample.age&field=replicates.library.biosample.age_display&field=replicates.library.biosample.age_units&field=replicates.library.biosample.sex&field=replicates.library.biosample.life_stage&field=replicates.library.biosample.treatments.treatment_term_name&field=replicates.library.biosample.treatments.duration&field=replicates.library.biosample.phase&field=replicates.library.biosample.donor.organism.name&field=award.project&field=lab.title&field=target.aliases&field=target.genes.symbol&field=target.label&field=target.organism.name&field=target.organism.scientific_name&field=internal_tags&field=biosample_summary&field=description'

In [316]:
ed = requests.get('https://www.encodeproject.org/search/?type=Experiment&format=json&frame=object').json()['@graph']

In [317]:
ed

[{'@id': '/experiments/ENCSR398OAO/',
  '@type': ['Experiment', 'Dataset', 'Item'],
  'accession': 'ENCSR398OAO',
  'aliases': ['barbara-wold:seqfish_E7948794'],
  'alternate_accessions': [],
  'analyses': ['/analyses/ENCAN425XBT/'],
  'assay_slims': ['Single cell'],
  'assay_term_id': 'OBI:0003106',
  'assay_term_name': 'seqFISH',
  'assay_title': 'seqFISH',
  'assembly': ['GRCh38'],
  'audit': {'INTERNAL_ACTION': [{'path': '/experiments/ENCSR398OAO/',
     'level_name': 'INTERNAL_ACTION',
     'level': 30,
     'name': 'audit_experiment_released_with_unreleased_files',
     'detail': 'Released dataset {ENCSR398OAO|/experiments/ENCSR398OAO/} contains file {ENCFF381DOD|/files/ENCFF381DOD/} that has not been released.',
     'category': 'mismatched file status'},
    {'path': '/experiments/ENCSR398OAO/',
     'level_name': 'INTERNAL_ACTION',
     'level': 30,
     'name': 'audit_experiment_released_with_unreleased_files',
     'detail': 'Released dataset {ENCSR398OAO|/experiments/ENCSR3

In [279]:
sed = [serialize_dict_to_text(e) for e in requests.get(url).json()['@graph']]

In [281]:
esed = [get_embedding(e) for e in sed]

In [328]:
query = get_embedding('human atac-seq')

In [329]:
similarities = np.array(esed).dot(query)

In [330]:
[(similarities[i], sed[i]) for i in similarities.argsort()][::-1]

[(0.858506019610657,
  'id is /experiments/ENCSR659ANG/, type are Experiment, Dataset, Item, accession is ENCSR659ANG, assay slims are Single cell, DNA accessibility, assay term name is single-nucleus ATAC-seq, assay title is snATAC-seq, award  project is ENCODE, biosample ontology  term name is liver, classification is tissue, biosample summary is Homo sapiens liver tissue male adult (31 years), lab  title is Michael Snyder, Stanford, replicates are library  biosample  age units is year, donor  organism  name is human, sex is male, age display is 31 years, life stage is adult, health status is healthy, age is 31'),
 (0.8556850793025383,
  'id is /experiments/ENCSR497ZST/, type are Experiment, Dataset, Item, accession is ENCSR497ZST, assay slims are Single cell, DNA accessibility, assay term name is single-nucleus ATAC-seq, assay title is snATAC-seq, award  project is ENCODE, biosample ontology  term name is liver, classification is tissue, biosample summary is Homo sapiens liver tissu

In [207]:
skip_keys = [
    'status',
    'schema_version'
]


def serialize_dict_to_text(dict_):
    result = []
    for k, v in dict_.items():
        if k in skip_keys or not v:
            continue
        serialized_key = k.replace('_', ' ').replace('@', '')
        if isinstance(v, str):
            serialized_value = v
            verb = 'is'
        elif isinstance(v, list):
            if v and isinstance(v[0], dict):
                serialized_value = ', '.join([serialize_dict_to_text(vv) for vv in v])
            else:
                serialized_value = ', '.join([str(vv) for vv in v])
            verb = 'are'
        elif isinstance(v, dict):
            serialized_value = serialize_dict_to_text(v)
            verb = ''
        result.append(f'{serialized_key} {verb} {serialized_value}')
    return ', '.join(result)

In [208]:
serialize_dict_to_text(experiment_json)

'assay term name is ChIP-seq, biosample ontology is /biosample-types/cell_line_EFO_0002067/, accession is ENCSR668LDD, dbxrefs are GEO:GSE96303, date released is 2016-11-08, doi is 10.17989/ENCSR668LDD, internal tags are ccre_inputv1, ENCYCLOPEDIAv1, RegulomeDB_2_2, ENCYCLOPEDIAv2, lab is /labs/bradley-bernstein/, award is /awards/U54HG006991/, aliases are bradley-bernstein:Project Element 96, target is /targets/H3K4me3-human/, assay title is Histone ChIP-seq, assay slims are DNA binding, biosample summary is Homo sapiens K562, assay term id is OBI:0000716, id is /experiments/ENCSR668LDD/, type are Experiment, Dataset, Item, uuid is 934eed39-3a71-403c-85d0-d7b055f1269b, assembly are GRCh38, hg19, hub is /experiments/ENCSR668LDD/@@hub/hub.txt, default analysis is /analyses/ENCAN646APS/, life stage age is adult 53 years'

In [209]:
import requests
import json

r = requests.get('https://www.encodeproject.org/experiments/ENCSR398OAO/?format=json&frame=object')

e = serialize_dict_to_text(r.json())
#e = json.dumps(r.json())
e

"assay term name is seqFISH, biosample ontology is /biosample-types/tissue_UBERON_0009834/, accession is ENCSR398OAO, analyses are /analyses/ENCAN425XBT/, internal tags are RushAD, notes is This experiment's biosample ontology was changed from UBERON:0006483 to UBERON:0009834 on March 24, 2023., date created is 2023-01-12T18:28:13.986475+00:00, submitted by is /users/bc5b62f7-ce28-4a1e-b6b3-81c9c5a86d7a/, lab is /labs/barbara-wold/, award is /awards/UM1HG009443/, aliases are barbara-wold:seqfish_E7948794, internal status is unreviewed, date released is 2023-04-12, bio replicate count is 2023-04-12, tech replicate count is 2023-04-12, replication type is unreplicated, assay title is seqFISH, assay slims are Single cell, replicates are /replicates/a53697fb-fd8a-4bd7-8781-2e8229284d63/, simple biosample summary is female adult (90 or above years) with Alzheimer's disease, biosample summary is Homo sapiens with Alzheimer's disease; dorsolateral prefrontal cortex tissue female adult (90 or 

In [210]:
ee = get_embedding(e)
ee

array([-0.02995173, -0.0008647 , -0.00480067, ..., -0.01531177,
       -0.02154005, -0.02848213])

In [None]:
sq

In [200]:
serialize_dict_to_text(embedded_json)

'assay term name is seqFISH, biosample ontology  term name is dorsolateral prefrontal cortex, term id is UBERON:0009834, classification is tissue, id is /biosample-types/tissue_UBERON_0009834/, type are BiosampleType, Item, uuid is 45000d13-e0a2-4097-8882-efd6de929e29, name is tissue_UBERON_0009834, organ slims are brain, developmental slims are ectoderm, system slims are central nervous system, accession is ENCSR398OA'

In [147]:
cosine_similarity(sq1, qe1)

0.8103541864738757

In [74]:
other_qe

array([ 0.00700889, -0.025697  ,  0.00135049, ..., -0.01850348,
       -0.00406516, -0.01134415])

In [75]:
other_qe.dot(search_embedding)

0.6976445883953505

In [76]:
experiment_json = {
  "assay_term_name": "ChIP-seq",
  "biosample_ontology": "/biosample-types/cell_line_EFO_0002067/",
  "documents": [
    
  ],
  "accession": "ENCSR668LDD",
  "dbxrefs": [
    "GEO:GSE96303"
  ],
  "date_released": "2016-11-08",
  "doi": "10.17989/ENCSR668LDD",
  "internal_tags": [
    "ccre_inputv1",
    "ENCYCLOPEDIAv1",
    "RegulomeDB_2_2",
    "ENCYCLOPEDIAv2"
  ],
  "lab": "/labs/bradley-bernstein/",
  "award": "/awards/U54HG006991/",
  "aliases": [
    "bradley-bernstein:Project Element 96"
  ],
  "target": "/targets/H3K4me3-human/",
  "assay_title": "Histone ChIP-seq",
  "assay_slims": [
    "DNA binding"
  ],
  "biosample_summary": "Homo sapiens K562",
  "assay_term_id": "OBI:0000716",
  "@id": "/experiments/ENCSR668LDD/",
  "@type": [
    "Experiment",
    "Dataset",
    "Item"
  ],
  "uuid": "934eed39-3a71-403c-85d0-d7b055f1269b",
  "assembly": [
    "GRCh38",
    "hg19"
  ],
  "hub": "/experiments/ENCSR668LDD/@@hub/hub.txt",
  "default_analysis": "/analyses/ENCAN646APS/",
  "life_stage_age": "adult 53 years",
  "perturbed": False
}

In [77]:
import json
json.dumps(experiment_json)

'{"assay_term_name": "ChIP-seq", "biosample_ontology": "/biosample-types/cell_line_EFO_0002067/", "documents": [], "accession": "ENCSR668LDD", "dbxrefs": ["GEO:GSE96303"], "date_released": "2016-11-08", "doi": "10.17989/ENCSR668LDD", "internal_tags": ["ccre_inputv1", "ENCYCLOPEDIAv1", "RegulomeDB_2_2", "ENCYCLOPEDIAv2"], "lab": "/labs/bradley-bernstein/", "award": "/awards/U54HG006991/", "aliases": ["bradley-bernstein:Project Element 96"], "target": "/targets/H3K4me3-human/", "assay_title": "Histone ChIP-seq", "assay_slims": ["DNA binding"], "biosample_summary": "Homo sapiens K562", "assay_term_id": "OBI:0000716", "@id": "/experiments/ENCSR668LDD/", "@type": ["Experiment", "Dataset", "Item"], "uuid": "934eed39-3a71-403c-85d0-d7b055f1269b", "assembly": ["GRCh38", "hg19"], "hub": "/experiments/ENCSR668LDD/@@hub/hub.txt", "default_analysis": "/analyses/ENCAN646APS/", "life_stage_age": "adult 53 years", "perturbed": false}'

In [79]:
response = openai.Embedding.create(
    input=json.dumps(experiment_json),
    model=model,
)
json_se = np.array(response['data'][0]['embedding'])

In [83]:
json_se.dot(search_embedding)

0.7695160541969724

In [84]:
json_se.dot(other_qe)

0.6603502565919628

In [86]:
sq1 = get_embedding('hitone chip-seq')
sq2 = get_embedding('bernstein h3k4me3')
sq3 = get_embedding('dnase-seq in mouse')

In [98]:
sq4 = get_embedding('human k562 chip-seq')

In [101]:
sq5 = get_embedding('experiments from 2016')
sq6 = get_embedding('experiments from 2021')

In [104]:
sq7 = get_embedding('histone experiments with modifications')
sq8 = get_embedding('histone experiments without modifications')

In [117]:
sq9 = get_embedding('rnaseq experiments in humans')

In [190]:
sq10 = get_embedding('brain disease seqfish')

In [205]:
sq11 = get_embedding('RegulomeDB_2_2')

In [206]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8, sq9, sq10, sq11]).dot(ee)

array([0.79441493, 0.74870951, 0.77158409, 0.81824848, 0.76988819,
       0.76155237, 0.7745468 , 0.7753202 , 0.827635  , 0.81640042,
       0.75514401])

In [199]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8, sq9, sq10]).dot(ee)

array([0.76153303, 0.71985766, 0.74406305, 0.77101183, 0.75918754,
       0.75499772, 0.76230868, 0.76395487, 0.78932559, 0.76656155])

In [211]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8, sq9, sq10]).dot(ee)

array([0.78619998, 0.73743609, 0.76219066, 0.80785142, 0.75728962,
       0.75034573, 0.76550438, 0.76375062, 0.81299056, 0.80497396])

In [118]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8, sq9]).dot(json_se)

array([0.80254325, 0.73394638, 0.76781338, 0.81651989, 0.76715129,
       0.75394662, 0.78960473, 0.78593004, 0.80943841])

In [94]:
', '.join([f'{k} is {v}' for k, v in experiment_json.items()])

"assay_term_name is ChIP-seq, biosample_ontology is /biosample-types/cell_line_EFO_0002067/, documents is [], accession is ENCSR668LDD, dbxrefs is ['GEO:GSE96303'], date_released is 2016-11-08, doi is 10.17989/ENCSR668LDD, internal_tags is ['ccre_inputv1', 'ENCYCLOPEDIAv1', 'RegulomeDB_2_2', 'ENCYCLOPEDIAv2'], lab is /labs/bradley-bernstein/, award is /awards/U54HG006991/, aliases is ['bradley-bernstein:Project Element 96'], target is /targets/H3K4me3-human/, assay_title is Histone ChIP-seq, assay_slims is ['DNA binding'], biosample_summary is Homo sapiens K562, assay_term_id is OBI:0000716, @id is /experiments/ENCSR668LDD/, @type is ['Experiment', 'Dataset', 'Item'], uuid is 934eed39-3a71-403c-85d0-d7b055f1269b, assembly is ['GRCh38', 'hg19'], hub is /experiments/ENCSR668LDD/@@hub/hub.txt, default_analysis is /analyses/ENCAN646APS/, life_stage_age is adult 53 years, perturbed is False"

In [95]:
qe1 = get_embedding(', '.join([f'{k} is {v}' for k, v in experiment_json.items()]))

In [106]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8]).dot(qe1)

array([0.81035421, 0.73668   , 0.76648463, 0.83231609, 0.74976241,
       0.73861326, 0.78623521, 0.78088318])

In [109]:
qe2 = get_embedding('\n'.join([f'{k} is {v}' for k, v in experiment_json.items()]))

In [110]:
qe2

array([-0.03783723,  0.01904208, -0.01407578, ..., -0.01706654,
       -0.01935762, -0.03377638])

In [111]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8]).dot(qe2)

array([0.80412292, 0.7234614 , 0.75828566, 0.82241037, 0.74127714,
       0.73066543, 0.77568972, 0.7720825 ])

In [114]:
import json
de1 = get_embedding(json.dumps(rna_experiment_json))

In [119]:
np.array([sq1, sq2, sq3, sq4, sq5, sq6, sq7, sq8, sq9]).dot(de1)

array([0.76879342, 0.72376057, 0.75531387, 0.80018248, 0.76546465,
       0.75072645, 0.76948263, 0.76973566, 0.81039042])

In [132]:
sq10 = get_embedding('ENCFF536AWU')

In [133]:
sq10.dot(de1)

0.7235610595533178

In [134]:
np.array([qe2, de1, json_se]).dot(sq10)

array([0.72539142, 0.72356106, 0.73523376])

In [112]:
rna_experiment_json = {
  "assay_term_name": "RNA-seq",
  "biosample_ontology": "/biosample-types/cell_line_EFO_0002067/",
  "documents": [
    "/documents/b7614f73-ed27-41d0-8325-3731f74d2be8/",
    "/documents/c12cbfbc-a858-4b6d-ad10-a4fc84d6d4bf/"
  ],
  "references": [
    
  ],
  "schema_version": "37",
  "accession": "ENCSR115PIZ",
  "alternate_accessions": [
    
  ],
  "analyses": [
    "/analyses/ENCAN087GQB/"
  ],
  "description": "Homo sapiens K562 treated with Chaetocin for 48 hours",
  "dbxrefs": [
    "GEO:GSE219476"
  ],
  "date_released": "2022-02-17",
  "doi": "10.17989/ENCSR115PIZ",
  "internal_tags": [
    
  ],
  "status": "released",
  "date_created": "2021-12-07T14:17:27.839124+00:00",
  "submitted_by": "/users/9e077d38-a99b-4f84-8c79-6c75cf505731/",
  "lab": "/labs/will-greenleaf/",
  "award": "/awards/UM1HG009436/",
  "aliases": [
    "will-greenleaf:K562_Chaetocin_48"
  ],
  "possible_controls": [
    
  ],
  "supersedes": [
    
  ],
  "related_files": [
    
  ],
  "internal_status": "release ready",
  "bio_replicate_count": 2,
  "tech_replicate_count": 2,
  "replication_type": "isogenic",
  "objective_slims": [
    
  ],
  "type_slims": [
    
  ],
  "category_slims": [
    
  ],
  "assay_title": "total RNA-seq",
  "assay_slims": [
    "Transcription"
  ],
  "replicates": [
    "/replicates/03b0c30b-b65d-4a57-a866-c2a306b04161/",
    "/replicates/747b8efb-576a-4c88-a38d-6ffa9a8133f2/"
  ],
  "simple_biosample_summary": "treated with 10 nM Chaetocin for 48 hours",
  "biosample_summary": "Homo sapiens K562 treated with 10 nM Chaetocin for 48 hours",
  "assay_term_id": "OBI:0001271",
  "@id": "/experiments/ENCSR115PIZ/",
  "@type": [
    "Experiment",
    "Dataset",
    "Item"
  ],
  "uuid": "71e7a555-b27a-4b35-92f3-10c0b5a1fa87",
  "original_files": [
    "/files/ENCFF089UOR/",
    "/files/ENCFF988OKV/",
    "/files/ENCFF536AWU/",
    "/files/ENCFF812STJ/",
    "/files/ENCFF883AKC/",
    "/files/ENCFF286KKZ/",
    "/files/ENCFF646ADP/",
    "/files/ENCFF227LIQ/",
    "/files/ENCFF934XOC/",
    "/files/ENCFF413AFV/",
    "/files/ENCFF829LCN/",
    "/files/ENCFF520EBC/",
    "/files/ENCFF361SIG/",
    "/files/ENCFF704MIL/",
    "/files/ENCFF132EEP/",
    "/files/ENCFF165DQJ/",
    "/files/ENCFF347IVK/",
    "/files/ENCFF189YJW/"
  ],
  "contributing_files": [
    "/files/ENCFF471EAM/",
    "/files/GRCh38_EBV.chrom.sizes/",
    "/files/ENCFF285DRD/",
    "/files/ENCFF598IDH/"
  ],
  "files": [
    "/files/ENCFF089UOR/",
    "/files/ENCFF988OKV/",
    "/files/ENCFF536AWU/",
    "/files/ENCFF812STJ/",
    "/files/ENCFF883AKC/",
    "/files/ENCFF286KKZ/",
    "/files/ENCFF646ADP/",
    "/files/ENCFF227LIQ/",
    "/files/ENCFF934XOC/",
    "/files/ENCFF413AFV/",
    "/files/ENCFF829LCN/",
    "/files/ENCFF520EBC/",
    "/files/ENCFF361SIG/",
    "/files/ENCFF704MIL/",
    "/files/ENCFF132EEP/",
    "/files/ENCFF165DQJ/",
    "/files/ENCFF347IVK/",
    "/files/ENCFF189YJW/"
  ],
  "revoked_files": [
    
  ],
  "assembly": [
    "GRCh38"
  ],
  "hub": "/experiments/ENCSR115PIZ/@@hub/hub.txt",
  "default_analysis": "/analyses/ENCAN087GQB/",
  "related_series": [
    "/treatment-time-series/ENCSR075MHG/"
  ],
  "superseded_by": [
    
  ],
  "related_annotations": [
    
  ],
  "life_stage_age": "adult 53 years",
  "perturbed": False
}

In [None]:
How to keep track of numpy indices?
Should embedded json or free text?