# transform into immport templates

In [1]:
import json
import pickle
from glob import glob
from dateutil.parser import parse

In [2]:
def get_properties(template: dict, prop_ignore:list = None) -> list: 
    if prop_ignore is None: 
        prop_ignore = ['schemaVersion', 'templateType', 'validationLevel', 'userDefinedId']

    properties = [k for k in template['properties'].keys() if k not in prop_ignore]
    return properties

In [3]:
def load_template(file_path): 
    with open(file_path, 'r', encoding='utf-8') as f: 
        return json.load(f)

In [5]:
# Open the file in binary read mode
with open('./data/publications_2024-08-12.pkl', 'rb') as file:
    # Use pickle to deserialize the object
    records = pickle.load(file)

# records['result'].pop('uids', None)

In [6]:
pubmed_template = load_template('../immport/dev/data/immport_templates/json-templates/basic_study_design.studyPubmed.json')
properties = get_properties(pubmed_template)
properties

['pubmedId',
 'doi',
 'title',
 'journal',
 'year',
 'month',
 'issue',
 'pages',
 'authors']

In [41]:
def process_pubmed_pubs(record: dict, properties:list) -> dict: 
    mapping = {
        'pubmedId':'uid',
        'journal':'fulljournalname'
    }

    starter = {k:v for k,v in record.items() if k in properties}
    for k,v in mapping.items(): 
        starter[k] = record[v]

    # cleanup
    starter['authors'] = [a['name'] for a in starter['authors']]
    pubdate = parse(record['pubdate'])
    starter['year'] = pubdate.year
    starter['month'] = pubdate.month
    if 'abstract' in record.keys(): 
        starter['abtract'] = record['abstract']
    
    try: 
        for d in record['articleids']:
            if d['idtype'] == 'doi': 
                starter['doi'] = d['value'] 
    except Exception as e:
        print(e) 
    if len(set(properties) - set(starter.keys())) > 0: 
        print('Missing properties: ', set(properties) - set(starter.keys()))

    return starter

In [42]:
publications = {}
for k,v in records.items(): 
    publications[v['uid']] = process_pubmed_pubs(v,properties)

publications

{'38995971': {'authors': ['Petrucciani A',
   'Hoerter A',
   'Kotze L',
   'Du Plessis N',
   'Pienaar E'],
  'title': 'Agent-based model predicts that layered structure and 3D movement work synergistically to reduce bacterial load in 3D in vitro models of tuberculosis granuloma.',
  'issue': '7',
  'pages': 'e1012266',
  'pubmedId': '38995971',
  'journal': 'PLoS computational biology',
  'year': 2024,
  'month': 7,
  'abtract': 'Tuberculosis (TB) remains a global public health threat. Understanding the dynamics of host-pathogen interactions within TB granulomas will assist in identifying what leads to the successful elimination of infection. In vitro TB models provide a controllable environment to study these granuloma dynamics. Previously we developed a biomimetic 3D spheroid granuloma model that controls bacteria better than a traditional monolayer culture counterpart. We used agent-based simulations to predict the mechanistic reason for this difference. Our calibrated simulations

In [43]:
study_templates = glob('../immport/dev/data/immport_templates/json-templates/basic_study*')
study_templates

['../immport/dev/data/immport_templates/json-templates\\basic_study_design.armOrCohort.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.inclusionExclusion.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.plannedVisit.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.study.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.study2ConditionOrDisease.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.study2Protocol.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.studyCategorization.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.studyFile.json',
 '../immport/dev/data/immport_templates/json-templates\\basic_study_design.studyLink.json',
 '../immport/dev/data/immport_templates/json-templ

In [44]:
from pathlib import Path

In [45]:
# basic study design
templates = {}
prop_ignore = ['schemaVersion', 'templateType', 'validationLevel', 'userDefinedId']
for t in study_templates: 
    template_name = Path(t).stem.replace('basic_study_design.', '')
    with open(t, 'r', encoding='utf-8') as f: 
        template = json.load(f)
        templates[template_name] = [k for k in template['properties'].keys() if k not in prop_ignore]
templates

{'armOrCohort': ['name', 'description', 'typeReported'],
 'inclusionExclusion': ['criterion', 'criterionCategory'],
 'basic_study_design': ['fileName',
  'name',
  'study',
  'studyCategorization',
  'study2ConditionOrDisease',
  'armOrCohort',
  'studyPersonnel',
  'plannedVisit',
  'inclusionExclusion',
  'study2Protocol',
  'studyFile',
  'studyLink',
  'studyPubmed'],
 'plannedVisit': ['name',
  'orderNumber',
  'minStartDay',
  'maxStartDay',
  'startRule',
  'endRule'],
 'study': ['briefTitle',
  'officialTitle',
  'briefDescription',
  'description',
  'interventionAgent',
  'endpoints',
  'sponsoringOrganization',
  'ageUnit',
  'actualStartDate',
  'hypothesis',
  'objectives',
  'targetEnrollment',
  'minimumAge',
  'maximumAge'],
 'study2ConditionOrDisease': ['conditionReported'],
 'study2Protocol': ['protocolId'],
 'studyCategorization': ['researchFocus'],
 'studyFile': ['fileName', 'description', 'studyFileType'],
 'studyLink': ['name', 'value'],
 'studyPersonnel': ['honor

In [46]:
study = templates['study']
study

['briefTitle',
 'officialTitle',
 'briefDescription',
 'description',
 'interventionAgent',
 'endpoints',
 'sponsoringOrganization',
 'ageUnit',
 'actualStartDate',
 'hypothesis',
 'objectives',
 'targetEnrollment',
 'minimumAge',
 'maximumAge']

In [47]:
mapping = {
    'briefTitle': 'title', 
    'officialTitle': 'title',
    'briefDescription': 'abstract',
    'description': 'abstract',
    # endpoints: mesh terms,
    # sponsoringOrganization: organizations
}

In [49]:
publications

{'38995971': {'authors': ['Petrucciani A',
   'Hoerter A',
   'Kotze L',
   'Du Plessis N',
   'Pienaar E'],
  'title': 'Agent-based model predicts that layered structure and 3D movement work synergistically to reduce bacterial load in 3D in vitro models of tuberculosis granuloma.',
  'issue': '7',
  'pages': 'e1012266',
  'pubmedId': '38995971',
  'journal': 'PLoS computational biology',
  'year': 2024,
  'month': 7,
  'abtract': 'Tuberculosis (TB) remains a global public health threat. Understanding the dynamics of host-pathogen interactions within TB granulomas will assist in identifying what leads to the successful elimination of infection. In vitro TB models provide a controllable environment to study these granuloma dynamics. Previously we developed a biomimetic 3D spheroid granuloma model that controls bacteria better than a traditional monolayer culture counterpart. We used agent-based simulations to predict the mechanistic reason for this difference. Our calibrated simulations

In [48]:
study_filled_temps = {}
for p,d in publications.itemskeys():
    # print(p)
    pub_study = {k:'' for k in study}
    for k,v in mapping.items(): 
        try: 
            pub_study[k] = p[v]
        except: 
            pass
    pub_study['sponsoringOrganization'] = "Seattle Children's Research Institute"

    study_filled_temps[p] = pub_study

SyntaxError: invalid syntax (3839562436.py, line 1)

In [33]:
study_filled_temps

[{'briefTitle': 'Agent-based model predicts that layered structure and 3D movement work synergistically to reduce bacterial load in 3D in vitro models of tuberculosis granuloma.',
  'officialTitle': 'Agent-based model predicts that layered structure and 3D movement work synergistically to reduce bacterial load in 3D in vitro models of tuberculosis granuloma.',
  'briefDescription': '',
  'description': '',
  'interventionAgent': '',
  'endpoints': '',
  'sponsoringOrganization': "Seattle Children's Research Institute",
  'ageUnit': '',
  'actualStartDate': '',
  'hypothesis': '',
  'objectives': '',
  'targetEnrollment': '',
  'minimumAge': '',
  'maximumAge': ''},
 {'briefTitle': 'Reappraising the Role of T Cell-Derived IFN-γ in Restriction of Mycobacterium tuberculosis in the Murine Lung.',
  'officialTitle': 'Reappraising the Role of T Cell-Derived IFN-γ in Restriction of Mycobacterium tuberculosis in the Murine Lung.',
  'briefDescription': '',
  'description': '',
  'interventionA

In [None]:
study['sponsoringOrganization'] = "Seattle Children's Research Institute"