In [18]:
import isaid_helpers
import requests
import pickle
import click
import datetime
import os
from pylinkedcmd import isaid, utilities
import pandas as pd


In [9]:
def get_raw_model_items():
    r = requests.get(isaid_helpers.model_catalog_api)
    if r.status_code == 200:
        return r.json()["items"]

In [10]:
%%time
if click.confirm('Are you sure you want to bother ScienceBase with a request to get all model items?', default=True):
    model_items = get_raw_model_items()
    pickle.dump(model_items, open(isaid_helpers.f_raw_model_catalog, "wb"))
    print(
        isaid_helpers.f_raw_model_catalog, 
        "CREATED", 
        datetime.datetime.fromtimestamp(os.path.getmtime(isaid_helpers.f_raw_model_catalog))
    )
else:
    model_items = pickle.load(open(isaid_helpers.f_raw_model_catalog, "rb"))
    print("model_items loaded to memory from cache file")
    

Are you sure you want to bother ScienceBase with a request to get all model items? [Y/n]: Y
usgs_model_catalog.p CREATED 2021-06-07 16:25:59.385455
CPU times: user 46.9 ms, sys: 17.3 ms, total: 64.2 ms
Wall time: 4.92 s


In [27]:
def graphable_model_items(nodified_models, return_format="list"):
    model_items = [i["properties"] for i in nodified_models]
    
    if return_format == "list":
        return model_items
    elif return_format == "dataframe":
        return pd.DataFrame(model_items)
    
def graphable_model_contacts(nodified_models, return_format="list"):
    all_contacts = list()
    for item in [i for i in nodified_models if "identified_points_of_contact" in i]:
        [i.update({"model_url": item["properties"]["url"]}) for i in item["identified_points_of_contact"]]
        all_contacts.extend(item["identified_points_of_contact"])
        
    if return_format == "list":
        return all_contacts
    elif return_format == "dataframe":
        return pd.DataFrame(all_contacts)
    

In [14]:
nodified_model_items = [isaid.model_node_from_sb_item(i) for i in model_items]

In [26]:
[i["identified_points_of_contact"] for i in nodified_model_items if "identified_points_of_contact" in i]

[[{'node_type': 'Person',
   'name': 'GS-W FACET',
   'reference': 'https://www.sciencebase.gov/catalog/item/5eb4485582ce25b5135abf14',
   'date_qualifier': '2020-09-25T21:37:46.156Z',
   'relationship_type': 'POINT_OF_CONTACT',
   'email': 'gs-w_facet@usgs.gov',
   'identifier_sciencebase': 'https://www.sciencebase.gov/directory/person/78586',
   'model_url': 'https://www.sciencebase.gov/catalog/item/5eb4485582ce25b5135abf14'},
  {'node_type': 'Person',
   'name': 'Kristina G Hopkins',
   'reference': 'https://www.sciencebase.gov/catalog/item/5eb4485582ce25b5135abf14',
   'date_qualifier': '2020-09-25T21:37:46.156Z',
   'relationship_type': 'POINT_OF_CONTACT',
   'email': 'khopkins@usgs.gov',
   'orcid': '0000-0003-1699-9384',
   'identifier_sciencebase': 'https://www.sciencebase.gov/directory/person/70269',
   'model_url': 'https://www.sciencebase.gov/catalog/item/5eb4485582ce25b5135abf14'}],
 [{'node_type': 'Person',
   'name': 'Frederick D Day-Lewis',
   'reference': 'https://www.s

In [28]:
graphable_model_contacts(nodified_model_items, return_format="dataframe").head()

Unnamed: 0,node_type,name,reference,date_qualifier,relationship_type,email,identifier_sciencebase,model_url,orcid
0,Person,GS-W FACET,https://www.sciencebase.gov/catalog/item/5eb44...,2020-09-25T21:37:46.156Z,POINT_OF_CONTACT,gs-w_facet@usgs.gov,https://www.sciencebase.gov/directory/person/7...,https://www.sciencebase.gov/catalog/item/5eb44...,
1,Person,Kristina G Hopkins,https://www.sciencebase.gov/catalog/item/5eb44...,2020-09-25T21:37:46.156Z,POINT_OF_CONTACT,khopkins@usgs.gov,https://www.sciencebase.gov/directory/person/7...,https://www.sciencebase.gov/catalog/item/5eb44...,0000-0003-1699-9384
2,Person,Frederick D Day-Lewis,https://www.sciencebase.gov/catalog/item/5eb44...,2020-09-25T21:40:48.281Z,POINT_OF_CONTACT,daylewis@usgs.gov,https://www.sciencebase.gov/directory/person/2384,https://www.sciencebase.gov/catalog/item/5eb44...,0000-0003-3526-886X
3,Person,Daniel R Schlaepfer,https://www.sciencebase.gov/catalog/item/5eb44...,2020-12-03T18:58:22.811Z,POINT_OF_CONTACT,dschlaepfer@usgs.gov,https://www.sciencebase.gov/directory/person/7...,https://www.sciencebase.gov/catalog/item/5eb44...,
4,Person,John B Bradford,https://www.sciencebase.gov/catalog/item/5eb44...,2020-12-03T18:58:22.811Z,POINT_OF_CONTACT,jbradford@usgs.gov,https://www.sciencebase.gov/directory/person/2...,https://www.sciencebase.gov/catalog/item/5eb44...,0000-0001-9257-6303


In [22]:
for item in nodified_model_items:
    print(item.keys())

dict_keys(['properties', 'identified_points_of_contact', 'identified_organizations', 'source_repositories', 'identified_datasets'])
dict_keys(['properties', 'identified_points_of_contact', 'identified_organizations', 'identified_source_repositories', 'identified_references'])
dict_keys(['properties', 'identified_organizations', 'identified_references', 'source_repositories'])
dict_keys(['properties', 'identified_points_of_contact', 'identified_organizations', 'source_repositories', 'identified_references'])
dict_keys(['properties', 'identified_organizations', 'source_repositories', 'identified_references'])
dict_keys(['properties', 'identified_points_of_contact', 'identified_organizations', 'source_repositories', 'references'])
dict_keys(['properties', 'identified_points_of_contact', 'identified_organizations', 'identified_source_repositories', 'identified_datasets'])
dict_keys(['properties', 'source_repositories', 'references'])
dict_keys(['properties', 'identified_points_of_contact',