In [58]:
import requests
import pandas as pd

In [8]:
# GraphQL endpoint on civicdb.org; every request in this notebook is POSTed here.
url = "https://civicdb.org/api/graphql"

In [20]:
# GraphQL query for one page of assertions. For each assertion it selects the
# molecular profile's evidence items (status plus author/citation/journal of the
# source). `$assertCursor` is forwarded as `after`, so a caller can page through
# the results by passing the `cursor` of the last edge it received.
# NOTE(review): the nested `evidenceItems` connection is requested without any
# paging arguments, so presumably only its first page comes back per profile —
# confirm against the API before relying on it being complete.
body = """
query getAssertions($assertCursor: String) {
	assertions(after: $assertCursor) {
    edges {
      cursor
      node {
        molecularProfile {
          evidenceItems {
            edges {
              node {
                status
                source {
                  authorString
                  citation
                  fullJournalTitle
                }
              }
            }
          }
        }
      }
    }
  }
}
"""

In [21]:
# Fetch the first page of assertions (no cursor variable is sent).
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# failure when the JSON body is inspected in later cells.
response = requests.post(url=url, json={"query": body}, timeout=60)
response.raise_for_status()

In [22]:
# How many assertion edges came back in this single response (i.e. one page).
len(response.json()["data"]["assertions"]["edges"])

50

In [24]:
# Peek at the last assertion node on the page to see the nested payload shape.
response.json()["data"]["assertions"]['edges'][-1]['node']

{'molecularProfile': {'evidenceItems': {'edges': [{'node': {'status': 'ACCEPTED',
      'source': {'authorString': 'G Smith, R Bounds, H Wolf, R J C Steele, F A Carey, C R Wolf',
       'citation': 'Smith et al., 2010, Br. J. Cancer',
       'fullJournalTitle': 'British journal of cancer'}}}]}}}

In [44]:
# Filled in place by get_all_assertions(); reset whenever this cell is re-run.
profiles = []

def get_all_assertions(timeout=60):
    """Download every page of assertions and collect their molecular profiles.

    Posts the module-level ``body`` query to ``url`` repeatedly, sending the
    cursor of the last edge received as ``assertCursor``, until a page comes
    back with no edges. The ``molecularProfile`` of each assertion node is
    appended to the module-level ``profiles`` list; nothing is returned.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response carries a GraphQL ``errors`` entry, or
            if the pagination cursor does not advance between two pages.
    """
    variables = {"assertCursor": ""}
    while True:
        response = requests.post(
            url=url,
            json={"query": body, "variables": variables},
            timeout=timeout,  # without this a stalled connection blocks forever
        )
        response.raise_for_status()
        payload = response.json()
        # GraphQL reports query failures in the body, typically with HTTP 200.
        if payload.get("errors"):
            raise RuntimeError(f"GraphQL query failed: {payload['errors']}")
        response_assertions = payload["data"]["assertions"]["edges"]
        if not response_assertions:
            break
        profiles.extend(
            assertion["node"]["molecularProfile"] for assertion in response_assertions
        )
        next_cursor = response_assertions[-1]["cursor"]
        # If the server hands back the cursor we just sent, the same page would
        # be fetched (and appended) again and again; fail loudly instead.
        if next_cursor == variables["assertCursor"]:
            raise RuntimeError(
                f"Pagination cursor did not advance past {next_cursor!r}"
            )
        variables["assertCursor"] = next_cursor

        
# Walk every page of assertions; results accumulate in the `profiles` list.
get_all_assertions()

56


In [78]:
def get_evidence(timeout=60):
    """Download every evidence item, following the cursor page by page.

    Posts the ``getEv`` query to the module-level ``url`` repeatedly, sending
    the cursor of the last edge received as ``cursor``, until a page comes back
    with no edges.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list with the ``node`` dict of every evidence-item edge, in the
        order the pages were received.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response carries a GraphQL ``errors`` entry, or
            if the pagination cursor does not advance between two pages.
    """
    query = """
    query getEv($cursor:String) {
      evidenceItems(after:$cursor) {
        edges {
          cursor
          node {
            id
            status
            assertions {
              id
            }
            evidenceDirection
            evidenceRating
            evidenceLevel
            evidenceType
            source {
              authorString
              sourceUrl
              sourceType
              title
              pmcId
              journal
            }
          }
        }
      }
    }
    """
    variables = {"cursor": ""}
    ev_items = []
    while True:
        response = requests.post(
            url=url,
            json={"query": query, "variables": variables},
            timeout=timeout,  # without this a stalled connection blocks forever
        )
        response.raise_for_status()
        payload = response.json()
        # GraphQL reports query failures in the body, typically with HTTP 200.
        if payload.get("errors"):
            raise RuntimeError(f"GraphQL query failed: {payload['errors']}")
        response_items = payload["data"]["evidenceItems"]["edges"]
        if not response_items:
            break
        ev_items.extend(item["node"] for item in response_items)
        next_cursor = response_items[-1]["cursor"]
        # If the server hands back the cursor we just sent, the same page would
        # be fetched (and appended) again and again; fail loudly instead.
        if next_cursor == variables["cursor"]:
            raise RuntimeError(
                f"Pagination cursor did not advance past {next_cursor!r}"
            )
        variables["cursor"] = next_cursor
    return ev_items
# Download all evidence items (one HTTP request per page of results).
ev_items = get_evidence()

In [115]:
# Flatten every evidence item into one row: its own scalar fields, the ids of the
# assertions it is linked to, then the fields of its nested `source` record.
ev_items_transformed = [
    [
        item['id'],
        item['status'],
        [assertion['id'] for assertion in item['assertions']],
        item['evidenceDirection'],
        item['evidenceRating'],
        item['evidenceLevel'],
        item['evidenceType'],
        src['authorString'],
        src['sourceUrl'],
        src['sourceType'],
        src['title'],
        src['pmcId'],
        src['journal'],
    ]
    for item in ev_items
    # Single-element iteration binds the source dict once per item.
    for src in (item['source'],)
]

# Column labels follow the order of the values in each row above.
ev_items_df = pd.DataFrame(
    data=ev_items_transformed,
    columns=[
        'id',
        'status',
        'assertion_ids',
        'ev_direction',
        'ev_rating',
        'ev_level',
        'ev_type',
        'authors',
        'url',
        'type',
        'title',
        'pmc_id',
        'journal',
    ],
)

In [116]:
# Normalise journal names so spelling variants collapse together: lower-case
# them and strip literal periods. The vectorised `.str.replace` (with
# regex=False so "." is not treated as a wildcard) leaves missing journals as
# NaN, whereas calling `s.replace` through `.apply` would raise AttributeError
# on the first missing value.
ev_items_df["journal_cleaned"] = (
    ev_items_df["journal"].str.lower().str.replace(".", "", regex=False)
)
# Occurrences per cleaned journal name; show the journals that appear only once.
ct = ev_items_df["journal_cleaned"].value_counts()
ct[ct < 2]

saudi j biol sci                    1
oncogenesis                         1
ann otol rhinol laryngol            1
amino acids                         1
cell physiol biochem                1
                                   ..
appl immunohistochem mol morphol    1
genet couns                         1
ann med                             1
surg neurol                         1
south asian j cancer                1
Name: journal_cleaned, Length: 212, dtype: int64

In [118]:
# Write the evidence table to a CSV file, resolved relative to the current
# working directory. `index` is left at its default, so the DataFrame index is
# written as the first column.
# NOTE(review): `assertion_ids` holds Python lists, so presumably each cell is
# written as the list's string form — confirm that suits downstream readers.
ev_items_df.to_csv("civic_evidence_data.csv")