### Get dataframe of related diseases


In [None]:

import SPARQLWrapper

# SPARQL that lists every entity sharing a gene (via wdt:P2293) or a symptom
# (via wdt:P780) with the target entity Q41112. The two sub-selects are
# UNIONed, so each result row carries either the gene or the symptom columns;
# ?count adds both counts with missing ones treated as 0.
query = '''
PREFIX target: <http://www.wikidata.org/entity/Q41112>

SELECT
  ?count
  ?gene_count
  ?symptom_count
  ?disease ?diseaseLabel
  ?genes
  ?symptoms
{
  {
    SELECT ?disease (COUNT(?gene) AS ?gene_count) (GROUP_CONCAT(?gene_label; separator=" // ") AS ?genes) WHERE {
      target: wdt:P2293 ?gene .
      ?gene wdt:P2293 ?disease .
      FILTER (target: != ?disease)
      ?gene rdfs:label ?gene_label
      FILTER(lang(?gene_label) = "en")
    }
    GROUP BY ?disease
  }
  UNION
  {
    SELECT
      ?disease (COUNT(?symptom) AS ?symptom_count) (GROUP_CONCAT(?symptom_label; separator=" // ") AS ?symptoms)
    {
      target: wdt:P780 ?symptom .
      ?disease wdt:P780 ?symptom .
      FILTER (target: != ?disease)
      ?symptom rdfs:label ?symptom_label . FILTER(lang(?symptom_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?symptom_count, 0) + COALESCE(?gene_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

# Client bound to the Wikidata SPARQL endpoint, configured to return JSON
# and loaded with the query text above.
sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setReturnFormat(SPARQLWrapper.JSON)
sparql.setQuery(query)

# Send the request and convert the response; later cells read
# results['results']['bindings'] from this value.
results = sparql.query().convert()


In [None]:
# Index the result rows by the entity id found at the end of each ?disease URI.
# `entities` keeps the ids in result order (duplicates included), while
# `reformatted_dict` maps each id to the last binding seen for it.
entities = []
reformatted_dict = {}

result_list = results['results']['bindings']
for binding in result_list:
    # Rows without a ?disease binding are skipped.
    if 'disease' not in binding:
        continue
    # The id is whatever follows the final '/' of the entity URI.
    entity_id = binding['disease']['value'].rsplit('/', 1)[-1]
    entities.append(entity_id)
    reformatted_dict[entity_id] = binding

print(reformatted_dict)

In [None]:
import pandas as pd

# from_dict() puts the outer keys (entity ids) in the columns, so the frame
# is flipped to get one row per entity id and one column per result variable.
df = pd.DataFrame.from_dict(reformatted_dict).T

print(df.columns)
print(df)

### Get publications per year for all related diseases

In [None]:
# Get publications per year for every disease that is associated with
# schizophrenia (one row of `df` per related disease, indexed by entity id).

# Query template for the number of publications per year about one entity.
# It is filled in with str.format, so every literal SPARQL brace is doubled
# and only {entity} is a placeholder. The first sub-select emits one row per
# year between the earliest publication year and the current year without
# binding ?work, so years with no publication still show up with a count of 0;
# the second sub-select contributes one row per work with its earliest
# publication year.
PUBLICATIONS_PER_YEAR_QUERY = '''
PREFIX target: <http://www.wikidata.org/entity/{entity}>

# Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
# Shubhanshu Mishra, Vetle Torvik
select ?year (count(?work) as ?number_of_publications) where {{
  {{
    select (str(?year_) as ?year) (0 as ?pages) where {{
      # default values = 0
      ?year_item wdt:P31 wd:Q577 .
      ?year_item wdt:P585 ?date .
      bind(year(?date) as ?year_)
      {{
        select (min(?year_) as ?earliest_year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?publication_date .
          bind(year(?publication_date) as ?year_)
        }}
      }}
      bind(year(now()) as ?next_year)
      filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
    }}
  }}
  union {{
    select ?work (min(?years) as ?year) where {{
      {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
      union {{ ?work wdt:P921/wdt:P361+ target: . }}
      union {{ ?work wdt:P921/wdt:P1269+ target: . }}
      ?work wdt:P577 ?dates .
      bind(str(year(?dates)) as ?years) .
    }}
    group by ?work
  }}
}}
group by ?year
order by ?year
'''

# Raw JSON result of each request, keyed by entity id, so the data is still
# available after the loop instead of being overwritten on every iteration.
publications_per_year = {}

# The return format does not change between requests; set it once.
sparql.setReturnFormat(SPARQLWrapper.JSON)

for index in df.index:
    print(index)

    # Build a fresh query for this entity. Formatting the dedicated template
    # (rather than the related-diseases query still held in `query`) avoids
    # str.format choking on that query's unescaped braces.
    query = PUBLICATIONS_PER_YEAR_QUERY.format(entity=index)
    sparql.setQuery(query)

    # Execute the query and keep the converted response.
    results = sparql.query().convert()
    publications_per_year[index] = results
    print(results)

In [3]:
import SPARQLWrapper

In [4]:
# Try the publications-per-year query for a few hand-picked entities.
entities = ['Q12174', 'Q50349648', 'Q844935']

# The endpoint and return format are the same for every entity, so the client
# is created and configured once instead of once per loop iteration.
sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setReturnFormat(SPARQLWrapper.JSON)

for entity in entities:
    # Braces are doubled because this is an f-string; only {entity} is
    # interpolated. The first sub-select yields one row per year from the
    # earliest publication year up to the current year without binding ?work,
    # so count(?work) reports 0 for years that have no publication.
    query = f'''
    PREFIX target: <http://www.wikidata.org/entity/{entity}>

    # Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
    # Shubhanshu Mishra, Vetle Torvik
    select ?year (count(?work) as ?number_of_publications) where {{
      {{
        select (str(?year_) as ?year) (0 as ?pages) where {{
          # default values = 0
          ?year_item wdt:P31 wd:Q577 .
          ?year_item wdt:P585 ?date .
          bind(year(?date) as ?year_)
          {{
            select (min(?year_) as ?earliest_year) where {{
              {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
              union {{ ?work wdt:P921/wdt:P361+ target: . }}
              union {{ ?work wdt:P921/wdt:P1269+ target: . }}
              ?work wdt:P577 ?publication_date .
              bind(year(?publication_date) as ?year_)
            }}
          }}
          bind(year(now()) as ?next_year)
          filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
        }}
      }}
      union {{
        select ?work (min(?years) as ?year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?dates .
          bind(str(year(?dates)) as ?years) .
        }}
        group by ?work
      }}
    }}
    group by ?year
    order by ?year
    '''

    # Load this entity's query into the shared client.
    sparql.setQuery(query)

    # Execute the query and show the converted response.
    results = sparql.query().convert()
    print(results)

{'head': {'vars': ['year', 'number_of_publications']}, 'results': {'bindings': [{'year': {'type': 'literal', 'value': '1863'}, 'number_of_publications': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '1'}}, {'year': {'type': 'literal', 'value': '1865'}, 'number_of_publications': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '0'}}, {'year': {'type': 'literal', 'value': '1870'}, 'number_of_publications': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '0'}}, {'year': {'type': 'literal', 'value': '1880'}, 'number_of_publications': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '1'}}, {'year': {'type': 'literal', 'value': '1885'}, 'number_of_publications': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer', 'type': 'literal', 'value': '1'}}, {'year': {'type': 'literal', 'value': '1888'}, 'number_of_publications': {'datatype': 'http://w