Import libraries

In [1]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px

The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import dash_core_components as dcc
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  import dash_html_components as html


Get Dataframe from COVID data

In [2]:
import SPARQLWrapper

sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

query_covid = '''
PREFIX target: <http://www.wikidata.org/entity/Q84263196>

SELECT
  ?count
  ?gene_count
  ?symptom_count
  ?disease ?diseaseLabel
  ?genes
  ?symptoms
{
  {
    SELECT ?disease (COUNT(?gene) AS ?gene_count) (GROUP_CONCAT(?gene_label; separator=" // ") AS ?genes) WHERE {
      target: wdt:P2293 ?gene .
      ?gene wdt:P2293 ?disease .
      FILTER (target: != ?disease)
      ?gene rdfs:label ?gene_label
      FILTER(lang(?gene_label) = "en")
    }
    GROUP BY ?disease
  }
  UNION
  {
    SELECT
      ?disease (COUNT(?symptom) AS ?symptom_count) (GROUP_CONCAT(?symptom_label; separator=" // ") AS ?symptoms)
    {
      target: wdt:P780 ?symptom .
      ?disease wdt:P780 ?symptom .
      FILTER (target: != ?disease)
      ?symptom rdfs:label ?symptom_label . FILTER(lang(?symptom_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?symptom_count, 0) + COALESCE(?gene_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

sparql.setQuery(query_covid)
sparql.setReturnFormat(SPARQLWrapper.JSON)
results_covid = sparql.query().convert()

reformatted_dict_covid = {}

entities_covid = []
result_list_covid = results_covid['results']['bindings']
for res in result_list_covid:
    for res_key, res_value in res.items():
        if res_key == 'disease':
            uri = res_value['value']
            splitted_uri = uri.split('/')
            entity_id = splitted_uri[-1]
            entities_covid.append(entity_id)
            reformatted_dict_covid[entity_id] = res

df_covid = pd.DataFrame.from_dict(reformatted_dict_covid)
df_covid = df_covid.transpose()
# print(df.columns)

In [3]:
# extract data from dictionaries
df_covid = df_covid.apply(lambda x: x.apply(lambda y: y['value'] if type(y) == dict else y))

for index, row in df_covid.iterrows():
    row['symptoms'] = row['symptoms'].split(' // ')

In [4]:
df_covid


Unnamed: 0,disease,symptom_count,symptoms,diseaseLabel,count
Q21396183,http://www.wikidata.org/entity/Q21396183,11,"[headache, nausea, cough, dyspnea, anorexia, f...",arsenic pentoxide exposure,11
Q6137239,http://www.wikidata.org/entity/Q6137239,9,"[headache, cough, dyspnea, fever, myalgia, chi...",hantavirus pulmonary syndrome,9
Q1432397,http://www.wikidata.org/entity/Q1432397,9,"[headache, cough, fever, myalgia, chills, diar...",Kyasanur forest disease,9
Q21173343,http://www.wikidata.org/entity/Q21173343,9,"[headache, anemia, nausea, cough, anosmia, dys...",cadmium oxide exposure,9
Q21173341,http://www.wikidata.org/entity/Q21173341,9,"[headache, anemia, nausea, cough, anosmia, dys...",cadmium dust exposure,9
...,...,...,...,...,...
Q21174190,http://www.wikidata.org/entity/Q21174190,1,[headache],Diisobutyl ketone exposure,1
Q21174168,http://www.wikidata.org/entity/Q21174168,1,[headache],"1,3-Dichloropropene exposure",1
Q21174156,http://www.wikidata.org/entity/Q21174156,1,[headache],"3,3'-Dichlorobenzidine exposure",1
Q21174149,http://www.wikidata.org/entity/Q21174149,1,[headache],Dibutyl phosphate exposure,1


In [5]:
# get second dataframe with schizophrenia data

sparql_schiz = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

query_schiz = '''
PREFIX target: <http://www.wikidata.org/entity/Q41112>

SELECT
  ?count
  ?gene_count
  ?symptom_count
  ?disease ?diseaseLabel
  ?genes
  ?symptoms
{
  {
    SELECT ?disease (COUNT(?gene) AS ?gene_count) (GROUP_CONCAT(?gene_label; separator=" // ") AS ?genes) WHERE {
      target: wdt:P2293 ?gene .
      ?gene wdt:P2293 ?disease .
      FILTER (target: != ?disease)
      ?gene rdfs:label ?gene_label
      FILTER(lang(?gene_label) = "en")
    }
    GROUP BY ?disease
  }
  UNION
  {
    SELECT
      ?disease (COUNT(?symptom) AS ?symptom_count) (GROUP_CONCAT(?symptom_label; separator=" // ") AS ?symptoms)
    {
      target: wdt:P780 ?symptom .
      ?disease wdt:P780 ?symptom .
      FILTER (target: != ?disease)
      ?symptom rdfs:label ?symptom_label . FILTER(lang(?symptom_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?symptom_count, 0) + COALESCE(?gene_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

sparql_schiz.setQuery(query_covid)
sparql_schiz.setReturnFormat(SPARQLWrapper.JSON)
results_schiz = sparql_schiz.query().convert()

reformatted_dict_schiz = {}

entities_schiz = []
result_list_schiz = results_schiz['results']['bindings']
for res in result_list_schiz:
    for res_key, res_value in res.items():
        if res_key == 'disease':
            uri = res_value['value']
            splitted_uri = uri.split('/')
            entity_id = splitted_uri[-1]
            entities_covid.append(entity_id)
            reformatted_dict_schiz[entity_id] = res

df_schiz = pd.DataFrame.from_dict(reformatted_dict_schiz)
df_schiz = df_schiz.transpose()

In [6]:
df_schiz

Unnamed: 0,disease,symptom_count,symptoms,diseaseLabel,count
Q21396183,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // naus...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q6137239,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // coug...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q1432397,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // coug...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q706845,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // fati...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q154874,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // fati...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
...,...,...,...,...,...
Q21174190,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q21174168,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q21174156,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...
Q21174149,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...


In [7]:
# extract data from dictionaries
df_schiz = df_schiz.apply(lambda x: x.apply(lambda y: y['value'] if type(y) == dict else y))

for index, row in df_schiz.iterrows():
    row['symptoms'] = row['symptoms'].split(' // ')

In [8]:
df_schiz

Unnamed: 0,disease,symptom_count,symptoms,diseaseLabel,count
Q21396183,http://www.wikidata.org/entity/Q21396183,11,"[headache, nausea, cough, dyspnea, anorexia, f...",arsenic pentoxide exposure,11
Q6137239,http://www.wikidata.org/entity/Q6137239,9,"[headache, cough, dyspnea, fever, myalgia, chi...",hantavirus pulmonary syndrome,9
Q1432397,http://www.wikidata.org/entity/Q1432397,9,"[headache, cough, nausea, fever, myalgia, chil...",Kyasanur forest disease,9
Q706845,http://www.wikidata.org/entity/Q706845,9,"[headache, fatigue, cough, nausea, fever, myal...",Lassa fever,9
Q154874,http://www.wikidata.org/entity/Q154874,9,"[headache, fatigue, nausea, fever, encephaliti...",yellow fever,9
...,...,...,...,...,...
Q21174190,http://www.wikidata.org/entity/Q21174190,1,[headache],Diisobutyl ketone exposure,1
Q21174168,http://www.wikidata.org/entity/Q21174168,1,[headache],"1,3-Dichloropropene exposure",1
Q21174156,http://www.wikidata.org/entity/Q21174156,1,[headache],"3,3'-Dichlorobenzidine exposure",1
Q21174149,http://www.wikidata.org/entity/Q21174149,1,[headache],Dibutyl phosphate exposure,1


Build Dash application

In [28]:
# Balkendiagramm erstellen
bar_fig = px.bar(df_covid, x='diseaseLabel', y='symptom_count', color='diseaseLabel')

# Scatter-Plot erstellen
scatter_fig = px.scatter(df_covid, x='count', y='symptoms', color='diseaseLabel', size_max=10)

# Dashboard-Layout definieren
app = dash.Dash(__name__)
app.layout = html.Div(children=[
    html.H1(children='Krankheitssymptome'),

    html.Div(children='''
        Balkendiagramm der häufigsten Symptome
    '''),

    dcc.Graph(
        id='bar-chart',
        figure=bar_fig
    ),

    html.Div(children='''
        Scatter-Plot der Anzahl der Symptome pro Krankheit
    '''),

    dcc.Graph(
        id='scatter-plot',
        figure=scatter_fig
    )
])

if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


In [2]:
# app for two different diseases

app2 = dash.Dash(__name__)
app2.layout = html.Div([
    html.H1('Disease'),
    dcc.Dropdown(
        id='disease-dropdown',
        options=[
            {'label': 'Covid', 'value': 'Covid'},
            {'label': 'Schizophrenia', 'value': 'Schizophrenia'}
        ],
        value='Covid'
    ),
    dcc.Graph(id='symptoms-counts')
])

# Dashboard-Callback definieren
@app2.callback(
    dash.dependencies.Output('symptom-counts', 'figure'),
    [dash.dependencies.Input('disease-dropdown', 'value')]
)
def update_symptom_counts(disease):
    if disease == 'Covid':
        df = df_covid
    elif disease == 'Schizophrenia':
        df = df_schiz
    else:
        raise ValueError("Unbekannte Krankheit ausgewählt.")

    # Balkendiagramm erstellen
    #bar_fig = px.bar(df, x='diseaseLabel', y='symptom_count', color='diseaseLabel')
    # Create a Circos plot using Plotly Express
    fig = px.parallel_categories(df, dimensions=['diseaseLabel', 'symptoms'], color='count')
    return fig

NameError: name 'dash' is not defined

In [6]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objs as go
import dash_cytoscape as cyto


# app for two different diseases

app2 = dash.Dash(__name__)

# Define the dropdown options
dropdown_options = [
    {'label': 'Schizophrenia', 'value': 'schizophrenia'},
    {'label': 'COVID-19', 'value': 'covid'}
]

# Define the layout
app2.layout = html.Div([
    dcc.Dropdown(
        id='disease-dropdown',
        options=dropdown_options,
        value='schizophrenia'
    ),
    cyto.Cytoscape(
        id='disease-graph',
        layout={'name': 'circle'},
        style={'width': '100%', 'height': '800px'},
        elements=[]
    )
])

# Define the callback to update the graph when the dropdown is changed
@app2.callback(
    dash.dependencies.Output('disease-graph', 'elements'),
    [dash.dependencies.Input('disease-dropdown', 'value')]
)
def update_graph(disease):
    if disease == 'schizophrenia':
        df = df_schiz
    else:
        df = df_covid

    nodes = [{'data': {'id': row['disease'], 'label': row['disease']}} for _, row in df.iterrows()]

    edges = []
    for i in range(len(df)):
        for j in range(i+1, len(df)):
            shared_symptoms = set(df.iloc[i]['symptoms']) & set(df.iloc[j]['symptoms'])
            if len(shared_symptoms) > 0:
                edge = {
                    'data': {
                        'source': df.iloc[i]['disease'],
                        'target': df.iloc[j]['disease'],
                        'weight': len(shared_symptoms),
                        'label': str(len(shared_symptoms))
                    }
                }
                edges.append(edge)

    return nodes + edges

if __name__ == '__main__':
    app2.run_server(debug=False)


127.0.0.1 - - [04/May/2023 12:52:59] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 12:52:59] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 12:52:59] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 12:52:59] "GET /_favicon.ico?v=2.7.0 HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 12:52:59] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\flask\app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\flask\app.py", line 1822, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\flask\app.py", line 1820, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\flask\app.py", line 1796, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\dash\dash.py", line 1274, in dispatch
    ctx.run(
  File "C:\Users\lucys\anaconda3\envs\scholia_visualization\lib\site-packages\dash\_callback.py", lin

127.0.0.1 - - [04/May/2023 12:52:59] "POST /_dash-update-component HTTP/1.1" 500 -


Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
Press CTRL+C to quit


# from documentation

In [None]:
# import libraries

import json
import urllib.request as urlreq
import dash
from dash.dependencies import Input, Output, State
import dash_bio as dashbio
from dash import html, dcc


In [4]:
# getting a more simple visualization than a circos plot to start

import SPARQLWrapper


sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

query = '''
PREFIX target: <http://www.wikidata.org/entity/Q41112>

SELECT
  DISTINCT
  ?start_date
  ?trial ?trialLabel
  ?intervention ?interventionLabel
  ?sponsor ?sponsorLabel
WHERE {
  ?trial wdt:P31 wd:Q30612 ;
  wdt:P1050 / wdt:P279* target: .
  OPTIONAL {
    ?trial wdt:P580 ?starttime
    BIND(SUBSTR(STR(?starttime), 0, 11) AS ?start_date)
  }
  OPTIONAL { ?trial wdt:P4844 ?intervention }
  OPTIONAL { ?trial wdt:P859 ?sponsor }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en,da,de,es,fr,jp,nl,no,pl,ru,sv,zh". }
}
ORDER BY DESC(?starttime)
'''

# Set the query language to SPARQL
sparql.setQuery(query)

# Set the return format to JSON
sparql.setReturnFormat(SPARQLWrapper.JSON)

# Execute the query and store the results
results_trials = sparql.query().convert()

dict_trials = {}

trials = []
result_list = results_trials['results']['bindings']
for res in result_list:
    for res_key, res_value in res.items():
        if res_key == 'trial':
            uri = res_value['value']
            splitted_uri = uri.split('/')
            entity_id = splitted_uri[-1]
            trials.append(entity_id)
            dict_trials[entity_id] = res


trial_df = pd.DataFrame.from_dict(dict_trials)
trial_df = trial_df.transpose()

# extract data from dictionaries
trial_df = trial_df.apply(lambda x: x.apply(lambda y: y['value'] if type(y) == dict else y))


trial_df



Unnamed: 0,start_date,trial,trialLabel,intervention,interventionLabel,sponsor,sponsorLabel
Q113914424,2022-12-01,http://www.wikidata.org/entity/Q113914424,fMRI-based Neurofeedback to Relieve Drug-resis...,,,,
Q113932415,2022-10-01,http://www.wikidata.org/entity/Q113932415,Exercise-based Program for Rehabilitation of V...,,,,
Q113927226,2022-09-15,http://www.wikidata.org/entity/Q113927226,D-serine AudRem: R33 Phase,,,,
Q113941830,2022-09-01,http://www.wikidata.org/entity/Q113941830,EEG and Behavioral Correlates of Temporal Pred...,,,,
Q113943405,2022-08-01,http://www.wikidata.org/entity/Q113943405,Individualized Repetitive Transcranial Magneti...,,,,
...,...,...,...,...,...,...,...
Q66033186,,http://www.wikidata.org/entity/Q66033186,A Combination of Two Currently Approved Drugs ...,http://www.wikidata.org/entity/Q27225729,atomoxetine hydrochloride,,
Q66059716,,http://www.wikidata.org/entity/Q66059716,A Multi-Site Study of Strategies for Implement...,,,,
Q66059719,,http://www.wikidata.org/entity/Q66059719,Improving Health Services for Veterans With Sc...,,,,
Q66068233,,http://www.wikidata.org/entity/Q66068233,Prevalence of Diabetes Mellitus Among Patients...,,,,


In [3]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from SPARQLWrapper import SPARQLWrapper, JSON

# Create a Dash app
app = dash.Dash(__name__)

# Define the disease entities
disease_entities = {
    "TESTTEST-19": "Q84263196",
    "Schizophrenia": "Q41112"
}
for disease_label, disease_value in disease_entities.items():


    # Define the Wikidata endpoint and the query
    endpoint = "https://query.wikidata.org/sparql"
    query_template = f'''
    #defaultView:BarChart
PREFIX target: <http://www.wikidata.org/entity/{disease_value}>
select ?year (count(?work) as ?number_of_publications) where {{
  {{
    select (str(?year_) as ?year) (0 as ?pages) where {{
      ?year_item wdt:P31 wd:Q577 .
      ?year_item wdt:P585 ?date .
      bind(year(?date) as ?year_)
      {{
        select (min(?year_) as ?earliest_year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?publication_date .
          bind(year(?publication_date) as ?year_)
        }}
      }}
      bind(year(now()) as ?next_year)
      filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
    }}
  }}
  union {{
    select ?work (min(?years) as ?year) where {{
      {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
      union {{ ?work wdt:P921/wdt:P361+ target: . }}
      union {{ ?work wdt:P921/wdt:P1269+ target: . }}
      ?work wdt:P577 ?dates .
      bind(str(year(?dates)) as ?years) .
    }}
    group by ?work
  }}
}}
group by ?year
order by ?year
'''

# Define the layout of the app
app.layout = html.Div([
    # Dropdown menu to select the disease
    dcc.Dropdown(
        id="disease-dropdown",
        options=[{"label": disease, "value": entity_id} for disease, entity_id in disease_entities.items()],
        value=list(disease_entities.values())[0]
    ),
    # Bar plot of the trial start dates
    dcc.Graph(id="publications-per-year")
])

# Define the callback function to update the bar plot
@app.callback(
    dash.dependencies.Output("publications-per-year", "figure"),
    [dash.dependencies.Input("disease-dropdown", "value")]
)
def update_trial_start_dates_bar_plot(disease_entity):
    try:
        # Build the query string with the disease entity ID
        query = query_template.format(disease=disease_entity)

        # Run the query and get the results
        sparql = SPARQLWrapper(endpoint)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        # Convert the results to a Pandas DataFrame
        start_dates = [result["start_date"]["value"][:4] for result in results["results"]["bindings"]]
        year_counts = pd.Series(start_dates).value_counts().sort_index().to_frame().reset_index()
        year_counts.columns = ["year", "count"]

        # Create the bar plot
        fig = px.bar(year_counts, x="year", y="count")

        return fig
    except Exception as e:
        print(e.with_traceback())


if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
Press CTRL+C to quit
127.0.0.1 - - [04/May/2023 14:33:38] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 14:33:39] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 14:33:39] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 14:33:39] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 14:33:39] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 14:33:39] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 14:33:39] "POST /_dash-update-component HTTP/1.1" 200 -


something wrong
Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
Press CTRL+C to quit


In [2]:
import dash
from dash import dcc
from dash import html
import pandas as pd
import plotly.express as px
from SPARQLWrapper import SPARQLWrapper, JSON

# Define SPARQL endpoint
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Define SPARQL query
query = """
PREFIX target: <http://www.wikidata.org/entity/Q84263196>

# Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
# Shubhanshu Mishra, Vetle Torvik
select ?year (count(?work) as ?number_of_publications) where {
  {
    select (str(?year_) as ?year) (0 as ?pages) where {
      # default values = 0
      ?year_item wdt:P31 wd:Q577 .
      ?year_item wdt:P585 ?date .
      bind(year(?date) as ?year_)
      {
        select (min(?year_) as ?earliest_year) where {
          { ?work wdt:P921/wdt:P31*/wdt:P279* target: . }
          union { ?work wdt:P921/wdt:P361+ target: . }
          union { ?work wdt:P921/wdt:P1269+ target: . }
          ?work wdt:P577 ?publication_date .
          bind(year(?publication_date) as ?year_)
        }
      }
      bind(year(now()) as ?next_year)
      filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
    }
  }
  union {
    select ?work (min(?years) as ?year) where {
      { ?work wdt:P921/wdt:P31*/wdt:P279* target: . }
      union { ?work wdt:P921/wdt:P361+ target: . }
      union { ?work wdt:P921/wdt:P1269+ target: . }
      ?work wdt:P577 ?dates .
      bind(str(year(?dates)) as ?years) .
    }
    group by ?work
  }
}
group by ?year
order by ?year
"""

# Set the query and format to JSON
sparql.setQuery(query)
sparql.setReturnFormat(JSON)

# Execute the query and convert the results to a Pandas DataFrame
results = sparql.query().convert()
df = pd.json_normalize(results["results"]["bindings"])
df["year.value"] = pd.to_numeric(df["year.value"])
df["number_of_publications.value"] = pd.to_numeric(df["number_of_publications.value"])

# Create the bar chart using Plotly
fig = px.bar(df, x="year.value", y="number_of_publications.value")

# Create the Dash app and layout
app3 = dash.Dash(__name__)
app3.layout = html.Div(children=[
    html.H1("Publications per year on COVID-19"),
    dcc.Graph(id="publication-year-chart", figure=fig)
])

# Run the Dash app
if __name__ == '__main__':
    app3.run_server(debug=False)


Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


In [None]:
# dynamically for more diseases


import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.express as px
from SPARQLWrapper import SPARQLWrapper, JSON

# Define SPARQL endpoint
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# Create a list of disease options for the dropdown menu
disease_options = [
    {'label': 'COVID-19', 'value': 'Q84263196'},
    {'label': 'Schizophrenia', 'value': 'Q41112'}
]


# Define a function to query the Wikidata endpoint and return a DataFrame
def query_wikidata(disease_id):
    # Define SPARQL query
    query = f"""
    PREFIX target: <http://www.wikidata.org/entity/{disease_id}>

    # Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
    # Shubhanshu Mishra, Vetle Torvik
    select ?year (count(?work) as ?number_of_publications) where {{
      {{
        select (str(?year_) as ?year) (0 as ?pages) where {{
          # default values = 0
          ?year_item wdt:P31 wd:Q577 .
          ?year_item wdt:P585 ?date .
          bind(year(?date) as ?year_)
          {{
            select (min(?year_) as ?earliest_year) where {{
              {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
              union {{ ?work wdt:P921/wdt:P361+ target: . }}
              union {{ ?work wdt:P921/wdt:P1269+ target: . }}
              ?work wdt:P577 ?publication_date .
              bind(year(?publication_date) as ?year_)
            }}
          }}
          bind(year(now()) as ?next_year)
          filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
        }}
      }}
      union {{
        select ?work (min(?years) as ?year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?dates .
          bind(str(year(?dates)) as ?years) .
        }}
        group by ?work
      }}
    }}
    group by ?year
    order by ?year
    """

    # Set the query and format to JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query and convert the results to a Pandas DataFrame
    results = sparql.query().convert()
    df = pd.json_normalize(results["results"]["bindings"])
    df["year.value"] = pd.to_numeric(df["year.value"])
    df["number_of_publications.value"] = pd.to_numeric(df["number_of_publications.value"])

    return df

# Create the Dash app and layout
app4 = dash.Dash(__name__)
app4.layout = html.Div(children=[
    html.H1("Publications per year"),
    dcc.Dropdown(
        id="disease-dropdown",
        options=disease_options,
        value=disease_options[0]['value']
    ),
    dcc.Graph(id="publication-year-chart")
])

# Define callback to update the figure
@app4.callback(
    Output(component_id="publication-year-chart", component_property="figure"),
    Input(component_id="disease-dropdown", component_property="value")
)
def update_figure(selected_disease):
    # Define SPARQL query with selected entity ID
    query = f"""
    PREFIX target: <http://www.wikidata.org/entity/{selected_disease}>

    # Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
    # Shubhanshu Mishra, Vetle Torvik
    select ?year (count(?work) as ?number_of_publications) where {{
      {{
        select (str(?year_) as ?year) (0 as ?pages) where {{
          # default values = 0
          ?year_item wdt:P31 wd:Q577 .
          ?year_item wdt:P585 ?date .
          bind(year(?date) as ?year_)
          {{
            select (min(?year_) as ?earliest_year) where {{
              {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
              union {{ ?work wdt:P921/wdt:P361+ target: . }}
              union {{ ?work wdt:P921/wdt:P1269+ target: . }}
              ?work wdt:P577 ?publication_date .
              bind(year(?publication_date) as ?year_)
            }}
          }}
          bind(year(now()) as ?next_year)
          filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
        }}
      }}
      union {{
        select ?work (min(?years) as ?year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?dates .
          bind(str(year(?dates)) as ?years) .
        }}
        group by ?work
      }}
    }}
    group by ?year
    order by ?year
    """

    # Set the query and format to JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query and convert the results to a Pandas DataFrame
    results = sparql.query().convert()
    df = pd.json_normalize(results["results"]["bindings"])
    df["year.value"] = pd.to_numeric(df["year.value"])
    df["number_of_publications.value"] = pd.to_numeric(df["number_of_publications.value"])

    # Create the bar chart using Plotly
    fig = px.bar(df, x="year.value", y="number_of_publications.value")

    return fig

# Run the Dash app
if __name__ == '__main__':
    app4.run_server(debug=False)# , host='0.0.0.0', port = 8080)

127.0.0.1 - - [04/May/2023 15:21:12] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:12] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:13] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 15:21:13] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 15:21:13] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET /_favicon.ico?v=2.7.0 HTTP/1.1" 200 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 304 -
127.0.0.1 - - [04/May/2023 15:21:20] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
127.0.0.1 - - [04/