In [60]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Output, Input
import dash_bootstrap_components as dbc
import pandas as pd
import SPARQLWrapper



In [61]:
import SPARQLWrapper

# Client bound to the public Wikidata SPARQL endpoint.
sparql = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

# SPARQL text only: nothing in this cell sends it to the endpoint.
# For the entity bound to `target:` (Q84263196) the query unions two sub-selects:
#   * entities reached through P2293 from the target's own P2293 values, with
#     ?gene_count and the English labels joined by " // " in ?genes;
#   * entities that share a P780 value with the target, with ?symptom_count
#     and the English labels joined by " // " in ?symptoms.
# The target itself is filtered out, ?count is the sum of both counts (a
# missing count is treated as 0) and rows are ordered by ?count, descending.
# NOTE(review): the variable name suggests Q84263196 is COVID-19, and the query
# variable names suggest P2293 / P780 are the gene and symptom properties --
# confirm against Wikidata.
query_covid = '''
PREFIX target: <http://www.wikidata.org/entity/Q84263196>

SELECT
  ?count
  ?gene_count
  ?symptom_count
  ?disease ?diseaseLabel
  ?genes
  ?symptoms
{
  {
    SELECT ?disease (COUNT(?gene) AS ?gene_count) (GROUP_CONCAT(?gene_label; separator=" // ") AS ?genes) WHERE {
      target: wdt:P2293 ?gene .
      ?gene wdt:P2293 ?disease .
      FILTER (target: != ?disease)
      ?gene rdfs:label ?gene_label
      FILTER(lang(?gene_label) = "en")
    }
    GROUP BY ?disease
  }
  UNION
  {
    SELECT
      ?disease (COUNT(?symptom) AS ?symptom_count) (GROUP_CONCAT(?symptom_label; separator=" // ") AS ?symptoms)
    {
      target: wdt:P780 ?symptom .
      ?disease wdt:P780 ?symptom .
      FILTER (target: != ?disease)
      ?symptom rdfs:label ?symptom_label . FILTER(lang(?symptom_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?symptom_count, 0) + COALESCE(?gene_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

In [62]:
# Migraine, part 1: fetch every entity that shares genes (P2293) or symptoms
# (P780) with the `target:` entity Q133823 and load the rows into df_migraine,
# indexed by the Wikidata entity id of each related disease.

sparql_migraine = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

query_migraine = '''
PREFIX target: <http://www.wikidata.org/entity/Q133823>

SELECT
  ?count
  ?gene_count
  ?symptom_count
  ?disease ?diseaseLabel
  ?genes
  ?symptoms
{
  {
    SELECT ?disease (COUNT(?gene) AS ?gene_count) (GROUP_CONCAT(?gene_label; separator=" // ") AS ?genes) WHERE {
      target: wdt:P2293 ?gene .
      ?gene wdt:P2293 ?disease .
      FILTER (target: != ?disease)
      ?gene rdfs:label ?gene_label
      FILTER(lang(?gene_label) = "en")
    }
    GROUP BY ?disease
  }
  UNION
  {
    SELECT
      ?disease (COUNT(?symptom) AS ?symptom_count) (GROUP_CONCAT(?symptom_label; separator=" // ") AS ?symptoms)
    {
      target: wdt:P780 ?symptom .
      ?disease wdt:P780 ?symptom .
      FILTER (target: != ?disease)
      ?symptom rdfs:label ?symptom_label . FILTER(lang(?symptom_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?symptom_count, 0) + COALESCE(?gene_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

# Run the query and ask for the JSON result format.
sparql_migraine.setQuery(query_migraine)
sparql_migraine.setReturnFormat(SPARQLWrapper.JSON)
results_migraine = sparql_migraine.query().convert()

reformatted_dict_migraine = {}
entities_migraine = []
result_list_migraine = results_migraine['results']['bindings']

# Key each result row by the last path segment of its ?disease URI
# (e.g. ".../entity/Q12125" -> "Q12125"). Rows without a ?disease binding are
# skipped; a later row with the same id replaces the earlier one in the dict.
for binding in result_list_migraine:
    if 'disease' not in binding:
        continue
    entity_id = binding['disease']['value'].rsplit('/', 1)[-1]
    entities_migraine.append(entity_id)
    reformatted_dict_migraine[entity_id] = binding

# from_dict puts one entity per column; transpose so each entity is a row.
df_migraine = pd.DataFrame.from_dict(reformatted_dict_migraine).transpose()

In [63]:
# Preview the raw frame: every cell still holds the SPARQL JSON binding dict
# (with 'type' / 'value' keys), not the plain value.
df_migraine

Unnamed: 0,disease,symptom_count,symptoms,diseaseLabel,count,gene_count,genes
Q21173555,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'vomiting // naus...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q109466005,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // vomi...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q21396183,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // vomi...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q21167678,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // vomi...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q18558169,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache // vomi...","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
...,...,...,...,...,...,...,...
Q83319,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q47790,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q44727,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,
Q12125,"{'type': 'uri', 'value': 'http://www.wikidata....",{'datatype': 'http://www.w3.org/2001/XMLSchema...,"{'type': 'literal', 'value': 'headache'}","{'xml:lang': 'en', 'type': 'literal', 'value':...",{'datatype': 'http://www.w3.org/2001/XMLSchema...,,


In [64]:
# extract data from dictionaries
# Each populated cell is a SPARQL JSON binding dict such as
# {'type': 'literal', 'value': ...}; keep only its 'value'. Anything that is
# not a dict (e.g. NaN for a variable the row did not bind) passes through.
df_migraine = df_migraine.apply(
    lambda column: column.apply(
        lambda cell: cell['value'] if isinstance(cell, dict) else cell
    )
)

# Turn the ' // '-joined symptom labels into lists with a column-level
# assignment. The rows yielded by DataFrame.iterrows() may be copies, so
# writing to them is not guaranteed to change the frame. The isinstance guard
# leaves NaN (no shared symptoms) and already-split lists alone, which keeps
# this cell safe to re-run. The column check covers results in which no row
# bound ?symptoms at all.
if 'symptoms' in df_migraine.columns:
    df_migraine['symptoms'] = df_migraine['symptoms'].apply(
        lambda labels: labels.split(' // ') if isinstance(labels, str) else labels
    )


df_migraine

Unnamed: 0,disease,symptom_count,symptoms,diseaseLabel,count,gene_count,genes
Q21173555,http://www.wikidata.org/entity/Q21173555,4,"[vomiting, nausea, burping, photophobia]",Phenacyl chloride exposure,4,,
Q109466005,http://www.wikidata.org/entity/Q109466005,4,"[headache, vomiting, nausea, photophobia]",purulent meningitis,4,,
Q21396183,http://www.wikidata.org/entity/Q21396183,4,"[headache, vomiting, nausea, photophobia]",arsenic pentoxide exposure,4,,
Q21167678,http://www.wikidata.org/entity/Q21167678,4,"[headache, vomiting, nausea, photophobia]",allyl alcohol exposure,4,,
Q18558169,http://www.wikidata.org/entity/Q18558169,4,"[headache, vomiting, nausea, photophobia]",Venezuelan equine encephalitis,4,,
...,...,...,...,...,...,...,...
Q83319,http://www.wikidata.org/entity/Q83319,1,[headache],typhoid fever,1,,
Q47790,http://www.wikidata.org/entity/Q47790,1,[headache],tetanus,1,,
Q44727,http://www.wikidata.org/entity/Q44727,1,[headache],chickenpox,1,,
Q12125,http://www.wikidata.org/entity/Q12125,1,[headache],common cold,1,,


In [65]:
# Sanity check: selecting a single column yields a pandas Series.
type(df_migraine["symptoms"])

pandas.core.series.Series

In [66]:
# Migraine, part 2: fetch every entity that shares a treatment (P2176) with
# the `target:` entity Q133823 and load the rows into df_migraine2, indexed by
# the Wikidata entity id of each related disease.

sparql_migraine = SPARQLWrapper.SPARQLWrapper("https://query.wikidata.org/sparql")

query_migraine = '''
PREFIX target: <http://www.wikidata.org/entity/Q133823>

SELECT
  ?count
  ?treatment_count
  ?disease ?diseaseLabel
  ?treatments
{
  {
    SELECT ?disease (COUNT(?treatment) AS ?treatment_count) (GROUP_CONCAT(?treatment_label; separator=" // ") AS ?treatments) WHERE {
      target: wdt:P2176 ?treatment .
      ?disease wdt:P2176 ?treatment .
      FILTER (target: != ?disease)
      ?treatment rdfs:label ?treatment_label
      FILTER(lang(?treatment_label) = "en")
    }
    GROUP BY ?disease
  }

  # Aggregate count
  BIND((COALESCE(?treatment_count, 0)) AS ?count)

  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?count)
'''

# Run the query and ask for the JSON result format.
sparql_migraine.setQuery(query_migraine)
sparql_migraine.setReturnFormat(SPARQLWrapper.JSON)
results_migraine = sparql_migraine.query().convert()

reformatted_dict_migraine = {}
entities_migraine = []
result_list_migraine = results_migraine['results']['bindings']

# Index the result rows by entity id, i.e. the final path segment of the
# ?disease URI. Rows that carry no ?disease binding are ignored.
for row in result_list_migraine:
    if 'disease' in row:
        disease_uri = row['disease']['value']
        entity_id = disease_uri.rsplit('/', 1)[-1]
        entities_migraine.append(entity_id)
        reformatted_dict_migraine[entity_id] = row

# One entity per column after from_dict; transpose to get one entity per row.
df_migraine2 = pd.DataFrame.from_dict(reformatted_dict_migraine).transpose()

In [67]:
# extract data from dictionaries
# Each populated cell is a SPARQL JSON binding dict; keep only its 'value'.
# Non-dict cells pass through unchanged.
df_migraine2 = df_migraine2.apply(
    lambda column: column.apply(
        lambda cell: cell['value'] if isinstance(cell, dict) else cell
    )
)

# Turn the ' // '-joined treatment labels into lists with a column-level
# assignment. The rows yielded by DataFrame.iterrows() may be copies, so
# writing to them is not guaranteed to change the frame. Only strings are
# split: re-running this cell (values are already lists) or meeting a missing
# value no longer raises AttributeError. The column check covers an empty
# result set.
if 'treatments' in df_migraine2.columns:
    df_migraine2['treatments'] = df_migraine2['treatments'].apply(
        lambda labels: labels.split(' // ') if isinstance(labels, str) else labels
    )

In [68]:
# Preview the treatment frame after the 'treatments' strings have been split
# into lists.
df_migraine2

Unnamed: 0,disease,treatment_count,treatments,diseaseLabel,count
Q81938,http://www.wikidata.org/entity/Q81938,11,"[paracetamol, amitriptyline, ibuprofen, venlaf...",pain,11
Q55781316,http://www.wikidata.org/entity/Q55781316,8,"[gabapentin, (S)-duloxetine, pregabalin, ibupr...",osteoarthritis susceptibility 1,8
Q154430,http://www.wikidata.org/entity/Q154430,6,"[(S)-duloxetine, pregabalin, propranolol, venl...",anxiety,6
Q945238,http://www.wikidata.org/entity/Q945238,6,"[gabapentin, (S)-duloxetine, pregabalin, diclo...",peripheral neuropathy,6
Q41571,http://www.wikidata.org/entity/Q41571,5,"[pregabalin, topiramate, zonisamide, lamotrigi...",epilepsy,5
...,...,...,...,...,...
Q42844,http://www.wikidata.org/entity/Q42844,1,[(S)-duloxetine],major depressive disorder,1
Q56003062,http://www.wikidata.org/entity/Q56003062,1,[paracetamol],susceptibility to severe influenza,1
Q326921,http://www.wikidata.org/entity/Q326921,1,[paracetamol],concussion,1
Q12125,http://www.wikidata.org/entity/Q12125,1,[paracetamol],common cold,1


In [69]:
import plotly.express as px

# One row per (disease, treatment) pair, then tally how many related diseases
# list each treatment.
df_migraine_explode2 = df_migraine2.explode('treatments')
series2 = df_migraine_explode2['treatments'].value_counts()
drugs_count = series2.to_frame()

# The counts are already aggregated, so the histogram acts as a bar chart:
# one bar per treatment, bar height = its count.
fig = px.histogram(
    drugs_count,
    x=drugs_count.index,
    y=drugs_count["count"],
)
fig.show()

In [70]:
import plotly.graph_objects as go

# Number of related diseases that list each treatment.
treatment_counts = df_migraine_explode2['treatments'].value_counts()

# Donut chart (a pie with a hole) of each treatment's share.
donut_trace = go.Pie(
    labels=treatment_counts.index,
    values=treatment_counts.values,
    hole=.3,
)
fig = go.Figure(data=[donut_trace])

# Title the plot, then render it.
fig.update_layout(
    title_text='Percentage share of treatments for Migraine and related diseases'
)
fig.show()

In [71]:
# One row per (disease, symptom) pair, then tally how many related diseases
# list each symptom.
df_migraine_explode = df_migraine.explode('symptoms')
series = df_migraine_explode['symptoms'].value_counts()
symptoms_count = series.to_frame()

symptoms_count

Unnamed: 0_level_0,count
symptoms,Unnamed: 1_level_1
nausea,351
headache,340
vomiting,332
photophobia,34
burping,3


In [72]:
import plotly.express as px

# Bar-style histogram: one bar per symptom, bar height = number of related
# diseases listing it.
fig = px.histogram(
    symptoms_count,
    x=symptoms_count.index,
    y=symptoms_count["count"],
)
fig.show()

In [73]:
import plotly.express as px

# How many related diseases share 1, 2, 3, ... symptoms with the target.
migraine_symptoms_count = df_migraine['symptom_count'].value_counts()

fig = px.histogram(
    migraine_symptoms_count,
    x=migraine_symptoms_count.index,
    y=migraine_symptoms_count.values,
)

# Title, axis labels and a fixed figure width in pixels.
layout_options = dict(
    title="Migraine related disease number of symptoms",
    xaxis_title="Number of symptoms per related disease",
    yaxis_title="Number of diseases with x amount of symptoms",
    width=500,
)
fig.update_layout(**layout_options)
fig.show()

In [74]:
# Show the tally: index = number of shared symptoms, value = number of
# related diseases with that many.
migraine_symptoms_count

symptom_count
1    229
2    173
3    147
4     11
Name: count, dtype: int64

In [82]:
import plotly.graph_objs as go
import dash
from dash import dcc
from dash import html
import dash_table
from jupyter_dash import JupyterDash
import pandas as pd
import plotly.express as px

# Define the function to generate the treatment-count histogram
def plot_histogram(df):
    """Build a histogram with one bar per index label of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``"count"`` column, indexed by the labels to plot on the
        x axis (the layout below passes ``drugs_count``).

    Returns
    -------
    The figure returned by ``px.histogram``.

    Raises
    ------
    KeyError
        If ``df`` has no ``"count"`` column.
    """
    # Plot the frame that was passed in. The argument used to be ignored in
    # favour of the notebook-level ``series2``, so the function only worked
    # after an earlier cell had defined that global.
    fig = px.histogram(df, y=df["count"], x=df.index)
    return fig

# Define the function to generate the pie chart
def plot_pie(df):
    """Build a donut chart of the share of each index label in ``df``.

    Parameters
    ----------
    df : pandas.Series
        Counts indexed by label (the layout below passes ``treatment_counts``,
        a ``value_counts()`` result). Labels come from ``df.index`` and slice
        sizes from ``df.values``.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding a single ``go.Pie`` trace with a 0.3 hole.
    """
    # Use the argument rather than the notebook-level ``treatment_counts``,
    # which was previously read regardless of what the caller passed.
    fig = go.Figure(data=[go.Pie(labels=df.index,
                                 values=df.values,
                                 hole=.3)])
    fig.update_layout(title_text='Percentage share of treatments for Migraine and related diseases')
    return fig


# Define the function to generate the per-symptom histogram
def plot_symptoms_count_histogram(symptoms_count):
    """Build a histogram with one bar per symptom.

    ``symptoms_count`` is a frame indexed by symptom label with a ``"count"``
    column; the index goes on the x axis and the counts on the y axis. The
    returned figure is titled, has both axes labelled and is 500 wide.
    """
    layout_options = dict(
        title="Migraine related diseases symptoms",
        xaxis_title="Symptoms per related disease",
        yaxis_title="Number of diseases with symptom",
        width=500,
    )
    fig = px.histogram(
        symptoms_count,
        x=symptoms_count.index,
        y=symptoms_count["count"],
    )
    fig.update_layout(**layout_options)
    return fig


# Define the function to generate the pie chart for symptoms
def plot_symptoms_pie(symptoms_count):
    """Build a donut chart of each symptom's share.

    ``symptoms_count`` is a frame indexed by symptom label with a ``"count"``
    column; the index supplies the slice labels and ``"count"`` the slice
    sizes. Returns the titled figure.
    """
    donut_trace = go.Pie(
        labels=symptoms_count.index,
        values=symptoms_count["count"],
        hole=.3,
    )
    fig = go.Figure(data=[donut_trace])
    fig.update_layout(
        title_text='Percentage share of symptoms for Migraine and related diseases'
    )
    return fig

# Define the function to generate the symptoms-per-disease histogram
def plot_symptoms_histogram(df):
    """Build a histogram of how many diseases share each number of symptoms.

    Parameters
    ----------
    df : pandas.Series
        Counts indexed by number of shared symptoms (the layout below passes
        ``migraine_symptoms_count``, a ``value_counts()`` result). The index
        goes on the x axis and the values on the y axis.

    Returns
    -------
    The figure returned by ``px.histogram``, with title, axis labels and a
    width of 500 applied.
    """
    # Use the argument rather than the notebook-level
    # ``migraine_symptoms_count``, which was previously read regardless of
    # what the caller passed.
    fig = px.histogram(df, y=df.values, x=df.index)
    fig.update_layout(
        title="Migraine related disease number of symptoms",
        xaxis_title="Number of symptoms per related disease",
        yaxis_title="Number of diseases with x amount of symptoms",
        width=500)
    return fig

# Create a dash application
app = JupyterDash(__name__)

# Build every figure up front, paired with the component id it is shown under.
# Note that the 'symptoms-count-histogram' slot displays the symptoms pie chart.
graph_specs = [
    ('histogram-graph', plot_histogram(drugs_count)),
    ('pie-chart', plot_pie(treatment_counts)),
    ('symptoms-histogram', plot_symptoms_histogram(migraine_symptoms_count)),
    ('symptoms-count-histogram', plot_symptoms_pie(symptoms_count)),
]
graphs = [dcc.Graph(id=graph_id, figure=figure) for graph_id, figure in graph_specs]

# Define the layout: a heading followed by the four graphs, in order.
app.layout = html.Div(children=[
    html.H1(children='Migraine Analysis'),
    *graphs,
])

# Run the app, rendered inline in the notebook output
app.run_server(mode='inline')



Dash is running on http://127.0.0.1:8050/



In [80]:
import plotly.express as px

# Bar-style histogram: one bar per symptom, bar height = number of related
# diseases listing it.
fig = px.histogram(
    symptoms_count,
    x=symptoms_count.index,
    y=symptoms_count["count"],
)

# Title, axis labels and a fixed figure width in pixels.
layout_options = dict(
    title="Migraine related diseases symptoms",
    xaxis_title="Symptoms per related disease",
    yaxis_title="Number of diseases with symptom",
    width=500,
)
fig.update_layout(**layout_options)
fig.show()