# Import required packages


In [13]:
import pandas as pd

In [3]:
!pip install SPARQLWrapper



In [4]:
import SPARQLWrapper

In [6]:
from SPARQLWrapper import SPARQLWrapper, JSON
import plotly.express as px

# Module-level SPARQL client bound to the Wikidata Query Service endpoint.
# co_occuring_topics() and create_map() read this global; publications_per_year()
# builds its own client instead.
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [9]:
def co_occuring_topics(topic="Q64513386", min_count=100):
    """Bar chart of topics that co-occur with ``topic`` as main subject of works.

    Runs a SPARQL query through the module-level ``sparql`` client, counts the
    works per co-occurring topic, keeps the topics whose count is strictly
    greater than ``min_count`` and plots them with Plotly Express.

    Parameters
    ----------
    topic : str
        Wikidata item ID (``"Q"`` followed by digits) of the queried topic.
    min_count : int
        Only topics with more than this many works are plotted.

    Returns
    -------
    plotly.graph_objects.Figure or None
        The bar chart, or ``None`` when the query returns no rows or no topic
        exceeds ``min_count``.

    Raises
    ------
    ValueError
        If ``topic`` is not a syntactically valid Wikidata item ID.
    """
    import re

    # The ID is spliced into the query text, so reject anything that is not a
    # plain item ID before it reaches the endpoint.
    if not (isinstance(topic, str) and re.fullmatch(r"Q[1-9]\d*", topic)):
        raise ValueError(f"Not a valid Wikidata item ID: {topic!r}")

    # "{{ q }}" is a placeholder substituted below. str.replace is used instead
    # of str.format / % because the query itself contains braces and "%result".
    # The prefix must be the entity IRI (http://www.wikidata.org/entity/...):
    # the wiki page URL never matches an entity, and SUBSTR(..., 32) relies on
    # the 31-character entity prefix.
    sparql_query = '''
    PREFIX target: <http://www.wikidata.org/entity/{{ q }}>

    SELECT ?count (CONCAT("/topics/{{ q }},", SUBSTR(STR(?topic), 32)) AS ?countUrl)
           ?topic ?topicLabel (CONCAT("/topic/", SUBSTR(STR(?topic), 32)) AS ?topicUrl)
           ?example_work ?example_workLabel (CONCAT("/work/", SUBSTR(STR(?example_work), 32)) AS ?example_workUrl)
    WITH {
      SELECT (COUNT(?work) AS ?count) ?topic (SAMPLE(?work) AS ?example_work) WHERE {
        # Find works for the specific queried topic
          ?work wdt:P921/( wdt:P31*/wdt:P279* | wdt:P361+ | wdt:P1269+) target: .

        # Find co-occuring topics
        ?work wdt:P921 ?topic .

        # Avoid listing the queried topic
          FILTER (target: != ?topic)
      }
      GROUP BY ?topic
    } AS %result
    WHERE {
      # Label the results
      INCLUDE %result
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en,da,de,es,fr,jp,nl,no,ru,sv,zh" . }
    }
    ORDER BY DESC(?count)
    '''.replace("{{ q }}", topic)

    # Set the query and format to JSON
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    bindings = sparql.query().convert()["results"]["bindings"]
    if not bindings:
        # json_normalize([]) yields a frame without columns; indexing
        # "count.value" on it would raise KeyError.
        return None

    df = pd.json_normalize(bindings)
    df["count.value"] = pd.to_numeric(df["count.value"])

    # Keep only the most frequent co-occurring topics
    df_filtered = df[df["count.value"] > min_count]
    if df_filtered.empty:
        return None

    fig = px.bar(df_filtered, y='topicLabel.value', x='count.value').update_layout(
        title='Publication Count by Co-Occurring Topics',
        xaxis=dict(title='Count'),
        yaxis=dict(title='Topic'))
    return fig

In [10]:
import plotly.graph_objects as go


def create_map(topic="Q64513386"):
    """Map of geo-located topics that co-occur with ``topic`` in works.

    Queries the module-level ``sparql`` client for main subjects of works about
    ``topic`` that carry a coordinate location (``wdt:P625``), parses the
    returned WKT points and plots them as markers.

    Parameters
    ----------
    topic : str
        Wikidata item ID (``"Q"`` followed by digits) of the queried topic.

    Returns
    -------
    plotly.graph_objects.Figure
        A ``Scattermapbox`` figure; it has no markers when the query returns
        no rows or no coordinate could be parsed.

    Raises
    ------
    ValueError
        If ``topic`` is not a syntactically valid Wikidata item ID.

    Notes
    -----
    The Mapbox access token is read from the ``MAPBOX_ACCESS_TOKEN``
    environment variable. Without it the token-free ``open-street-map`` style
    is used. The token previously hard-coded here was committed to the
    notebook and should be treated as leaked and revoked.
    """
    import os
    import re

    # The ID is spliced into the query text, so reject anything that is not a
    # plain item ID before it reaches the endpoint.
    if not (isinstance(topic, str) and re.fullmatch(r"Q[1-9]\d*", topic)):
        raise ValueError(f"Not a valid Wikidata item ID: {topic!r}")

    # "{{ q }}" is substituted with str.replace because the query contains
    # "%results", which rules out %-formatting. The prefix must be the entity
    # IRI; the wiki page URL never matches an entity.
    # The former BIND(xsd:float(STRAFTER(...))) lines were dropped: they tried
    # to cast text such as "lon lat)" to a float, and their variables were
    # never read on the Python side.
    query = '''
    PREFIX target: <http://www.wikidata.org/entity/{{ q }}>

    SELECT
      ?location ?locationLabel
      ?geo
      ?example_work ?example_workLabel
    WITH {
      SELECT
        ?location ?geo
        (SAMPLE(?work) AS ?example_work)
      WHERE {
        # Find works that are marked with the main subject of the topic.
        ?work wdt:P921 / ( wdt:P31*/wdt:P279* | wdt:P361+ | wdt:P1269+ ) target: .

        # Identify co-occurring topics that are geo-locatable.
        ?work wdt:P921 ?location .
        ?location wdt:P625 ?geo .
      }
      GROUP BY ?location ?geo
    } AS %results
    WHERE {
      INCLUDE %results

      # Label the results
      SERVICE wikibase:label {
        bd:serviceParam wikibase:language "en,da,de,es,fr,jp,nl,no,ru,sv,zh".
      }
    }
    '''.replace("{{ q }}", topic)

    # Set the query and format to JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    bindings = sparql.query().convert()["results"]["bindings"]

    if bindings:
        df = pd.json_normalize(bindings)

        # WKT points are "Point(<lon> <lat>)": longitude first, latitude second.
        coords = df["geo.value"].str.extract(
            r"Point\(\s*(?P<lon>[-+\d.eE]+)\s+(?P<lat>[-+\d.eE]+)\s*\)"
        )
        df["lon"] = pd.to_numeric(coords["lon"], errors="coerce")
        df["lat"] = pd.to_numeric(coords["lat"], errors="coerce")

        # Rows whose coordinates could not be parsed cannot be placed on a map.
        df = df.dropna(subset=["lat", "lon"])
        lat, lon, labels = df["lat"], df["lon"], df["locationLabel.value"]
    else:
        # json_normalize([]) has no "geo.value" column; plot an empty map.
        lat, lon, labels = [], [], []

    # Create the map figure using Plotly
    fig = go.Figure(
        go.Scattermapbox(
            lat=lat,
            lon=lon,
            mode="markers",
            marker=dict(size=5, color="blue"),
            text=labels,
            hoverinfo="text"
        )
    )

    mapbox_layout = dict(
        center=dict(lat=0, lon=0),  # Set the initial center of the map
        zoom=0.5,  # Set the initial zoom level
    )
    access_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
    if access_token:
        mapbox_layout["accesstoken"] = access_token
    else:
        # Raster style served without a Mapbox account.
        mapbox_layout["style"] = "open-street-map"

    fig.update_layout(
        mapbox=mapbox_layout,
        title="Map Visualization",
    )

    return fig

In [13]:
def publications_per_year(disease):
    """Bar chart of the number of works per publication year for a topic.

    Sends a SPARQL query to the Wikidata Query Service for works whose main
    subject (``wdt:P921``) reaches ``disease``, grouped by the smallest
    publication year of each work, and plots the counts with Plotly Express.
    The first sub-select contributes year rows without works (presumably to
    zero-fill years between the earliest publication and now — confirm
    against the upstream LEGOLAS/Scholia query).

    Parameters
    ----------
    disease : str
        Wikidata item ID (``"Q"`` followed by digits) of the topic.

    Returns
    -------
    plotly.graph_objects.Figure
        The bar chart; it has no bars when the query returns no rows.

    Raises
    ------
    ValueError
        If ``disease`` is not a syntactically valid Wikidata item ID.
    """
    import re

    # The ID is interpolated into the query text below, so reject anything
    # that is not a plain item ID before it reaches the endpoint.
    if not (isinstance(disease, str) and re.fullmatch(r"Q[1-9]\d*", disease)):
        raise ValueError(f"Not a valid Wikidata item ID: {disease!r}")

    # Define SPARQL endpoint (a client local to this call, independent of the
    # module-level one)
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

    # Define SPARQL query
    query = f"""
    PREFIX target: <http://www.wikidata.org/entity/{disease}>

    # Inspired from LEGOLAS - http://abel.lis.illinois.edu/legolas/
    # Shubhanshu Mishra, Vetle Torvik
    select ?year (count(?work) as ?number_of_publications) where {{
      {{
        select (str(?year_) as ?year) (0 as ?pages) where {{
          # default values = 0
          ?year_item wdt:P31 wd:Q577 .
          ?year_item wdt:P585 ?date .
          bind(year(?date) as ?year_)
          {{
            select (min(?year_) as ?earliest_year) where {{
              {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
              union {{ ?work wdt:P921/wdt:P361+ target: . }}
              union {{ ?work wdt:P921/wdt:P1269+ target: . }}
              ?work wdt:P577 ?publication_date .
              bind(year(?publication_date) as ?year_)
            }}
          }}
          bind(year(now()) as ?next_year)
          filter (?year_ >= ?earliest_year && ?year_ <= ?next_year)
        }}
      }}
      union {{
        select ?work (min(?years) as ?year) where {{
          {{ ?work wdt:P921/wdt:P31*/wdt:P279* target: . }}
          union {{ ?work wdt:P921/wdt:P361+ target: . }}
          union {{ ?work wdt:P921/wdt:P1269+ target: . }}
          ?work wdt:P577 ?dates .
          bind(str(year(?dates)) as ?years) .
        }}
        group by ?work
      }}
    }}
    group by ?year
    order by ?year
    """

    # Set the query and format to JSON
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query and convert the results to a Pandas DataFrame
    bindings = sparql.query().convert()["results"]["bindings"]
    if bindings:
        df = pd.json_normalize(bindings)
    else:
        # json_normalize([]) has no columns; provide the two the chart needs so
        # an unknown topic yields an empty chart instead of a KeyError.
        df = pd.DataFrame(columns=["year.value", "number_of_publications.value"])

    df["year.value"] = pd.to_numeric(df["year.value"])
    df["number_of_publications.value"] = pd.to_numeric(df["number_of_publications.value"])

    # Create the bar chart using Plotly
    fig = px.bar(df, x="year.value", y="number_of_publications.value").update_layout(
        title='Publications per year',
        yaxis=dict(title='Count'),
        xaxis=dict(title='Year'))

    return fig

# publications_per_year('Q64513386')

In [None]:
!pip install jupyter-dash

In [None]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

# Create the Dash app
app = dash.Dash(__name__)

# Define the layout of the app: a heading and three graphs laid out in a row.
# The graphs start empty and are filled by the callbacks below.
app.layout = html.Div(
    children=[
        html.H1("Bulimia Nervosa"),
        html.Div(
            children=[
                dcc.Graph(id="co-occurring-topics"),
                dcc.Graph(id="co-occurring-topics-map"),
                dcc.Graph(id="publications-per-year"),
            ],
            # Dash passes style dicts to React, which expects camelCase keys.
            style={"display": "flex", "justifyContent": "space-between"},
        ),
    ]
)

# A callback without any Input is never invoked by Dash. Each callback below
# therefore listens to its own graph's static ``id`` property: that input is
# present as soon as the layout renders, so the callback fires once on page
# load, and it never changes afterwards.


# Callback to update the co-occurring topics graph on page load
@app.callback(
    Output("co-occurring-topics", "figure"),
    Input("co-occurring-topics", "id"),
)
def update_co_occuring_topics(_component_id):
    """Fill the co-occurring topics bar chart; the argument is only a trigger."""
    fig = co_occuring_topics()
    # co_occuring_topics() returns None when there is nothing to plot; leave
    # the graph untouched in that case instead of sending a null figure.
    return fig if fig is not None else dash.no_update


# Callback to update the co-occurring topics map on page load
@app.callback(
    Output("co-occurring-topics-map", "figure"),
    Input("co-occurring-topics-map", "id"),
)
def update_co_occuring_topics_map(_component_id):
    """Fill the map of geo-located co-occurring topics; the argument is only a trigger."""
    fig = create_map()
    return fig if fig is not None else dash.no_update


# Callback to update the publications per year graph on page load
@app.callback(
    Output("publications-per-year", "figure"),
    Input("publications-per-year", "id"),
)
def update_publications_per_year(_component_id):
    """Fill the publications-per-year bar chart; the argument is only a trigger."""
    fig = publications_per_year('Q64513386')
    return fig if fig is not None else dash.no_update


# Run the app
if __name__ == "__main__":
    # One call only: the previous nested call blocked inside the inner
    # run_server and then passed its None result on as the first positional
    # argument of the outer one.
    # NOTE(review): newer Dash releases prefer app.run(...) over run_server —
    # confirm against the installed version.
    app.run_server(port=8080, debug=True)