In [1]:
# !apt-get update -qq > /dev/null
# !apt-get install openjdk-8-jdk-headless -qq > /dev/null
# !wget -q https://downloads.apache.org/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
# !tar xf spark-2.4.8-bin-hadoop2.7.tgz
# !pip install -q findspark

In [2]:
import os
# Raw strings keep the Windows backslashes literal. In a normal string literal
# "\P", "\J", "\j" and "\s" are invalid escape sequences, which Python only
# tolerates with a deprecation/syntax warning.
os.environ["JAVA_HOME"] = r"C:\Program Files\Java\jdk1.8.0_202"
os.environ["SPARK_HOME"] = r"C:\spark-3.2.0-bin-hadoop3.2"

import findspark
# NOTE(review): presumably init() makes the pyspark under SPARK_HOME importable
# from this kernel - confirm against the findspark documentation.
findspark.init()
# Last expression of the cell: displays the Spark home that findspark located.
findspark.find()

'C:\\spark-3.2.0-bin-hadoop3.2'

In [3]:
from pyspark import SparkContext
# Create the Spark context for this notebook, named "YourTest", with the
# "local[*]" master.
sc = SparkContext(appName="YourTest", master="local[*]")
# sc = SparkContext("local", "test-app")

In [4]:
# from pyspark.sql import SQLContext
# sqlContext = SQLContext(sc)

from pyspark.sql import SparkSession
import random

# Session used by every query below. The UI port is drawn at random from
# [4000, 5000).
# NOTE(review): a SparkContext was already created in the previous cell with
# master "local[*]"; getOrCreate() may reuse it, in which case the master and
# UI-port settings here might not take effect - confirm.
spark = (
    SparkSession.builder
    .appName("YourTest")
    .master("local[2]")
    .config('spark.ui.port', random.randrange(4000, 5000))
    .getOrCreate()
)

## Importing necessary packages

In [5]:
# #installing dash
# !pip install dash
# !pip install jupyter-dash
# !pip install dash_bootstrap_components

In [6]:
### importing libraries
#json
# import jsonlines
#spark
from pyspark.sql.functions import explode,col
from pyspark.sql import functions as F
from pyspark.sql.functions import concat_ws
#plotly
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
# matplotlib and wordcloud
import matplotlib.pyplot as plt
from wordcloud import WordCloud
#dash
from jupyter_dash import JupyterDash
from dash import dcc
import dash_bootstrap_components as dbc
from dash import html
from dash.dependencies import Input, Output 
# numerical computing
import pandas as pd
import numpy as np

## Data upload

In [7]:
# from google.colab import files
# uploaded = files.upload()

In [8]:
# Load the JSON-lines sample, newest "year" first, and mark it for caching
# because every later query starts from this frame.
data = (
    spark.read
    .json("sample.jsonl")
    .sort("year", ascending=False)
)
data.cache()

DataFrame[abstract: string, authors: array<struct<affiliation:string,affiliationid:bigint,country:string,gridid:string,id:bigint,name:string,order:bigint>>, citation_for_year: array<struct<citationcount:bigint,year:string>>, citationcount: bigint, conferenceseriesid: bigint, confname: string, confplace: string, confseries: string, confseriesname: string, countries: struct<albania:double,algeria:double,argentina:double,australia:double,austria:double,azerbaijan:double,bahrain:double,bangladesh:double,belarus:double,belgium:double,bermuda:double,bolivia:double,bosnia and herzegovina:double,botswana:double,brazil:double,british virgin islands:double,brunei:double,bulgaria:double,cambodia:double,cameroon:double,canada:double,chile:double,china:double,colombia:double,costa rica:double,croatia:double,cuba:double,cyprus:double,czechia:double,democratic republic of the congo:double,denmark:double,ecuador:double,egypt:double,estonia:double,ethiopia:double,fiji:double,finland:double,france:doubl

In [9]:
# Number of records in the loaded dataset (displayed as the cell output).
data.count()

50000

In [10]:
# Expose the dataset to Spark SQL as the "all_data" view, read it back as the
# frame the dashboard helpers filter, and preview the "references" column.
data.createOrReplaceTempView("all_data")
all_data_sql = spark.sql("select * from all_data")
all_data_sql.select(col("references")).show(n=5)

+--------------------+
|          references|
+--------------------+
|                null|
|[2912017389, 3931...|
|[2550146718, 2008...|
|[2563412847, 2060...|
|[2798728990, 2004...|
+--------------------+
only showing top 5 rows



In [11]:
# One row per (paper id, topic); registered as the "topics" view that the
# topic queries below read from.
testing2 = all_data_sql.select(
    col("id"),
    explode(col("topics")).alias("topic"),
)
testing2.createOrReplaceTempView("topics")

In [12]:
# Attach to every (paper, topic) row the number of papers carrying that topic,
# then show the ten most frequent topics without truncating their names.
testing3=spark.sql(
    """select topic,   
    count(id) over (partition by topic) as total_papers_topic from topics"""
)
(
    testing3
    .distinct()
    .orderBy(col("total_papers_topic").desc())
    .show(n=10, truncate=False)
)

+-----------------------+------------------+
|topic                  |total_papers_topic|
+-----------------------+------------------+
|computer science       |50000             |
|artificial intelligence|11815             |
|computer network       |5796              |
|computer vision        |4943              |
|machine learning       |3969              |
|data mining            |3566              |
|real time computing    |3529              |
|pattern recognition    |3450              |
|distributed computing  |3447              |
|electronic engineering |2593              |
+-----------------------+------------------+
only showing top 10 rows



## Preparing the data

In [32]:
# Top papers by topic
def top_papers(n=5):
    """Return the ``n`` most-cited papers of the current topic selection.

    Reads the module-level ``main_topic_df`` (set by ``main_topic``).

    Args:
        n: maximum number of papers to keep.

    Returns:
        A Spark DataFrame with columns ``id``, ``papertitle``, ``year``,
        ``citationcount`` and ``authors`` (list of author names), sorted by
        ``citationcount`` in descending order.
    """
    most_cited = (
        main_topic_df
        .select("id", "papertitle", "authors", "year", "citationcount")
        .orderBy("citationcount", ascending=False)
        .limit(n)
    )
    # Flatten the authors array so the names can be collected per paper.
    author_names = (
        most_cited
        .select("id", "papertitle", explode("authors").alias("author"), "year", "citationcount")
        .select("id", "papertitle", "year", "citationcount",
                F.col("author").getItem("name").alias("name"))
    )
    return (
        author_names
        .groupBy(["id", "papertitle", "year", "citationcount"])
        .agg(F.collect_list("name").alias("authors"))
        # The aggregation does not keep the ordering established before
        # limit(), so sort again to return the papers most-cited first.
        .orderBy("citationcount", ascending=False)
    )


# For timeline chart. number of citations. Each dot represents a paper
def timeline():
    """Scatter plot of citation count against year for the current selection.

    Reads the module-level ``main_topic_df`` and returns a Plotly figure with
    one half-transparent marker per paper.
    """
    papers_by_year = (
        main_topic_df
        .select("id", "year", "citationcount")
        .orderBy("year")
    )
    return px.scatter(papers_by_year.toPandas(), x="year", y="citationcount", opacity=0.5)

#explode authors
def exploded_authors():
    """Flatten the current selection to one row per (paper, author).

    Reads the module-level ``main_topic_df``; stores the cached result in the
    module-level ``exploded_authors_topic`` and also returns it. Columns:
    ``id``, ``country``, ``name``, ``author_id``, ``affiliation``,
    ``citationcount``.
    """
    global exploded_authors_topic
    one_author_per_row = main_topic_df.select(
        "id", explode("authors").alias("author"), "citationcount"
    )
    author = F.col("author")
    flattened = one_author_per_row.select(
        "id",
        author.getItem("country").alias("country"),
        author.getItem("name").alias("name"),
        author.getItem("id").alias("author_id"),
        author.getItem("affiliation").alias("affiliation"),
        "citationcount",
    )
    exploded_authors_topic = flattened.cache()
    return exploded_authors_topic

# countries of institutions
def countries():
    """Choropleth of total citations per author country.

    Reads the module-level ``exploded_authors_topic`` (set by
    ``exploded_authors``). Each (paper, country) pair is counted once, so a
    paper with several authors from one country adds its citations once.

    Returns:
        A Plotly choropleth figure coloured by the ``citations`` column.
    """
    df = exploded_authors_topic.select("id","country","citationcount").distinct()\
            .groupBy("country").agg(F.sum("citationcount").alias("citations")).toPandas()
    map_data = px.data.gapminder().query("year==2007")[["iso_alpha","country","continent"]]
    # Keep every aggregated row here. groupBy output has no guaranteed order,
    # so slicing off "the first row" could discard a real country. Rows with a
    # missing country, or one that gapminder does not know, get no iso_alpha in
    # the left merge and are removed by dropna().
    # NOTE(review): the merge matches on the exact "country" text - confirm the
    # author country names use the same spelling and casing as gapminder.
    map_data = df.merge(map_data,how="left").dropna()
    fig = px.choropleth(map_data, locations="iso_alpha",color="citations",
                     hover_name="country",color_continuous_scale=px.colors.sequential.Plasma)
    return fig


#Top authors for a topic
def top_authors(n=5):
    """Return the ``n`` authors with the highest summed citation count.

    Reads the module-level ``exploded_authors_topic``; stores the cached
    result in the module-level ``top_authors_df`` and also returns it.
    Authors are grouped by (``name``, ``author_id``); rows with a null name
    are excluded.
    """
    global top_authors_df
    citations_per_author = (
        exploded_authors_topic
        .select("name", "author_id", "citationcount")
        .groupBy(["name", "author_id"])
        .agg(F.sum("citationcount").alias("citationcount"))
    )
    ranked = (
        citations_per_author
        .filter("name is not null")
        .orderBy("citationcount", ascending=False)
    )
    top_authors_df = ranked.limit(n).cache()
    return top_authors_df

    
#Top institutions for a topic
def top_institutions(n=5):
    """Return the ``n`` affiliations with the highest summed citation count.

    Reads the module-level ``exploded_authors_topic``; stores the result in
    the module-level ``top_institutes_df`` and also returns it. Each
    (paper, affiliation) pair is counted once; null affiliations are excluded.
    """
    global top_institutes_df
    paper_affiliations = (
        exploded_authors_topic
        .select("id", "affiliation", "citationcount")
        .distinct()
    )
    citations_per_affiliation = (
        paper_affiliations
        .groupBy(["affiliation"])
        .agg(F.sum("citationcount").alias("citationcount"))
    )
    top_institutes_df = (
        citations_per_affiliation
        .filter("affiliation is not null")
        .orderBy("citationcount", ascending=False)
        .limit(n)
    )
    return top_institutes_df


def by_type():
    """Pie chart of the number of papers per ``type`` in the current selection.

    Reads the module-level ``main_topic_df``; papers without a type are left
    out.
    """
    typed_papers = (
        main_topic_df
        .select("id", "type")
        .distinct()
        .filter("type is not null")
    )
    papers_per_type = (
        typed_papers
        .groupBy("type")
        .agg(F.count("id").alias("number of papers"))
        .toPandas()
    )
    return px.pie(papers_per_type, names="type", values="number of papers")

    
def main_topic(topic_input, year_input):
    """Select the papers for one topic and an inclusive range of years.

    Args:
        topic_input: topic name; it is lower-cased before being matched
            against the ``topics`` array.
        year_input: two-element sequence ``[first_year, last_year]``.

    Returns:
        A Spark DataFrame with ``id``, ``type``, ``papertitle``, ``authors``,
        ``year`` (as extracted by ``F.year``) and ``citationcount``. The same
        frame is stored in the module-level ``main_topic_df``.
    """
    global main_topic_df
    has_topic = F.array_contains("topics", topic_input.lower())
    wanted_years = list(range(int(year_input[0]), int(year_input[1]) + 1))
    in_year_range = F.year("year").isin(wanted_years)
    main_topic_df = (
        all_data_sql
        .filter(has_topic)
        .filter(in_year_range)
        .select("id", "type", "papertitle", "authors",
                F.year("year").alias("year"), "citationcount")
    )
    return main_topic_df




In [33]:
# top_author, top_institutes, top_paper, timeline_data, type_data, countries_data = main_topic(topic_input, year_input)

In [34]:
# top_institutes.toPandas()

## Filters

In [35]:
def distinct_topics():
    """Return a one-column DataFrame of the distinct topics in the ``topics`` view."""
    query = "select distinct topic from topics"
    return spark.sql(query)

# Dropdown labels: every distinct topic in title case.
topic_names = [name.title() for name in distinct_topics().toPandas()["topic"].tolist()]

# Slider bounds: the part before the first '-' of the smallest and largest
# "year" value, fetched with a single aggregation.
year_bounds = data.agg(F.min("year").alias("earliest"), F.max("year").alias("latest")).first()
year_max = year_bounds["latest"].split('-')[0]
year_min = year_bounds["earliest"].split('-')[0]

## MAIN DASHBOARD

In [36]:
app = JupyterDash(external_stylesheets=[dbc.themes.DARKLY])

# Text colours of the ranked cards, from rank 1 to rank 5.
_AUTHOR_CARD_COLORS = ['#0000db', '#3838ff', '#8585ff', '#abbdf2', '#e0e0ff']
_INSTITUTE_CARD_COLORS = ['#da9100', '#fcc200', '#ffe347', '#ffeb7a', '#eee8aa']


def _ranked_card(prefix, rank, color, name_height):
    """Build one column holding a card with a name line and a citations line.

    The two text elements get the ids ``"<prefix>-<rank>"`` and
    ``"<prefix>-<rank>-citations"``; the callbacks fill them in by id.
    """
    return dbc.Col([
        dbc.Card(dbc.CardBody([
            html.H4(id=f"{prefix}-{rank}",
                    style={'textAlign': 'center', 'color': color,
                           'fontSize': 25, 'height': name_height}),
            html.H6(id=f"{prefix}-{rank}-citations",
                    style={'textAlign': 'center', 'color': color,
                           'fontSize': 15, 'height': "1vw"}),
        ]))
    ])


def _ranked_card_row(heading, prefix, colors, name_height):
    """Build a row made of a centred heading followed by one card per colour."""
    cards = [_ranked_card(prefix, rank, color, name_height)
             for rank, color in enumerate(colors, start=1)]
    return dbc.Row([html.H5(children=[heading], style={'textAlign': 'center'})] + cards)


app.layout = html.Div([
    dbc.Container([
        # Page title
        dbc.Row([
            dbc.Col(
                html.H3(children="Exploring Research in CS with different Topics",
                        style={'color': '#3838ff'},
                        className="text-center mt-4 mb-5"),
                align="center", className="mt4")
        ]),
        # Filters: topic dropdown and year range slider
        dbc.Row([
            dbc.Col(
                html.Div(children=[
                    html.H5("Topic", className="text-center mt-4 mb-5",
                            style={'width': '20vw'}),
                    dcc.Dropdown(id="topic-select",
                                 options=[{'label': i, 'value': i} for i in topic_names],
                                 value="Artificial Intelligence",
                                 style={'width': '20vw', 'color': 'black'}),
                ]),
                align="center",
                width=2),
            dbc.Col(width=1),
            dbc.Col(
                html.Div(children=[
                    html.H5("Year Range", className="text-center mt-4 mb-5",
                            style={'width': '40vw', 'display': 'inline-block', 'padding': 1}),
                    dcc.RangeSlider(id="year-select",
                                    min=int(year_min),
                                    max=int(year_max),
                                    step=1,
                                    value=[2010, 2015],
                                    tooltip={"placement": "bottom", "always_visible": True}),
                ]),
                align="center",
                width=5),
        ], justify="center"),
        html.Br(),
        html.Br(),
        # Table of top papers
        dbc.Row([
            dbc.Col([dcc.Graph(id="papers-table")], align="center")
        ]),
        html.Br(),
        html.Br(),
        # Citation scatter plot next to the paper-type pie chart
        dbc.Row([
            dbc.Col(html.Div([dcc.Graph(id="scatter-plot")]), width=7, align="center"),
            dbc.Col(html.Div([dcc.Graph(id="pie-chart")]), width=5, align="center"),
        ]),
        html.Br(),
        html.Br(),
        # World map of citations per country
        dbc.Row([
            dbc.Col([dcc.Graph(id="choropleth-chart")], align="center")
        ]),
        html.Br(),
        html.Br(),
        _ranked_card_row("Top 5 Authors in the Selected Field",
                         "author", _AUTHOR_CARD_COLORS, "4vw"),
        html.Br(),
        html.Br(),
        _ranked_card_row("Top 5 Institutions with Research in the Selected Field",
                         "institute", _INSTITUTE_CARD_COLORS, "5vw"),
        html.Br(),
    ])
])

@app.callback(Output("papers-table", "figure"),
              [Input("topic-select", "value"),
               Input("year-select", "value")])
def update_table(topic_input,year_input):
    """Rebuild the table of the ten most-cited papers for the selected filters.

    Also replaces the module-level ``main_topic_df`` with a cached copy of the
    current selection.
    """
    global main_topic_df
    main_topic_df = main_topic(topic_input, year_input).cache()

    papers = top_papers(10).toPandas()

    titles = papers.papertitle.apply(lambda text: text.capitalize())
    # Join each author list into one comma-separated, title-cased string.
    author_text = papers.authors.apply(lambda names: ', '.join(map(str, names)).title())

    header = dict(values=["Title","Year of Publication","Citation Count","authors"],
                  fill_color='black',
                  align='left',
                  font=dict(color="white", size=14))
    cells = dict(values=[titles, papers.year, papers.citationcount, author_text],
                 fill_color='#d1d1ff',
                 font=dict(color='black', size=14),
                 align='left')
    fig = go.Figure(data=[go.Table(header=header, cells=cells)])

    fig.layout.template = "plotly_dark"
    fig.update_layout(
        plot_bgcolor='rgba(0, 0, 0, 0)',
        paper_bgcolor='rgba(0, 0, 0, 0)',
        title=f"Top Papers in {topic_input} from {year_input[0]} to {year_input[1]}",
        title_x=0.5,
        margin=dict(l=20, r=20, t=30, b=20),
    )
    return fig


## scatter plot
@app.callback(Output(component_id="scatter-plot", component_property="figure"),
               [Input(component_id="topic-select", component_property="value"),
                Input(component_id="year-select", component_property="value")])
def update_scatter(topic_input,year_input):
    """Rebuild the citation-count-per-year scatter plot for the selected filters.

    Args:
        topic_input: value of the topic dropdown.
        year_input: ``[first_year, last_year]`` from the range slider.

    Returns:
        A Plotly scatter figure with one marker per paper.
    """
    # Build the selection from this callback's own inputs. Reading the global
    # main_topic_df only works if another callback has already set it for the
    # same inputs, which is not guaranteed when this callback fires first.
    topic_df = main_topic(topic_input, year_input)
    df = topic_df.select("id","year","citationcount")\
        .orderBy("year").toPandas()
    title = f"Citation Count per Year for Papers in {topic_input}"
    fig = px.scatter(df,x="year",y="citationcount", opacity=0.5, title = title)
    fig.layout.template = "plotly_dark"
    fig.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)',paper_bgcolor='rgba(0, 0, 0, 0)', title_x =0.5,
                     margin=dict(l=20, r=20, t=30, b=20))
    return fig

## pie chart
@app.callback(Output(component_id="pie-chart", component_property="figure"),
               [Input(component_id="topic-select", component_property="value"),
                Input(component_id="year-select", component_property="value")])
def update_pie(topic_input,year_input):
    """Rebuild the donut chart of paper types for the selected filters.

    Args:
        topic_input: value of the topic dropdown.
        year_input: ``[first_year, last_year]`` from the range slider.

    Returns:
        A Plotly pie figure with one slice per non-null paper type.
    """
    # Build the selection from this callback's own inputs. Reading the global
    # main_topic_df only works if another callback has already set it for the
    # same inputs, which is not guaranteed when this callback fires first.
    topic_df = main_topic(topic_input, year_input)
    df = topic_df.select("id","type").distinct()\
        .filter("type is not null")\
        .groupBy("type").agg(F.count("id").alias("number of papers")).toPandas()
    title = f"Types of Papers under the topic: {topic_input}"
    fig = px.pie(df,names="type",values="number of papers",
                hole = 0.7, title = title
                                     )
    fig.update_traces( title_font = dict(size=25,family='Verdana', 
                                        color='white'),
                       hoverinfo='label+percent',
                       textinfo='percent', textfont_size=12)
    
    fig.layout.template = "plotly_dark"
    fig.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)',paper_bgcolor='rgba(0, 0, 0, 0)', title_x =0.5,
                     margin=dict(l=20, r=20, t=30, b=20))
    return fig

## choropleth chart
@app.callback(Output(component_id="choropleth-chart", component_property="figure"),
               [Input(component_id="topic-select", component_property="value"),
                Input(component_id="year-select", component_property="value")])
def display_choropleth(topic_input,year_input):
    """Rebuild the per-country citation map for the selected filters.

    Args:
        topic_input: value of the topic dropdown.
        year_input: ``[first_year, last_year]`` from the range slider.

    Returns:
        The Plotly choropleth from ``countries()`` with dashboard styling.
    """
    # exploded_authors() reads the global main_topic_df, so refresh it for this
    # callback's inputs first (the author and institution callbacks do the
    # same). Otherwise the map depends on which callback happened to run
    # before this one.
    main_topic(topic_input, year_input)
    # Called for its side effect: countries() reads exploded_authors_topic.
    exploded_authors()
    fig = countries()
    title = f"Country wise Contribution in {topic_input.upper()} from {year_input[0]} to {year_input[1]}"
    fig.layout.template = "plotly_dark"
    fig.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)',paper_bgcolor='rgba(0, 0, 0, 0)', title=title, title_x =0.5,
                     margin=dict(l=20, r=20, t=30, b=20))
    return fig

    
## author cards
@app.callback([Output("author-1", "children"),
               Output("author-1-citations", "children"),
               Output("author-2", "children"),
               Output("author-2-citations", "children"),
               Output("author-3", "children"),
               Output("author-3-citations", "children"),
               Output("author-4", "children"),
               Output("author-4-citations", "children"),
               Output("author-5", "children"),
               Output("author-5-citations", "children")],
              [Input("topic-select", "value"),
               Input("year-select", "value")])
def update_author(topic_input,year_input):
    """Fill the five author cards for the selected filters.

    Returns a list of ten strings: a title-cased name followed by its
    "Citation Count: ..." line for each author, padded with "NA" when fewer
    than five authors are available.
    """
    # Both calls are made for their side effects: they refresh the module-level
    # frames that top_authors() reads.
    main_topic(topic_input, year_input)
    exploded_authors()
    leaders = top_authors(5).toPandas()

    card_texts = []
    for author_name, citations in zip(leaders['name'], leaders['citationcount']):
        card_texts.extend([author_name.title(), f"Citation Count: {citations}"])

    # Always hand back exactly ten values, one per Output.
    card_texts.extend(["NA"] * (10 - len(card_texts)))
    return card_texts

    
# institution cards
@app.callback([Output("institute-1", "children"),
               Output("institute-1-citations", "children"),
               Output("institute-2", "children"),
               Output("institute-2-citations", "children"),
               Output("institute-3", "children"),
               Output("institute-3-citations", "children"),
               Output("institute-4", "children"),
               Output("institute-4-citations", "children"),
               Output("institute-5", "children"),
               Output("institute-5-citations", "children")],
              [Input("topic-select", "value"),
               Input("year-select", "value")])
def update_institute(topic_input,year_input):
    """Fill the five institution cards for the selected filters.

    Returns a list of ten strings: a title-cased affiliation followed by its
    "Citation Count: ..." line for each institution, padded with "NA" when
    fewer than five institutions are available.
    """
    # Both calls are made for their side effects: they refresh the module-level
    # frames that top_institutions() reads.
    main_topic(topic_input, year_input)
    exploded_authors()
    leaders = top_institutions(5).toPandas()

    card_texts = []
    for affiliation, citations in zip(leaders['affiliation'], leaders['citationcount']):
        card_texts.extend([affiliation.title(), f"Citation Count: {citations}"])

    # Always hand back exactly ten values, one per Output.
    card_texts.extend(["NA"] * (10 - len(card_texts)))
    return card_texts



    
# Start the dashboard on port 8081 with debug enabled; the cell output reports
# it running on http://127.0.0.1:8081/.
app.run_server(mode="external",port="8081", debug=True)

Dash app running on http://127.0.0.1:8081/


In [None]:

# ## author cards
# @app.callback([Output(component_id="author-1", component_property="children"),
#                Output(component_id="author-1-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_author(topic_input,year_input):
#     global top_authors_df
#     top_authors_df = top_authors().toPandas()
#     return [top_authors_df['name'][0].title(), f"Citation Count: {top_authors_df['citationcount'][0]}"]


# @app.callback([Output(component_id="author-2", component_property="children"),
#                Output(component_id="author-2-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_author(topic_input,year_input):
# #     top_authors_df = top_authors().toPandas() 
#     return [top_authors_df['name'][1].title(), f"Citation Count: {top_authors_df['citationcount'][1]}"]
    

# @app.callback([Output(component_id="author-3", component_property="children"),
#                Output(component_id="author-3-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_author(topic_input,year_input):
#     return [top_authors_df['name'][2].title(), f"Citation Count: {top_authors_df['citationcount'][2]}"]

# @app.callback([Output(component_id="author-4", component_property="children"),
#                Output(component_id="author-4-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_author(topic_input,year_input):
#     return [top_authors_df['name'][3].title(), f"Citation Count: {top_authors_df['citationcount'][3]}"]

# @app.callback([Output(component_id="author-5", component_property="children"),
#                Output(component_id="author-5-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_author(topic_input,year_input):
#     return [top_authors_df['name'][4].title(), f"Citation Count: {top_authors_df['citationcount'][4]}"]

   
# ## institution cards
# @app.callback([Output(component_id="institute-1", component_property="children"),
#                Output(component_id="institute-1-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_institute(topic_input,year_input):
#     global top_institutes_df
#     top_institutes_df = exploded_authors_topic.select("id","affiliation","citationcount").distinct()\
#                         .groupBy(["affiliation"]).agg(F.sum("citationcount").alias("citationcount"))\
#                         .filter("affiliation is not null")\
#                         .orderBy("citationcount", ascending=False).limit(5).toPandas()
# #     top_institutes_df = top_institutes.toPandas()
#     return [top_institutes_df['affiliation'][0].title(), f"Citation Count: {top_institutes_df['citationcount'][0]}"]


# @app.callback([Output(component_id="institute-2", component_property="children"),
#                Output(component_id="institute-2-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_institute(topic_input,year_input):
#     return [top_institutes_df['affiliation'][1].title(), f"Citation Count: {top_institutes_df['citationcount'][1]}"]

# @app.callback([Output(component_id="institute-3", component_property="children"),
#                Output(component_id="institute-3-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_institute(topic_input,year_input):
#     return [top_institutes_df['affiliation'][2].title(), f"Citation Count: {top_institutes_df['citationcount'][2]}"]


# @app.callback([Output(component_id="institute-4", component_property="children"),
#                Output(component_id="institute-4-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_institute(topic_input,year_input):
#     return [top_institutes_df['affiliation'][3].title(), f"Citation Count: {top_institutes_df['citationcount'][3]}"]

# @app.callback([Output(component_id="institute-5", component_property="children"),
#                Output(component_id="institute-5-citations", component_property="children")],
#                [Input(component_id="topic-select", component_property="value"),
#                 Input(component_id="year-select", component_property="value")])
# def update_institute(topic_input,year_input):
#     return [top_institutes_df['affiliation'][4].title(), f"Citation Count: {top_institutes_df['citationcount'][4]}"]