### **API SADVR - Portrait statistique:** Expertises de recherche  
https://www.cen.umontreal.ca/espacedoc/sadvr/

Ce notebook est destiné à l'extraction et à la visualisation de statistiques relatives aux expertises de recherche de l'UdeM à partir de l'API de la vitrine de la recherche (SADVR). 
Ces statistiques seront intégrées dans un tableau de bord offrant un portrait d'ensemble des données.  

---

In [1]:
import webbrowser
from pathlib import Path

import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from dash import Dash, html, dcc, dash_table

from utils.SADVR_utils import *


# Dash application using the Bootstrap stylesheet shipped with
# dash-bootstrap-components.
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)

# Visual elements: CSS box-shadow value applied to the main layout container.
shadow = '2px 2px 6px lightgrey'

# Commenter à partir d'ici (le code plus bas suppose alors que `expertises` existe déjà dans la session)
### Charger les données
# data = updateInfoProfs()

# expertises = data[['idsadvr', 'affiliations', 'etablissementsAffilies', 'expertise']]

# toNormalize = [
#     'affiliations',
#     'etablissementsAffilies',
#     'expertise',
#     'expertise.secteursRecherche',
#     'expertise.disciplines',
#     'expertise.pays',
#     'expertise.continents',
#     'expertise.periodesChronologiques'
# ]

# for c in toNormalize:
#     expertises = explodeNormalize(expertises, c)

# drop = [
#     'affiliations.courrielInstitutionnel', 
#     'affiliations.immeuble', 
#     'affiliations.fonction.codeSad', 
#     'affiliations.fonction.nom', 
#     'affiliations.local', 
#     'affiliations.exclusion', 
#     'affiliations.exclusionTel',
#     'affiliations.uniteAdministrative.codeSad',
#     'affiliations.uniteAdministrative.nom',
#     'affiliations.telephone.numero',
#     'affiliations.telephone.poste',
#     'expertise.phraseCle'
# ]

# expertises = expertises.drop(columns=drop)
## Fin zone commentée

#### Build the tables and figures shown on the board
# Number of professors per faculty: wrap the plotVariable() output in a
# DataFrame, drop its last two rows, then order by decreasing count.
facultes = pd.DataFrame(plotVariable(expertises, 'affiliations.faculte.nom'))
facultes = facultes[:-2]
facultes = facultes.sort_values(by='count', ascending=False)

# Table: one Dash DataTable column per DataFrame column, in a scrollable
# 450px-high box.
tableFacultes = dash_table.DataTable(
    data=facultes.to_dict('records'),
    columns=[dict(name=columnName, id=columnName) for columnName in facultes.columns],
    # Let long cell contents wrap instead of being clipped.
    style_data=dict(whiteSpace='normal', height='auto'),
    style_table=dict(height='450px', overflowY='scroll'),
    # Left-align text in columns for readability.
    style_cell=dict(textAlign='left', fontFamily='Calibri'),
)


# Donut chart: number of professors per faculty.
# Slice names are the faculty names shortened to 25 characters. The ellipsis
# is appended only when a name is actually cut, so short names are not shown
# as "Name...". The full name stays available through hover_name.
facultes['noms'] = facultes['labels'].apply(
    lambda x: str(x) if len(str(x)) <= 25 else str(x)[:25] + "..."
)

# NOTE: px.pie's `labels` argument expects a dict mapping column names to
# display labels; the former `labels='labels'` string was not such a mapping
# and has been dropped.
figFacultes = px.pie(
    facultes,
    values='count',
    names='noms',
    title='Nombre de professeur-e-s par faculté',
    hover_name="labels",
    width=800,
    hole=0.6,
)

# Keep the slice text inside the slices.
figFacultes = figFacultes.update_traces(textposition='inside')

# Hide slice text that would have to be drawn smaller than 12px.
figFacultes = figFacultes.update_layout(
    uniformtext_minsize=12,
    uniformtext_mode='hide',
    legend=dict(font=dict(size=10)),
    margin=dict(l=20),
)

# Number of professors per affiliated institution
# Table
def groupEtablissements(etablissementNom : str) -> str :
    """Return the part of an institution name located before the first ' – '.

    A name that does not contain the ' – ' separator (space, en dash, space)
    is returned unchanged.
    """
    prefix, _separator, _remainder = etablissementNom.partition(' – ')
    return prefix

# Affiliated institutions: keep only the rows that carry an institution name.
etablissementsAffilies = expertises.dropna(subset='etablissementsAffilies.nom').copy()

# Collapse each name onto the part before ' – ' with the shared helper
# (previously re-implemented inline as a lambda, leaving the helper unused).
etablissementsAffilies['etablissementsAffilies.nom'] = (
    etablissementsAffilies['etablissementsAffilies.nom'].apply(groupEtablissements)
)

# De-duplicate AFTER the names are collapsed: doing it before left one row per
# original name, so a professor attached to two sub-units of the same
# institution contributed two rows for that institution.
etablissementsAffilies = etablissementsAffilies.drop_duplicates(
    subset=['idsadvr', 'etablissementsAffilies.nom']
)

# Count per institution, largest first, then rename the columns for display.
# NOTE(review): assumes plotVariable() returns 'labels' / 'count' columns, as
# the rename below implies — confirm in utils.SADVR_utils.
etablissementsAffilies = pd.DataFrame(
    plotVariable(etablissementsAffilies, 'etablissementsAffilies.nom')
).sort_values(by='count', ascending=False)
etablissementsAffilies = etablissementsAffilies.rename(columns={'labels': 'Établissement', 'count': 'N'})

# Table: one Dash DataTable column per DataFrame column, in a scrollable
# 450px-high box.
tableEtablissements = dash_table.DataTable(
    data=etablissementsAffilies.to_dict('records'),
    columns=[
        dict(name=columnName, id=columnName)
        for columnName in etablissementsAffilies.columns
    ],
    # Let long cell contents wrap instead of being clipped.
    style_data=dict(whiteSpace='normal', height='auto'),
    style_table=dict(height='450px', overflowY='scroll'),
    # Left-align text in columns for readability.
    style_cell=dict(textAlign='left', fontFamily='Calibri'),
)

# Donut chart: number of professors per affiliated institution.
# Slice names are the institution names shortened to 25 characters. The
# ellipsis is appended only when a name is actually cut, so short names are
# not shown as "Name...". The full name stays available through hover_name.
etablissementsAffilies['noms'] = etablissementsAffilies['Établissement'].apply(
    lambda x: str(x) if len(str(x)) <= 25 else str(x)[:25] + "..."
)

# Columns are referenced by name, like the faculty chart does.
# NOTE: px.pie's `labels` argument expects a dict mapping column names to
# display labels; the Series formerly passed there was not such a mapping and
# has been dropped.
figEtablissementsAffilies = px.pie(
    etablissementsAffilies,
    names='noms',
    values='N',
    title='Nombre de professeurs par établissement affilié',
    hover_name="Établissement",
    width=800,
    hole=0.6,
)

# Keep the slice text inside the slices.
figEtablissementsAffilies = figEtablissementsAffilies.update_traces(textposition='inside')

# Hide slice text that would have to be drawn smaller than 12px.
figEtablissementsAffilies = figEtablissementsAffilies.update_layout(
    uniformtext_minsize=12,
    uniformtext_mode='hide',
    legend=dict(font=dict(size=10)),
    margin=dict(l=20),
)

# Main research disciplines at UdeM (Top 30)
# Distinct (professor, discipline name, discipline uid, language) rows that
# carry a discipline uid.
disciplines = (
    expertises[[
        'idsadvr', 'expertise.disciplines.nom',
        'expertise.disciplines.uid', 'expertise.disciplines.codeLangue',
    ]]
    .dropna(subset='expertise.disciplines.uid')
    .drop_duplicates()
)

# Number of rows for each (name, uid, language) triple, stored as 'count'.
disciplines = (
    disciplines
    .groupby([
        'expertise.disciplines.nom',
        'expertise.disciplines.uid',
        'expertise.disciplines.codeLangue',
    ])['idsadvr']
    .count()
    .reset_index()
    .rename(columns={'idsadvr': 'count'})
)

# Keep a single row per uid: within a uid the language codes are sorted in
# descending order and the first row is retained.
disciplines = disciplines.sort_values(
    by=['expertise.disciplines.uid', 'expertise.disciplines.codeLangue'],
    ascending=[True, False],
)
disciplines = disciplines.drop_duplicates(subset='expertise.disciplines.uid', keep='first')

# Largest disciplines first, reduced to the two columns the chart needs.
disciplines = disciplines.sort_values(by='count', ascending=False)
disciplines = disciplines[['expertise.disciplines.nom', 'count']]

disciplines = groupOtherValues(disciplines, 30)[:30]

# Flat treemap: every tile has the empty string as parent.
figDisciplines = go.Figure(
    data=go.Treemap(
        labels=disciplines['expertise.disciplines.nom'],
        parents=[''] * len(disciplines),
        values=disciplines['count'],
    )
).update_layout(
    title_text="Principales disciplines de recherche à l'UdeM (Top 30)",
    height=600,
    margin=dict(t=25, l=50, b=50),
)


# Dropdown: main research disciplines per faculty

# Lookup table {discipline id as str: name}, restricted to the rows whose
# language code is 'fre'. It must be built before it is applied below: it was
# previously defined after its first use, which raises NameError on a fresh run.
mappingDisciplines = pd.read_csv('tables/SADVR_disciplines.csv')
mappingDisciplines = mappingDisciplines[mappingDisciplines['noms.codeLangue'] == 'fre']
mappingDisciplines = {str(row['id']): row['noms.nom'] for row in mappingDisciplines.to_dict('records')}

# Distinct (professor, faculty, discipline uid) rows that carry a discipline uid.
disciplines = expertises[
    ['idsadvr', 'affiliations.faculte.nom', 'expertise.disciplines.uid']
].dropna(subset='expertise.disciplines.uid').drop_duplicates()

# Replace the uid by its name from the lookup table; uids missing from the
# table become NaN and are therefore left out of the groupby below.
disciplines['expertise.disciplines.nom'] = disciplines['expertise.disciplines.uid'].astype(str).map(mappingDisciplines)
disciplines = disciplines.drop(columns='expertise.disciplines.uid')


# Group by faculty and discipline and count the rows in each group
faculty_discipline_counts = disciplines.groupby(['affiliations.faculte.nom', 'expertise.disciplines.nom'])['idsadvr'].count().reset_index()

# Rename the 'idsadvr' column to 'count' and order the groups by decreasing size
faculty_discipline_counts = faculty_discipline_counts.rename(columns={'idsadvr': 'count'})
faculty_discipline_counts = faculty_discipline_counts.sort_values(by='count', ascending=False)

def generate_pie_chart(selected_faculty, top_n=6):
    """Build a donut chart of the main research disciplines of one faculty.

    Reads the module-level ``faculty_discipline_counts`` table.

    Args:
        selected_faculty: Value of 'affiliations.faculte.nom' whose rows are kept.
        top_n: Number of rows kept for the chart. Defaults to 6, the value
            that used to be hard-coded.

    Returns:
        A ``go.Figure`` holding a single ``go.Pie`` trace with a 0.6 hole.
    """
    filtered_df = faculty_discipline_counts[faculty_discipline_counts['affiliations.faculte.nom'] == selected_faculty]

    # Keep the top_n main disciplines of the faculty, largest first.
    # NOTE(review): groupOtherValues presumably folds the remaining rows into
    # an "other" bucket — confirm in utils.SADVR_utils.
    filtered_df = filtered_df.sort_values(by='count', ascending=False)
    filtered_df = groupOtherValues(filtered_df, top_n)[:top_n]

    fig = go.Figure(go.Pie(
        labels=filtered_df['expertise.disciplines.nom'],
        values=filtered_df['count'],
        hole=0.6)
    )
    return fig

# One donut chart per faculty, keyed by faculty name; every trace starts hidden
# and is named after its faculty.
facultyNames = faculty_discipline_counts['affiliations.faculte.nom'].unique()
figs = {}
for facultyName in facultyNames:
    facultyFig = generate_pie_chart(facultyName)
    figs[facultyName] = facultyFig.update_traces(name=facultyName, visible=False)

# Empty host figure that will receive the traces of every faculty.
dropdownTitle = go.layout.Title(text="Principales disciplines de recherche par faculté")
fig = go.Figure(layout=go.Layout(title=dropdownTitle))

# Default category: the first faculty. Its traces are added first and are the
# only ones made visible.
defaultcat = facultyNames[0]
fig.add_traces(figs[defaultcat].data)
fig.update_traces(visible=True)

# Add the traces of the remaining faculties, still hidden, in dict order so
# that trace positions line up with the keys of `figs`.
for facultyName, facultyFig in figs.items():
    if facultyName == defaultcat:
        continue
    fig.add_traces(facultyFig.data)

# One dropdown button per faculty; each button sends a visibility mask that is
# True only at the position of its own faculty.
dropdownButtons = [
    {
        "label": buttonName,
        "method": "update",
        "args": [{"visible": [otherName == buttonName for otherName in figs]}],
    }
    for buttonName in figs
]

fig.update_layout(
    title_x=0.02,  # Moves the title towards the left edge.
    updatemenus=[{"buttons": dropdownButtons}],
)

figDisciplinesFacultes = fig

# Cartographie des expertises de recherche: mots-clés associés aux principales disciplines de recherche de l'UdeM

In [None]:
def _thin_separator():
    """Return the light horizontal rule placed after each dashboard section."""
    return html.Hr(
        style={
            # 'lightgrey' is a CSS named colour and takes no '#' prefix. The
            # former '#lightgrey' is not a valid colour value, which makes the
            # whole border declaration invalid.
            'borderTop': '1px solid lightgrey',
            'marginBottom': '30px'
        }
    )


def _table_and_chart_section(title, table, figure):
    """Return a titled section with a table (width 5) beside a chart (width 7).

    Args:
        title: Text of the section heading.
        table: Dash component placed in the left column.
        figure: Plotly figure wrapped in a ``dcc.Graph`` in the right column.
    """
    return html.Div(
        style={
            'padding': '0px',
            'marginBottom': '30px'
        },
        children=[
            html.H4(
                title,
                style={'marginBottom': '30px'}
            ),
            dbc.Row(
                [
                    dbc.Col(table, width=5),
                    dbc.Col(dcc.Graph(figure=figure), width=7),
                ]
            ),
        ],
    )


app.layout = html.Div(
    children=[
        # Page banner
        html.H1(
            "SADVR - Portrait statistique",
            style={
                'textAlign': 'center',
                'color': 'white',
                'backgroundColor': '#0b113a',
                'padding': '30px',
                'fontFamily': 'Calibri'
            }
        ),

        # Main card holding every section of the tab
        html.Div(
            style={
                'border': '1px solid lightgrey',
                'paddingTop': '2%',
                'marginBottom': '2%',
                'marginLeft': '10%',
                'marginRight': '10%',
                'paddingLeft': '5%',
                'paddingRight': '5%',
                'boxShadow': shadow
            },
            children=[
                # Tab title
                html.H2(
                    'Expertises de recherche',
                    style={
                        'textAlign': 'left',
                        'color': '#444444',
                    }
                ),

                # Green underline below the tab title
                html.Hr(
                    style={
                        'borderTop': '4px solid #52B782',
                        'width': '50%',
                        'marginBottom': '20px'
                    }
                ),

                # Table + donut chart: number of professors per faculty
                _table_and_chart_section("Facultés", tableFacultes, figFacultes),
                _thin_separator(),

                # Table + donut chart: number of professors per affiliated institution
                _table_and_chart_section(
                    'Établissements affiliés',
                    tableEtablissements,
                    figEtablissementsAffilies,
                ),
                _thin_separator(),

                # Main research disciplines at UdeM (top 30)
                html.Div(
                    style={
                        'marginTop': '20px',
                        'marginLeft': '-50px',
                        'padding': '0px'
                    },
                    children=[dcc.Graph(figure=figDisciplines)]
                ),
                _thin_separator(),

                # Donut chart with dropdown: main research disciplines per faculty
                html.Div(
                    style={
                        'marginLeft': '-5px'
                    },
                    children=[dcc.Graph(figure=figDisciplinesFacultes)]
                ),
                _thin_separator(),
            ],
        )
    ]
)

# if __name__ == '__main__':
#     app.run(debug=True, port=8052)

# Static export of the tab.
# plotly.io.write_html() serialises a Plotly figure; app.layout is a Dash
# component tree, so passing it cannot produce the dashboard. Each figure is
# instead rendered as an HTML fragment and the fragments are joined into one
# page. The Dash DataTable components and the layout styling have no static
# rendering here and are not part of the export.
exportedFigures = [
    figFacultes,
    figEtablissementsAffilies,
    figDisciplines,
    figDisciplinesFacultes,
]

exportParts = [
    '<!DOCTYPE html>',
    '<html lang="fr">',
    '<head><meta charset="utf-8"><title>SADVR - Portrait statistique</title></head>',
    '<body>',
]
for exportIndex, exportedFigure in enumerate(exportedFigures):
    exportParts.append(
        pio.to_html(
            exportedFigure,
            full_html=False,
            # Embed plotly.js once, with the first fragment; the following
            # fragments reuse the copy already present in the page.
            include_plotlyjs=(exportIndex == 0),
        )
    )
exportParts.extend(['</body>', '</html>'])

exportPath = Path('expertisesTab.html').resolve()
exportPath.write_text('\n'.join(exportParts), encoding='utf-8')

# Open the exported page in the default browser, as auto_open=True was meant to.
webbrowser.open(exportPath.as_uri())

AssertionError: The setup method 'errorhandler' can no longer be called on the application. It has already handled its first request, any changes will not be applied consistently.
Make sure all imports, decorators, functions, etc. needed to set up the application are done before running it.