### **API SADVR - Portrait statistique**  
https://www.cen.umontreal.ca/espacedoc/sadvr/  

Ce notebook est destiné à l'extraction et à la visualisation de statistiques relatives aux professeur·e·s et à leurs expertises à partir de l'API de la vitrine de la recherche (SADVR). 
Celles-ci seront intégrées dans un tableau de bord [PowerBI](https://wiki.umontreal.ca/display/SIE/Power+BI) offrant un portrait d'ensemble des données.  

---

In [1]:
from utils.sadvr_utils import *
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt
from slugify import slugify

# Load the professor records; only the identifier and the raw expertise
# payload are needed for this portrait.
data = updateInfoProfs()
expertises = data[['idsadvr', 'expertise']]

# Administrative unit of every individual, keyed by the same identifier.
departements = getTable('individus')[['idsadvr', 'uniteAdmin']]

# Join both sources, derive the 'département' column from the stringified
# administrative unit, then discard the raw unit column.
expertises = (
    expertises
    .merge(departements, on='idsadvr')
    .assign(**{
        'département': lambda frame: frame['uniteAdmin'].astype(str).apply(uniteAdminDepartement)
    })
    .drop(columns='uniteAdmin')
)


## Data normalisation
# NOTE(review): explodeNormalize is defined in utils.sadvr_utils; judging by
# the resulting dotted column names it presumably explodes and flattens the
# nested column -- confirm against the helper.
toNormalize = ['expertise', 'expertise.disciplines']
for c in toNormalize:
    expertises = explodeNormalize(expertises, c)

# Keep only rows that have a discipline, a non-empty keyword list and a
# resolved department.
expertises = (
    expertises
    .dropna(subset='expertise.disciplines.uid')
    .loc[lambda frame: frame['expertise.motsCles'].astype(str) != '[]']
    .loc[lambda frame: frame['département'].astype(str) != 'None']
)

expertises = explodeNormalize(expertises, 'expertise.motsCles')

# Retain keywords whose 'ordre' is at least 3.
expertises = expertises.loc[lambda frame: frame['expertise.motsCles.ordre'].astype(int) >= 3]
expertises

Unnamed: 0,idsadvr,département,expertise.secteursRecherche,expertise.pays,expertise.continents,expertise.periodesChronologiques,expertise.phraseCle,expertise.disciplines.uid,expertise.disciplines.codeLangue,expertise.disciplines.nom,...,expertise.motsCles.uid,expertise.motsCles.nom,expertise.motsCles.ordre,expertise.motsCles.codeLangue,expertise.motsCles.departement,expertise.motsCles.departementCodeSAD,expertise.motsCles.faculte,expertise.motsCles.faculteCodeSAD,expertise.motsCles.uniteRecherche,expertise.motsCles.uniteRechercheIdsadvr
4,in13580,Département de biochimie et médecine moléculaire,"[{'uid': '6', 'codeLangue': 'fre', 'nom': 'Sci...",[],[],[],"[{'codeLangue': 'fre', 'contenu': 'Analyse str...",19,fre,Biochimie,...,83,Cellule,3,fre,Direction de la Faculté des arts et des sciences,0301,Faculté de médecine,23,,
5,in13580,Département de biochimie et médecine moléculaire,"[{'uid': '6', 'codeLangue': 'fre', 'nom': 'Sci...",[],[],[],"[{'codeLangue': 'fre', 'contenu': 'Analyse str...",19,fre,Biochimie,...,83,Cell,3,eng,Direction de la Faculté des arts et des sciences,0301,Faculté de médecine,23,,
6,in13580,Département de biochimie et médecine moléculaire,"[{'uid': '6', 'codeLangue': 'fre', 'nom': 'Sci...",[],[],[],"[{'codeLangue': 'fre', 'contenu': 'Analyse str...",19,fre,Biochimie,...,87,Chimie combinatoire,4,fre,Direction de la Faculté des arts et des sciences,0301,Faculté de médecine,23,,
7,in13580,Département de biochimie et médecine moléculaire,"[{'uid': '6', 'codeLangue': 'fre', 'nom': 'Sci...",[],[],[],"[{'codeLangue': 'fre', 'contenu': 'Analyse str...",19,fre,Biochimie,...,87,Combinatorial Chemistry,4,eng,Direction de la Faculté des arts et des sciences,0301,Faculté de médecine,23,,
8,in13580,Département de biochimie et médecine moléculaire,"[{'uid': '6', 'codeLangue': 'fre', 'nom': 'Sci...",[],[],[],"[{'codeLangue': 'fre', 'contenu': 'Analyse str...",19,fre,Biochimie,...,91,Chromosome (Organismes vivants),5,fre,Direction de la Faculté des arts et des sciences,0301,Faculté de médecine,23,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194801,in14949,École d'architecture,"[{'uid': '10', 'codeLangue': 'fre', 'nom': 'Sc...",[],"[{'uid': '2', 'ordre': '10', 'codeLangue': 'fr...","[{'uid': '8', 'ordre': '11', 'codeLangue': 'fr...","[{'codeLangue': 'fre', 'contenu': 'La conserva...",157,eng,Urban planning,...,634,Historic urban landscape,7,eng,École d'architecture,0104,,,,
194802,in14949,École d'architecture,"[{'uid': '10', 'codeLangue': 'fre', 'nom': 'Sc...",[],"[{'uid': '2', 'ordre': '10', 'codeLangue': 'fr...","[{'uid': '8', 'ordre': '11', 'codeLangue': 'fr...","[{'codeLangue': 'fre', 'contenu': 'La conserva...",157,eng,Urban planning,...,635,Politiques de gestion du patrimoine bâti,6,fre,École d'architecture,0104,,,,
194803,in14949,École d'architecture,"[{'uid': '10', 'codeLangue': 'fre', 'nom': 'Sc...",[],"[{'uid': '2', 'ordre': '10', 'codeLangue': 'fr...","[{'uid': '8', 'ordre': '11', 'codeLangue': 'fr...","[{'codeLangue': 'fre', 'contenu': 'La conserva...",157,eng,Urban planning,...,635,Built heritage management policy,6,eng,École d'architecture,0104,,,,
194804,in14949,École d'architecture,"[{'uid': '10', 'codeLangue': 'fre', 'nom': 'Sc...",[],"[{'uid': '2', 'ordre': '10', 'codeLangue': 'fr...","[{'uid': '8', 'ordre': '11', 'codeLangue': 'fr...","[{'codeLangue': 'fre', 'contenu': 'La conserva...",157,eng,Urban planning,...,645,Paysages urbains,9,fre,École d'architecture,0104,,,,


In [2]:
from utils.sadvr_utils import *
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Load the professor records; only the identifier and the raw expertise
# payload are needed here.
data = updateInfoProfs()
expertises = data[['idsadvr', 'expertise']]

# Administrative unit of every individual, keyed by the same identifier.
departements = getTable('individus')[['idsadvr', 'uniteAdmin']]

expertises = expertises.merge(departements, on='idsadvr')

# Derive the 'département' column from the stringified administrative unit,
# then discard the raw unit column.
expertises['département'] = expertises['uniteAdmin'].astype(str).apply(uniteAdminDepartement)
expertises = expertises.drop(columns='uniteAdmin')


## Data normalisation
# NOTE(review): explodeNormalize comes from utils.sadvr_utils; it presumably
# explodes and flattens the nested column into dotted columns -- confirm.
toNormalize = ['expertise', 'expertise.disciplines']
for c in toNormalize:
    expertises = explodeNormalize(expertises, c)

# Keep only rows that have a discipline, a non-empty keyword list and a
# resolved department.
expertises = expertises.dropna(subset=['expertise.disciplines.uid'])
expertises = expertises[expertises['expertise.motsCles'].astype(str) != '[]']
expertises = expertises[expertises['département'].astype(str) != 'None']

expertises = explodeNormalize(expertises, 'expertise.motsCles')
# Retain keywords whose 'ordre' is at least 3.
expertises = expertises[expertises['expertise.motsCles.ordre'].astype(int) >= 3]

# Harmonise the spelling of COVID-19. The vectorised string accessor leaves
# missing names untouched (a per-value str.replace would raise on NaN), and
# assign() returns a new frame instead of writing into a filtered one.
expertises = expertises.assign(**{
    'expertise.motsCles.nom': expertises['expertise.motsCles.nom'].str.replace(
        "COVID19", "COVID-19", regex=False)
})

expertises = expertises[['idsadvr', 'département',
      'expertise.disciplines.uid', 'expertise.disciplines.codeLangue', 'expertise.disciplines.nom',
       'expertise.motsCles.uid', 'expertise.motsCles.nom', 'expertise.motsCles.codeLangue']]

# Language codes are sorted in descending order so that 'fre' comes before
# 'eng'; drop_duplicates below keeps the first row, so the French label wins
# whenever both languages exist for the same professor/keyword/discipline.
expertises = expertises.sort_values(
    by=['expertise.disciplines.uid', 'expertise.disciplines.codeLangue',
        'expertise.motsCles.uid', 'expertise.motsCles.codeLangue'],
    ascending=[True, False, True, False])

expertises = expertises.drop_duplicates(subset=['idsadvr', 'expertise.motsCles.uid', 'expertise.disciplines.uid'])
expertises = expertises.drop(columns=['expertise.disciplines.codeLangue', 'expertise.motsCles.codeLangue'])

## Extract the frequencies of disciplines and keywords: they are used to
# size the graph nodes (more frequent = bigger).
def freqVariable(variable: str, df: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Count the distinct professors attached to each value of an expertise variable.

    Parameters
    ----------
    variable : str
        Expertise sub-table name; it selects the columns
        ``expertise.{variable}.nom`` and ``expertise.{variable}.uid``
        (the notebook calls this with ``'disciplines'`` and ``'motsCles'``).
    df : pd.DataFrame, optional
        Frame containing ``idsadvr`` and the two columns above. When omitted,
        the notebook-level ``expertises`` frame is looked up at call time, so
        the function always sees its current value rather than a snapshot
        taken when the function was defined.

    Returns
    -------
    pd.DataFrame
        Columns ``expertise.{variable}.nom`` and ``count``, one row per
        (nom, uid) pair. Rows without a uid are ignored.
    """
    if df is None:
        df = expertises

    nom_col = f'expertise.{variable}.nom'
    uid_col = f'expertise.{variable}.uid'

    # One row per (professor, value) so that a professor is counted once per value.
    output = df[['idsadvr', nom_col, uid_col]].dropna(subset=[uid_col]).drop_duplicates()

    output = (
        output.groupby([nom_col, uid_col])['idsadvr']
        .count()
        .reset_index()
        .rename(columns={'idsadvr': 'count'})
    )
    return output[[nom_col, 'count']]

**Expertises de recherche: cartographie des expertises par mots-clés**

On extrait un graphe par département (c'est-à-dire par département ou unité administrative).

In [11]:
# Units whose name contains one of these substrings are left out of the
# per-department graphs.
EXCLUDED_UNIT_MARKERS = ("Direction", "bureau", "dir", "rectorat")
listeDepartements = [
    x for x in expertises['département'].unique().tolist()
    if not any(marker in x for marker in EXCLUDED_UNIT_MARKERS)
]

# Node size on screen = NODE_SIZE_FACTOR * number of professors.
NODE_SIZE_FACTOR = 7

for departement in listeDepartements:
    subdf = expertises[expertises['département'] == departement].dropna()

    # Disciplines: name -> number of professors in this department
    freqDisciplines = freqVariable('disciplines', subdf)
    freqDisciplines = dict(zip(freqDisciplines['expertise.disciplines.nom'], freqDisciplines['count']))

    # Keywords: name -> number of professors in this department
    freqMotsCles = freqVariable('motsCles', subdf)
    freqMotsCles = dict(zip(freqMotsCles['expertise.motsCles.nom'], freqMotsCles['count']))

    subdf = subdf.assign(
        freqDiscipline=subdf['expertise.disciplines.nom'].map(freqDisciplines),
        freqMotCle=subdf['expertise.motsCles.nom'].map(freqMotsCles),
    )

    # Only keywords shared by more than one professor are drawn.
    subdf = subdf[subdf['freqMotCle'].astype(int) > 1]
    subdf = subdf[['département', 'expertise.disciplines.nom', 'freqDiscipline', 'expertise.motsCles.nom', 'freqMotCle']]
    subdf = subdf.sort_values(by='freqMotCle', ascending=False)

    # A department without any shared keyword has nothing to draw; skip it
    # instead of failing on an empty frame further down.
    if subdf.empty:
        print(f"Aucun mot-clé partagé pour {departement} : graphe ignoré")
        continue

    records = subdf.to_dict('records')
    recordsD = subdf.drop_duplicates(subset='expertise.disciplines.nom').to_dict('records')

    # Discipline nodes. 'count' keeps the real frequency for the tooltip,
    # while 'size' is the scaled value used for drawing.
    tuples = [
        (r['expertise.disciplines.nom'],
         {"color": "lightgrey",
          "size": NODE_SIZE_FACTOR * int(r['freqDiscipline']),
          "count": int(r['freqDiscipline'])})
        for r in recordsD
    ]

    # Keyword nodes. They are added after the disciplines, so a name that is
    # both a discipline and a keyword ends up with the keyword attributes.
    tuples += [
        (r['expertise.motsCles.nom'],
         {"color": "lightblue",
          "size": NODE_SIZE_FACTOR * int(r['freqMotCle']),
          "count": int(r['freqMotCle'])})
        for r in records
    ]

    # Edges: discipline <-> keyword
    edges = [(r['expertise.disciplines.nom'], r['expertise.motsCles.nom']) for r in records]

    # add_nodes_from stores the attribute dict of every (node, attrs) tuple.
    nx_graph = nx.Graph()
    nx_graph.add_nodes_from(tuples)
    nx_graph.add_edges_from(edges)

    # Create a Pyvis Network instance
    pyvis_graph = Network(notebook=True, height="800px", width="100%", cdn_resources='remote')

    # Add nodes and edges to the Pyvis Network
    for node, attr in nx_graph.nodes(data=True):
        pyvis_graph.add_node(
            node,
            color=attr['color'],
            size=attr['size'],
            font={'size': 40},
            # Tooltip shows the number of professors, not the scaled size.
            title=f"{node}\nN={attr['count']}",
        )

    for source, target in nx_graph.edges():
        pyvis_graph.add_edge(source, target, color='lightgrey')

    # Use the Barnes-Hut physics solver for better node spacing
    pyvis_graph.barnes_hut(gravity=-8000, central_gravity=0.3, spring_length=50)

    # Save the graph to an HTML file
    name = slugify(departement)
    output_html = f"graphs/graph__{name}.html"
    pyvis_graph.show(output_html)
    

graphs/graph__departement-de-sciences-economiques.html
graphs/graph__departement-d-anesthesiologie-et-de-medecine-de-la-douleur.html
graphs/graph__departement-de-sciences-cliniques.html
graphs/graph__departement-de-medecine-sociale-et-preventive.html
graphs/graph__departement-d-obstetrique-gynecologie.html
graphs/graph__departement-de-microbiologie-infectiologie-et-immunologie.html
graphs/graph__departement-de-gestion-devaluation-et-de-politique-de-sante.html
graphs/graph__departement-de-medecine.html
graphs/graph__departement-de-biochimie-et-medecine-moleculaire.html
graphs/graph__faculte-de-pharmacie.html
graphs/graph__ecole-de-kinesiologie-et-des-sciences-de-l-activite-physique.html
graphs/graph__departement-de-pathologie-et-biologie-cellulaire.html
graphs/graph__departement-de-radiologie-radio-oncologie-et-medecine-nucleaire.html
graphs/graph__departement-de-pediatrie.html
graphs/graph__departement-de-pharmacologie.html
graphs/graph__departement-de-pharmacologie-et-physiologie.html

In [None]:
<!DOCTYPE html>
<!-- Standalone page: a dropdown selects which graph page is shown in the iframe. -->
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Graph Visualization with Dropdown</title>
</head>
<body>

<h1>Graph Visualization with Dropdown</h1>

<!-- Each option's value is the URL loaded into the iframe below. -->
<select id="graphSelector" aria-label="Graph">
    <option value="graph1.html">Graph 1</option>
    <option value="graph2.html">Graph 2</option>
    <!-- Add more options for different graphs -->
</select>

<iframe id="visualizationFrame" title="Selected graph" width="100%" height="800" style="border: 0;"></iframe>

<script>
    const graphSelector = document.getElementById("graphSelector");
    const visualizationFrame = document.getElementById("visualizationFrame");

    // Load the newly selected graph whenever the dropdown changes
    graphSelector.addEventListener("change", () => {
        visualizationFrame.src = graphSelector.value;
    });

    // Initialize the iframe with the first graph
    visualizationFrame.src = graphSelector.value;
</script>

</body>
</html>

In [None]:
# nx_graph = nx.Graph()
# subdf = expertises[expertises['département'] == departement].dropna()

# # Disciplines
# freqDisciplines = freqVariable('disciplines', subdf)
# freqDisciplines = {x['expertise.disciplines.nom'] : x['count'] for x in freqDisciplines.to_dict('records')}

# # Mots-clés
# freqMotsCles = freqVariable('motsCles', subdf)
# freqMotsCles = {x['expertise.motsCles.nom'] : x['count'] for x in freqMotsCles.to_dict('records')}

# subdf['freqDiscipline'] = subdf['expertise.disciplines.nom'].map(freqDisciplines)
# subdf['freqMotCle'] = subdf['expertise.motsCles.nom'].map(freqMotsCles)

# subdf = subdf[subdf['freqMotCle'].astype(int) > 2]
# subdf = subdf[['département', 'expertise.disciplines.nom', 'freqDiscipline', 'expertise.motsCles.nom', 'freqMotCle']]
# subdf

# records = (subdf.sort_values(by='freqMotCle', ascending=False).to_dict('records'))
# recordsD = (pd.DataFrame(records).drop_duplicates(subset='expertise.disciplines.nom')).to_dict('records')

# # Noeuds pour les disciplines
# tuples = [(r['expertise.disciplines.nom'], {"color": "lightgrey", "size": 7*int(r['freqDiscipline'])}) for r in recordsD]
# sizes = [r['freqDiscipline'] for r in recordsD]

# # Noeuds pour les mots-clés
# tuples += [(r['expertise.motsCles.nom'], {"color": "lightblue", "size": 7*int(r['freqMotCle'])}) for r in records]
# sizes += [r['freqMotCle'] for r in records]

# # Liens 
# edges = [(r['expertise.disciplines.nom'], r['expertise.motsCles.nom']) for r in records]

# nx_graph.add_nodes_from(tuples)
# nx_graph.add_edges_from(edges)

# # Set node attributes (colors)
# nx.set_node_attributes(nx_graph, {node: attr_dict for node, attr_dict in tuples})

In [None]:
# # Create a Pyvis Network instance
# pyvis_graph = Network(notebook=True, height="800px", width="100%", cdn_resources='remote')

# # Add nodes and edges to Pyvis Network
# for node, attr in nx_graph.nodes(data=True):
#     pyvis_graph.add_node(
#         node, 
#         color=attr['color'], 
#         size=attr['size'], 
#         font={'size': 40},
#         title=f"{node}\nN={int(attr['size'])}",
#         )


# for edge in nx_graph.edges():
#     pyvis_graph.add_edge(edge[0], edge[1], color='lightgrey')

# # Use the Barnes-Hut physics solver for better node spacing
# pyvis_graph.barnes_hut(gravity=-8000, central_gravity=0.3, spring_length=50)

# # Save the graph to an HTML file
# output_html = "graph_visualization.html"
# pyvis_graph.show(output_html)