### **API SADVR - Portrait statistique**  
https://www.cen.umontreal.ca/espacedoc/sadvr/  

Ce notebook est destiné à l'extraction et à la visualisation de statistiques relatives aux professeur·e·s et à leurs expertises à partir de l'API de la vitrine de la recherche (SADVR). 
Ces statistiques seront intégrées à un tableau de bord [PowerBI](https://wiki.umontreal.ca/display/SIE/Power+BI) permettant d'avoir un portrait d'ensemble des données.  

---

In [1]:
import pandas as pd
import requests 
import json

from utils.sadvr_utils import *
import plotly.express as px
import plotly.graph_objects as go
from slugify import slugify

from rdflib import *

# Load the professor records, then the administrative unit of each individual.
data = updateInfoProfs()
departements = getTable('individus')[['idsadvr', 'uniteAdmin']]

# Attach the administrative unit to each professor's expertise through the
# shared 'idsadvr' key.
expertises = data[['idsadvr', 'expertise']].merge(departements, on='idsadvr')

# Replace the raw administrative unit by the value returned by
# uniteAdminDepartement for its string form.
expertises = expertises.assign(
    **{'département': expertises['uniteAdmin'].astype(str).apply(uniteAdminDepartement)}
).drop(columns='uniteAdmin')


## Data normalization
# NOTE(review): explodeNormalize presumably explodes list-valued cells and
# flattens nested records into dotted column names — confirm in
# utils.sadvr_utils.
toNormalize = ['expertise', 'expertise.disciplines']
for colonne in toNormalize:
    expertises = explodeNormalize(expertises, colonne)

# Keep only rows that have a discipline identifier, a non-empty keyword list
# and a department whose string form is not 'None'.
expertises = expertises.dropna(subset='expertise.disciplines.uid')
expertises = expertises[
    (expertises['expertise.motsCles'].astype(str) != '[]')
    & (expertises['département'].astype(str) != 'None')
]

expertises = explodeNormalize(expertises, 'expertise.motsCles')

# Language codes are sorted in descending order so that drop_duplicates (which
# keeps the first occurrence) retains, for each professor / keyword /
# discipline triple, the row whose language code sorts last.
expertises = (
    expertises[[
        'idsadvr', 'département',
        'expertise.disciplines.uid', 'expertise.disciplines.codeLangue', 'expertise.disciplines.nom',
        'expertise.motsCles.uid', 'expertise.motsCles.nom', 'expertise.motsCles.codeLangue',
    ]]
    .sort_values(
        by=[
            'expertise.disciplines.uid', 'expertise.disciplines.codeLangue',
            'expertise.motsCles.uid', 'expertise.motsCles.codeLangue',
        ],
        ascending=[True, False, True, False],
    )
    .drop_duplicates(subset=['idsadvr', 'expertise.motsCles.uid', 'expertise.disciplines.uid'])
    .drop(columns=['expertise.disciplines.codeLangue', 'expertise.motsCles.codeLangue'])
)

## Extract the frequencies of the disciplines and keywords: they are used to
# assign a size to the nodes of the graph (more frequent = bigger).
def freqVariableExpertises(variable: str, df: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Count how many distinct professors are attached to each value of an expertise variable.

    Args:
        variable: Middle part of the column names to aggregate, e.g.
            ``'disciplines'`` reads ``expertise.disciplines.nom`` and
            ``expertise.disciplines.uid``.
        df: Table holding at least ``idsadvr`` and the two columns above.
            When omitted, the module-level ``expertises`` table is used; it is
            looked up at call time so that a rebuilt table is picked up
            instead of the object that existed when the function was defined.

    Returns:
        A DataFrame with two columns: ``expertise.<variable>.nom`` and
        ``count``, one row per (name, uid) pair.
    """
    if df is None:
        df = expertises

    nameCol = f'expertise.{variable}.nom'
    uidCol = f'expertise.{variable}.uid'

    # One row per (professor, name, uid); rows without an identifier are dropped.
    # `subset` is given as a list because a bare label is only accepted by
    # recent pandas versions.
    output = df[['idsadvr', nameCol, uidCol]].dropna(subset=[uidCol]).drop_duplicates()

    output = (
        output.groupby([nameCol, uidCol])['idsadvr']
        .count()
        .reset_index()
        .rename(columns={'idsadvr': 'count'})
    )

    return output[[nameCol, 'count']]

# Disciplines: discipline name -> count returned by freqVariableExpertises
freqDisciplines = {
    ligne['expertise.disciplines.nom']: ligne['count']
    for ligne in freqVariableExpertises('disciplines').to_dict('records')
}

# Keywords: keyword name -> count returned by freqVariableExpertises
freqMotsCles = {
    ligne['expertise.motsCles.nom']: ligne['count']
    for ligne in freqVariableExpertises('motsCles').to_dict('records')
}

# Departments: each professor is kept once (first row per 'idsadvr'), then the
# professors are counted per department.
freqDepartements = {
    ligne['département']: ligne['count']
    for ligne in (
        expertises[['idsadvr', 'département']]
        .drop_duplicates(subset='idsadvr')
        .groupby('département')['idsadvr']
        .count()
        .reset_index()
        .rename(columns={'idsadvr': 'count'})
        .to_dict('records')
    )
}

**Expertises de recherche: cartographie des expertises par mots-clés**

On va extraire un graphe par département (c'est-à-dire par département ou unité administrative).

In [None]:
# Build one RDF graph per department (department -> disciplines -> keywords),
# keep it in `graphs` and serialize it as Turtle under graphs/.
import os

graphs = {}
departements = expertises
listeDepartements = departements['département'].unique().tolist()

uriDepartements = URIRef(f'{baseURI}/departements/')
uriDisciplines = URIRef(f'{baseURI}/disciplines/')
uriMotsCles = URIRef(f'{baseURI}/motsCles/')
uriFrequence = URIRef(f'{baseURI}/frequence/')

# Lookup tables tried in this order to resolve a department name to its identifier.
mappingsIdentifiants = (mappingDepartements, mappingFacultes, mappingUnitesAdmin)

# The serialization below writes into graphs/; create it if it does not exist.
os.makedirs('graphs', exist_ok=True)

for departement in listeDepartements:
    # Every row of the subset carries the same department value as the loop
    # variable, so the name is taken from it directly.
    subdf = departements[departements['département'] == departement]
    nomDepartement = departement

    # Resolve the identifier before building anything: a department that is in
    # none of the lookup tables is reported and skipped, so that it can never
    # reuse the identifier of the previous iteration.
    for mapping in mappingsIdentifiants:
        try:
            idDepart = mapping[nomDepartement]
        except KeyError:
            continue
        break
    else:
        print(nomDepartement, 'identifiant introuvable dans les tables de correspondance')
        continue

    ##### Graph definition
    g = Graph()
    g.bind('sadvr', 'https://www.recherche.umontreal.ca/vitrine/rest/api/1.7/umontreal/')

    ## Class definitions
    # Class for the departments
    g.add((uriDepartements, RDF.type, OWL.Class))
    g.add((uriDepartements, RDFS.label, Literal('Département', lang='fr')))

    # Class for the disciplines
    g.add((uriDisciplines, RDF.type, OWL.Class))
    g.add((uriDisciplines, RDFS.label, Literal('Discipline', lang='fr')))

    # Class for the keywords
    g.add((uriMotsCles, RDF.type, OWL.Class))
    g.add((uriMotsCles, RDFS.label, Literal('MotClé', lang='fr')))

    ## Property definitions
    # Frequency
    # NOTE(review): declared as owl:ObjectProperty although its values below
    # are integer literals; owl:DatatypeProperty may be intended — confirm
    # before changing the serialized output.
    g.add((uriFrequence, RDF.type, OWL.ObjectProperty))
    g.add((uriFrequence, RDFS.label, Literal("Fréquence", lang='fr')))

    ### Department node
    uriDepartement = URIRef(f'{baseURI}/departements/{idDepart}')
    g.add((uriDepartement, RDF.type, uriDepartements))
    g.add((uriDepartement, RDF.type, OWL.Class))
    g.add((uriDepartement, RDFS.label, Literal(nomDepartement, lang='fr')))

    records = subdf.to_dict('records')

    for record in records:
        # Discipline
        idDiscipline = record['expertise.disciplines.uid']
        uriDiscipline = URIRef(f'{baseURI}/disciplines/{idDiscipline}')
        nomDiscipline = record['expertise.disciplines.nom']

        # Link discipline <> department
        g.add((uriDiscipline, RDFS.subClassOf, uriDepartement))

        # Node for the discipline
        g.add((uriDiscipline, RDF.type, uriDisciplines))
        g.add((uriDiscipline, RDF.type, OWL.Class))
        g.add((uriDiscipline, RDFS.label, Literal(nomDiscipline, lang='fr')))

        # Keyword
        idMotCle = record['expertise.motsCles.uid']
        uriMotCle = URIRef(f'{baseURI}/motsCles/{idMotCle}')
        nomMotCle = Literal(record['expertise.motsCles.nom'])

        ### Frequency of the keyword, looked up by keyword name
        compte = Literal(freqMotsCles[record['expertise.motsCles.nom']], datatype=XSD.integer)

        # Link discipline <> keyword
        g.add((uriMotCle, RDFS.subClassOf, uriDiscipline))

        # Node for the keyword
        g.add((uriMotCle, RDF.type, OWL.Class))
        g.add((uriMotCle, RDFS.label, nomMotCle))
        g.add((uriMotCle, uriFrequence, compte))

    graphs[nomDepartement] = g
    # NOTE(review): the recorded output of this cell is "NameError: name
    # 'unicode' is not defined"; presumably raised by a Python 2-only
    # `slugify` distribution — confirm which package provides `slugify`.
    g.serialize(f'graphs/graph__{slugify(nomDepartement)}.ttl', format='ttl')

NameError: name 'unicode' is not defined

*On va essayer de visualiser chaque graphe avec networkx et plotly*

In [None]:
# Example of a visualization that could be used with the graph mapping the
# research expertises.

import networkx as nx
import plotly.graph_objects as go

# Toy graph: five nodes joined in a ring
G = nx.Graph()
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (2, 3), (3, 4), (4, 5), (5, 1)])

# One marker size per node, in node order (replace with the desired sizes)
node_sizes = [10, 20, 30, 40, 50]

# 2D coordinates of every node (another layout algorithm can be used)
pos = nx.spring_layout(G)

# Edges are drawn as one line trace; a None after each pair of end points
# separates the segments.
edge_x, edge_y = [], []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

edge_trace = go.Scatter(x=edge_x, y=edge_y, line={'width': 0.5, 'color': '#888'})

# Nodes are drawn as one marker trace sized by node_sizes
node_x = [pos[node][0] for node in G.nodes()]
node_y = [pos[node][1] for node in G.nodes()]

node_trace = go.Scatter(
    x=node_x, y=node_y, text=[], mode='markers+text', hoverinfo='text',
    marker={'size': node_sizes, 'color': '#FF5733'}
)

# Assemble the figure: edges first, nodes on top
fig = go.Figure(data=[edge_trace, node_trace])

# Hide both axes
fig.update_layout(xaxis_visible=False, yaxis_visible=False)

fig.show()