In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import re
import igraph as ig
import gravis as gv
import networkx as nx
#import leidenalg as la

from matplotlib import rc
from functions import clean_address, es_diagonal_cero, propiedades, centralidades, centralizacion

In [2]:
dfa = pd.read_excel("../Datasets/Research_status_2017_2022.xlsx")

In [3]:
top = ['Multidisciplinary', 'Particles & Fields', 'Applied', 'Condensed Matter', 'Mathematical', 'Molecular & Chemical', 'Physical', 'Astronomy & Astrophysics', 'Fluids & Plasmas']
anios = [2017, 2018, 2019, 2020, 2021, 2022]

## Todos los años

In [4]:
dfe = dfa.copy()
dfe = dfe[['Language', 'Affiliations', 'Document Type', 'Addresses', 'Publication Year', 'WoS Categories']]
dfe = dfe.dropna(subset = ['Addresses', 'Affiliations'])
dfe = dfe[dfe['Document Type']=='Article']
dfe = dfe[dfe['Publication Year']<2023]
dfe["Addresses"] = dfe["Addresses"].apply(lambda x: re.sub(r'\[.*?\]', '', x))
dfe['Addresses'] = dfe['Addresses'].apply(lambda x: x + ';')
dfe['Countries'] = dfe['Addresses'].apply(lambda x: '; '.join(re.findall(r', ([A-Z][a-z]+);', x)))
pattern = re.compile(r'USA|Peoples R China|BELARUS|Korea|Czech Republic|U Arab Emirates|South Africa|Ireland')

dfe = dfe[~dfe['Addresses'].str.contains(r', [A-Z][a-z]+ [A-Z][a-z]+;', regex=True)]
dfe = dfe[~dfe['Addresses'].str.contains(pattern)]
dfe['Paises'] = dfe['Countries'].apply(clean_address)
dfe['Categorias'] = dfe['WoS Categories'].apply(clean_address)
dfe['Type'] = dfe['Document Type'].apply(clean_address)

dfe = dfe[dfe['Categorias'].apply(lambda x: all(item in top for item in x))]
dfe = dfe[dfe['Paises'].apply(lambda x: len(x) == 1 and x[0] == 'Uruguay')]


In [5]:
dfAll = dfe
dfAll['Instituciones'] = dfAll['Affiliations'].str.split(';', expand=False)
dfAll['Instituciones'] = dfAll['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfAll['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfAll['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adjAll = adj_matrix.to_numpy()
adjAll = adjAll.astype(float)

InstA = nx.Graph()

for countries in dfAll['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                InstA.add_edge(countries[i], countries[j], weight=adjAll[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(InstA)
nx.set_node_attributes(InstA, c, 'betweenness')


fig = gv.d3(InstA, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

print(len(all_inst))

5


In [6]:
print(propiedades(InstA))

Graph with 4 nodes and 2 edges
N de Nodos:  4
N de aristas:  2
Es direccionado:  False
Transitividad 0
Centralización  0.0
Tiene:  2 comunidades
[frozenset({'Institut Pasteur de Montevideo', 'Le Reseau International des Instituts Pasteur (RIIP)'}), frozenset({'Universidad de la Republica, Uruguay', 'Universidad de Montevideo'})]
Densidad: 0.3333333333333333
None


### 2017

In [7]:
dfe2017 = dfe[dfe['Publication Year']==2017].copy()
dfe2017['Instituciones'] = dfe2017['Affiliations'].str.split(';', expand=False)
dfe2017['Instituciones'] = dfe2017['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2017['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2017['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2017 = nx.Graph()

for countries in dfe2017['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            graph2017.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])



c = nx.algorithms.degree_centrality(graph2017)
nx.set_node_attributes(graph2017, c, 'betweenness')


labels = {node: node for node in graph2017.nodes()}
fig = gv.d3(graph2017, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

centr2017 = centralidades(graph2017)
cent_grado = centr2017['centralidad'].head(20)
centralizacion2017 = centralizacion(graph2017)
densidad2017 = nx.density(graph2017)
asort2017 = nx.degree_assortativity_coefficient(graph2017)
trans2017 = nx.transitivity(graph2017)

In [8]:
print(propiedades(graph2017))

Graph with 3 nodes and 2 edges
N de Nodos:  3
N de aristas:  2
Es direccionado:  False
Transitividad 0
Centralización  0.5
Tiene:  2 comunidades
[frozenset({'Institut Pasteur de Montevideo', 'Le Reseau International des Instituts Pasteur (RIIP)'}), frozenset({'Universidad de la Republica, Uruguay'})]
Densidad: 0.6666666666666666
None


### 2018

In [10]:
dfe2018 = dfe[dfe['Publication Year']==2018].copy()
dfe2018['Instituciones'] = dfe2018['Affiliations'].str.split(';', expand=False)
dfe2018['Instituciones'] = dfe2018['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2018['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2018['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2018 = nx.Graph()

for countries in dfe2018['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                graph2018.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(graph2018)
nx.set_node_attributes(graph2018, c, 'betweenness')


labels = {node: node for node in graph2018.nodes()}
fig = gv.d3(graph2018, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

centr2018 = centralidades(graph2018)
cent_grado = centr2018['centralidad'].head(20)
centralizacion2018 = centralizacion(graph2018)
densidad2018 = nx.density(graph2018)
asort2018 = nx.degree_assortativity_coefficient(graph2018)
trans2018 = nx.transitivity(graph2018)

NetworkXPointlessConcept: cannot compute centrality for the null graph

In [11]:
print(propiedades(graph2018))

Graph with 0 nodes and 0 edges


ValueError: cutoff must be between 1 and 0. Got 1.

### 2019

In [12]:
dfe2019 = dfe[dfe['Publication Year'] == 2019].copy()
dfe2019['Instituciones'] = dfe2019['Affiliations'].str.split(';', expand=False)
dfe2019['Instituciones'] = dfe2019['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2019['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2019['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2019 = nx.Graph()

for countries in dfe2019['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                graph2019.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(graph2019)
nx.set_node_attributes(graph2019, c, 'betweenness')


labels = {node: node for node in graph2019.nodes()}
fig = gv.d3(graph2019, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

centr2019 = centralidades(graph2019)
cent_grado = centr2019['centralidad'].head(20)
centralizacion2019 = centralizacion(graph2019)
densidad2019 = nx.density(graph2019)
asort2019 = nx.degree_assortativity_coefficient(graph2019)
trans2019 = nx.transitivity(graph2019)

ZeroDivisionError: float division by zero

In [13]:
print(propiedades(graph2019))

Graph with 2 nodes and 1 edges
N de Nodos:  2
N de aristas:  1
Es direccionado:  False
Transitividad 0


ZeroDivisionError: float division by zero

### 2020

In [14]:
dfe2020 = dfe[dfe['Publication Year']==2020].copy()
dfe2020['Instituciones'] = dfe2020['Affiliations'].str.split(';', expand=False)
dfe2020['Instituciones'] = dfe2020['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2020['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2020['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2020 = nx.Graph()

for countries in dfe2020['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                graph2020.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(graph2020)
nx.set_node_attributes(graph2020, c, 'betweenness')


labels = {node: node for node in graph2020.nodes()}
fig = gv.d3(graph2020, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

centr2020 = centralidades(graph2020)
cent_grado = centr2020['centralidad'].head(20)
centralizacion2020 = centralizacion(graph2020)
densidad2020 = nx.density(graph2020)
asort2020 = nx.degree_assortativity_coefficient(graph2020)
trans2020 = nx.transitivity(graph2020)

ZeroDivisionError: float division by zero

In [15]:
print(propiedades(graph2020))

Graph with 2 nodes and 1 edges
N de Nodos:  2
N de aristas:  1
Es direccionado:  False
Transitividad 0


ZeroDivisionError: float division by zero

### 2021


In [16]:
dfe2021 = dfe[dfe['Publication Year']==2021].copy()
dfe2021['Instituciones'] = dfe2021['Affiliations'].str.split(';', expand=False)
dfe2021['Instituciones'] = dfe2021['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2021['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2021['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2021 = nx.Graph()

for countries in dfe2021['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                graph2021.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(graph2021)
nx.set_node_attributes(graph2021, c, 'betweenness')


labels = {node: node for node in graph2021.nodes()}
fig = gv.d3(graph2021, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)

centr2021 = centralidades(graph2021)
cent_grado = centr2021['centralidad'].head(20)
centralizacion2021 = centralizacion(graph2021)
densidad2021 = nx.density(graph2021)
asort2021 = nx.degree_assortativity_coefficient(graph2021)
trans2021 = nx.transitivity(graph2021)


NetworkXPointlessConcept: cannot compute centrality for the null graph

In [17]:
print(propiedades(graph2021))

Graph with 0 nodes and 0 edges


ValueError: cutoff must be between 1 and 0. Got 1.

### 2022

In [18]:
dfe2022 = dfe[dfe['Publication Year']==2022].copy()
dfe2022['Instituciones'] = dfe2022['Affiliations'].str.split(';', expand=False)
dfe2022['Instituciones'] = dfe2022['Instituciones'].apply(lambda inst: [i.strip() for i in inst])

all_inst = set()
for inst in dfe2022['Instituciones']:
    all_inst.update(inst)
all_inst = sorted(list(all_inst))
adj_matrix = pd.DataFrame(0, index=all_inst, columns=all_inst)
for countries in dfe2022['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i+1, len(countries)):
            adj_matrix.loc[countries[i], countries[j]] += 1
            adj_matrix.loc[countries[j], countries[i]] += 1
adj = adj_matrix.to_numpy()
adj = adj.astype(float)

graph2022 = nx.Graph()

for countries in dfe2022['Instituciones']:
    countries = sorted(countries)
    for i in range(len(countries)):
        for j in range(i + 1, len(countries)):
            if countries[i] != countries[j]:  # Agregar esta condición para evitar bucles
                graph2022.add_edge(countries[i], countries[j], weight=adj[all_inst.index(countries[i])][all_inst.index(countries[j])])

c = nx.algorithms.degree_centrality(graph2022)
nx.set_node_attributes(graph2022, c, 'betweenness')


labels = {node: node for node in graph2022.nodes()}
fig = gv.d3(graph2022, use_node_size_normalization=True, 
            node_size_normalization_max = 100,      
            use_edge_size_normalization = True, 
            edge_size_data_source = 'weight', 
            edge_curvature = 0.4)


centr2022 = centralidades(graph2022)
cent_grado = centr2022['centralidad'].head(20)
centralizacion2022 = centralizacion(graph2022)
densidad2022 = nx.density(graph2022)
asort2022 = nx.degree_assortativity_coefficient(graph2022)
trans2022 = nx.transitivity(graph2022)

ZeroDivisionError: float division by zero

In [19]:
print(propiedades(graph2022))

Graph with 2 nodes and 1 edges
N de Nodos:  2
N de aristas:  1
Es direccionado:  False
Transitividad 0


ZeroDivisionError: float division by zero

## Series de tiempo

### Densidad

In [20]:
densidad_anio = [densidad2017, densidad2018, densidad2019, densidad2020, densidad2021, densidad2022]

df_densidad = pd.DataFrame({
    "Año": anios,
    "Densidad": densidad_anio})
df_densidad

df_densidad.set_index('Año', inplace=True)

# Graficar la serie de tiempo
plt.figure(figsize=(10, 6))
plt.plot(df_densidad.index, df_densidad['Densidad'], marker='o', linestyle='-', color='b')
plt.xlabel('Año')
plt.ylabel('Densidad')
plt.title('Serie de Tiempo de Densidad Uruguay (2017-2022)')
plt.grid(True)
plt.show()

NameError: name 'densidad2018' is not defined

### Centralización

In [21]:
centralizacion_anio = [centralizacion2017, centralizacion2018, centralizacion2019, centralizacion2020, centralizacion2021, centralizacion2022]

df_centralizacion = pd.DataFrame({
    'Año': anios,
    'Centralización de Grado': centralizacion_anio})
df_centralizacion

df_centralizacion.set_index('Año', inplace=True)

# Graficar la serie de tiempo
plt.figure(figsize=(10, 6))
plt.plot(df_centralizacion.index, df_centralizacion['Centralización de Grado'], marker='o', linestyle='-', color='b')
plt.xlabel('Año')
plt.ylabel('Centralización de Grado')
plt.title('Serie de Tiempo de Centralización de Grado Uruguay (2017-2022)')
plt.grid(True)
plt.show()

NameError: name 'centralizacion2018' is not defined

### Asortatividad

In [22]:
asort_anio = [asort2017, asort2018, asort2019, asort2020, asort2021, asort2022]

df_asort = pd.DataFrame({
    'Año': anios,
    'Asortatividad': asort_anio})

df_asort.set_index('Año', inplace=True)

# Graficar la serie de tiempo
plt.figure(figsize=(10, 6))
plt.plot(df_asort.index, df_asort['Asortatividad'], marker='o', linestyle='-', color='b')
plt.xlabel('Año')
plt.ylabel('Asortatividad')
plt.title('Serie de Tiempo de Asortatividad Uruguay (2017-2022)')
plt.grid(True)
plt.show()

NameError: name 'asort2018' is not defined

### Transitividad

In [23]:
trans_anio = [trans2017, trans2018, trans2019, trans2020, trans2021, trans2022]

df_asort = pd.DataFrame({
    'Año': anios,
    'Transitividad': trans_anio})

df_asort.set_index('Año', inplace=True)

# Graficar la serie de tiempo
plt.figure(figsize=(10, 6))
plt.plot(df_asort.index, df_asort['Transitividad'], marker='o', linestyle='-', color='b')
plt.xlabel('Año')
plt.ylabel('Transitividad')
plt.title('Serie de Tiempo de Transitividad Uruguay (2017-2022)')
plt.grid(True)
plt.show()

NameError: name 'trans2018' is not defined

### Comparación métricas


In [24]:
df = pd.DataFrame({
    'Año': anios,
    'Densidad': densidad_anio,
    'Centralización de Grado': centralizacion_anio,
    'Asortatividad': asort_anio,
    'Transitividad': trans_anio
})

df.set_index('Año', inplace=True)

plt.figure(figsize=(12, 7))
plt.plot(df.index, df['Densidad'], marker='o', label='Densidad')
plt.plot(df.index, df['Centralización de Grado'], marker='s', label='Centralización de Grado')
plt.plot(df.index, df['Asortatividad'], marker='^', label='Asortatividad')
plt.plot(df.index, df['Transitividad'], marker='d', label='Transitividad')

plt.xlabel('Año')
plt.ylabel('Valor')
plt.title('Evolución de Métricas de Red Uruguaya (2017-2022)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

NameError: name 'densidad_anio' is not defined