<h1 style="text-align:center;"><strong>Collaboration Strength Measure Calculation</strong></h1>

### Importar librerías

In [1]:
import os
import time
from getpass import getpass

import pandas as pd
from py2neo import Graph

### Configurar la conexión a la base de datos de Neo4j

In [2]:
# Connection settings are read from the environment so that the password is
# not stored in the notebook. The URI and user fall back to the local defaults
# used previously; if NEO4J_PASSWORD is unset the password is requested
# interactively instead of being hardcoded.
graph = Graph(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: "),
    ),
)

### Obtener los autores que se relacionan a través de artículos y su número de autores

In [3]:
# Find every pair of authors linked through a common article, together with
# that article's author count. The id(au1) < id(au2) filter keeps a single
# ordering of each pair, so a pair is returned once per shared article.
query = """
match (au1:Author)-[:WROTE]-(ar:Article)-[:WROTE]-(au2:Author)
WHERE id(au1) < id(au2)
return au1.scopus_id, ar.author_count, au2.scopus_id
"""
coauthor_cursor = graph.run(query)
res = coauthor_cursor.to_table()

### Cargar la data en un dataframe

In [6]:
# Column labels follow the order of the query's RETURN clause:
# first author id, article author count, second author id.
pair_columns = ['au1', 'au_count', 'au2']
data = res
df = pd.DataFrame(data, columns=pair_columns)

In [7]:
# Display the author-pair rows loaded from Neo4j (one row per pair and shared article).
df

Unnamed: 0,au1,au_count,au2
0,55981620200,6,57216628614
1,51964445400,7,57221733500
2,57194568539,5,8905349600
3,57194568539,6,8905349600
4,57202822939,85,57202822886
...,...,...,...
162228,57189324222,6,57192640335
162229,57192646879,6,57192640335
162230,57192644219,6,57192640335
162231,56565030800,3,57192640335


### Obtener la fuerza de colaboración entre autores

In [8]:
def _article_pair_weight(author_count):
    """Return the weight one article contributes to a pair of its authors.

    The weight is ``1 / (n - 1)`` where ``n`` is the article's author count,
    converted to ``int`` first.
    """
    return 1 / (int(author_count) - 1)


# One weight per (pair, article) row; the rows are summed per pair later on.
df['collab_strength'] = df['au_count'].apply(_article_pair_weight)

In [9]:
# Display the frame again, now including the collab_strength column.
df

Unnamed: 0,au1,au_count,au2,collab_strength
0,55981620200,6,57216628614,0.200000
1,51964445400,7,57221733500,0.166667
2,57194568539,5,8905349600,0.250000
3,57194568539,6,8905349600,0.200000
4,57202822939,85,57202822886,0.011905
...,...,...,...,...
162228,57189324222,6,57192640335,0.200000
162229,57192646879,6,57192640335,0.200000
162230,57192644219,6,57192640335,0.200000
162231,56565030800,3,57192640335,0.500000


### Obtener la suma de la fuerza de colaboración entre autores

In [10]:
# Add up the per-article weights of each (au1, au2) pair and turn the group
# keys back into ordinary columns.
pair_groups = df.groupby(['au1', 'au2'])
df1 = pair_groups['collab_strength'].sum().reset_index()

In [11]:
# Display the summed collaboration strength per author pair.
df1

Unnamed: 0,au1,au2,collab_strength
0,10040712400,15842091200,0.750000
1,10040712400,18133417900,0.200000
2,10040712400,25522363800,0.250000
3,10040712400,35087917700,0.666667
4,10040712400,35298881500,0.666667
...,...,...,...
118294,9942669300,57210120494,0.500000
118295,9942669300,57215195500,1.333333
118296,9942669300,57222231973,0.500000
118297,9942669300,57225265478,0.500000


#### Ejemplo de fuerza de colaboración de la autora Lorena Recalde y sus coautores

In [12]:
# Scopus id of the example author.
author_id = '57193901649'

# Pairs are ordered by Neo4j's internal node id (id(au1) < id(au2) in the
# query), not by Scopus id, so the author may sit in either column. Both
# columns are checked so that no co-author is left out.
df1.loc[(df1['au1'] == author_id) | (df1['au2'] == author_id)]

Unnamed: 0,au1,au2,collab_strength
9532,36610928700,57193901649,0.5
19964,56209050900,57193901649,1.333333
21567,56442195300,57193901649,2.166667
29962,57189242187,57193901649,0.833333
54283,57202686559,57193901649,0.333333
54400,57202714657,57193901649,0.333333
57678,57203185972,57193901649,1.0
93672,57222629713,57193901649,0.833333
100930,57297418300,57193901649,0.5
100951,57297831900,57193901649,0.333333


###  Definir la ruta del directorio

In [3]:
# Name of the dated sub-directory (under data/completo/) where the exported files are written.
directorio = "13-07-2022"

###  Almacenar la data en Pickle y CSV

In [14]:
# Save the per-pair collaboration strengths as a pickle inside the dated directory.
df1.to_pickle(f'data/completo/{directorio}/collab_strength.pkl')

In [15]:
# Export the per-pair collaboration strengths for Neo4j's LOAD CSV.
# index=False keeps the file to the au1, au2 and collab_strength columns;
# without it pandas also writes the row index as an extra, unnamed column.
df1.to_csv(f'data/completo/{directorio}/collab_strength.csv', index=False)

### Definir las rutas de los archivos que contienen la data

In [3]:
# file:// URL prefix of the folder that holds the exported CSV; it is used to build the LOAD CSV source.
# NOTE(review): absolute, machine-specific path whose date segment repeats the value of
# `directorio` -- keep both in sync when the export date changes.
base_path = "file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/"

In [4]:
# Full URL of the collaboration-strength CSV, printed for a quick visual check.
collab_strengths_path = f"{base_path}collab_strength.csv"
print(collab_strengths_path)

file:///T:/Tesis/API-Elsevier/src/data/completo/13-07-2022/collab_strength.csv


### Cargar la data en Neo4j

In [5]:
#collab_strength
query = """
USING PERIODIC COMMIT 500
LOAD CSV WITH HEADERS 
FROM """+'"'+collab_strengths_path+'"'+""" AS csvLine
MATCH (au1:Author {scopus_id: csvLine.au1}), 
(au2:Author {scopus_id: csvLine.au2})
CREATE (au1)-[r:CO_AUTHORED{collab_strength:csvLine.collab_strength}]->(au2)
RETURN count(r)
"""
graph.run(query)

count(r)
118299
