# main.ipynb

Rationale:
EPFL faculty members participate in numerous collaborative projects, whether with other faculty members in Switzerland and abroad or with companies. These data are valuable for mapping this collaborative network and for fostering future groundbreaking projects.

Objectives:
1. Concatenate various project data sources (i. SNF, ii. Horizon Europe)
2. Map the collaborative network
3. Describe the network (quantify KOL, find clusters, ...)
4. Find applicability to project Launch (complementary domains, best team, ...)

# Initialisation

In [4]:
import os
import json
import re
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

from src import *


In [5]:
%load_ext autoreload
%autoreload 2

In [None]:
# Root folder of the datasets: the 'data' directory under the current working directory
dataset_FolderPath = Path.cwd().joinpath('data')

# SNF Database

## Load Persons

In [None]:
# Read the SNF person table, loading only the columns used in this notebook
Person_filename = 'Person.csv'
Person_columns_to_load = [
    # name
    'Surname', 'FirstName',
    # affiliation
    'ResearchInstitution', 'InstituteCountry',
    # person identifier
    'PersonNumber',
    # grant numbers, one column per role the person held on a grant
    'ResponsibleApplicantGrantNumber', 'CoApplicantGrantNumber', 'ProjectPartnerGrantNumber',
]

# The file is read with ';' as the field separator
Person_csv_path = dataset_FolderPath / 'SNF' / Person_filename
Person_df = pd.read_csv(
    Person_csv_path,
    sep=';',
    usecols=Person_columns_to_load,
)
Person_df

Unnamed: 0,Surname,FirstName,InstituteCountry,ResearchInstitution,PersonNumber,ResponsibleApplicantGrantNumber,CoApplicantGrantNumber,ProjectPartnerGrantNumber
0,Schmocker,Beat,Switzerland,"Non-profit organisations (libraries, museums, ...",533578,,,
1,Perrin,Nicolas,Switzerland,University of Lausanne – LA,44113,166323;147625;147091;129894;112511;108100;1003...,142108;41191;34592,
2,Jäger,Peter,,,530171,,,
3,Beck,Jan,Switzerland,University of Basel – BS,541633,151055;135899;119879,,
4,Wittmann,Frank,,,139096,119336;69255,,
...,...,...,...,...,...,...,...,...
141383,Szlajfer,Feliks,,,30076,29427,,
141384,Kosek,Eva,Sweden,,572943,,,182686
141385,Nunez,Rafael,United States of America,,59355,33279,,
141386,Fersztand,Anna Raphaela,Switzerland;Switzerland,University of Zurich – ZH;University of Zurich...,809705,222552,,


In [None]:
# Keep people whose affiliation mentions EPFL and who hold grants in at least
# two of the three roles (responsible applicant, co-applicant, project partner).
# NOTE(review): someone with grants in a single role only (e.g. only as
# co-applicant) is excluded by the ">= 2" rule - confirm this is intended.
role_columns = ['ResponsibleApplicantGrantNumber', 'CoApplicantGrantNumber', 'ProjectPartnerGrantNumber']

# Missing affiliations count as "not EPFL" (na=False)
has_epfl_affiliation = Person_df['ResearchInstitution'].str.contains('EPF Lausanne – EPFL', na=False)

# Number of role columns that are filled in for each person
filled_role_count = Person_df[role_columns].notna().sum(axis=1)

EPFL_faculty_collab = Person_df[has_epfl_affiliation & (filled_role_count >= 2)]
EPFL_faculty_collab


Unnamed: 0,Surname,FirstName,InstituteCountry,ResearchInstitution,PersonNumber,ResponsibleApplicantGrantNumber,CoApplicantGrantNumber,ProjectPartnerGrantNumber
720,Weinand,Yves,Switzerland,EPF Lausanne – EPFL,515936,189596;137884;137704;126802;120037;112103;1037...,205604;182887;141853;127467,
1207,Neronov,Andrii,France;Switzerland;Switzerland,Institution abroad – IACH;EPF Lausanne – EPFL;...,137689,144923;123426,219937;158533;154221;135263;111020,165932;165902
1281,Knott,Graham,Switzerland,EPF Lausanne – EPFL,42271,170082;112335,213528;170955;170767;154453;130470,219656;212233;177237;173125
1400,Jolles-Haeberli,Brigitte,Switzerland;Switzerland;Switzerland;Switzerlan...,University of Lausanne – LA;University of Laus...,122322,166433;104752,184851;137940;120422;120136;105880;64951,
1779,Boillat,Eric,Switzerland,EPF Lausanne – EPFL,42669,155966;132115;109418;68042,66763;54156,
...,...,...,...,...,...,...,...,...
140919,Meyer,Thierry,Switzerland,EPF Lausanne – EPFL,46282,140209;129516;115900;109051;101477;61403,54480;36611,
140943,Pimienta,Lucas Xan,Switzerland,EPF Lausanne – EPFL,711504,190789,,212730
141058,Hughes,Josie,Switzerland;Switzerland,EPF Lausanne – EPFL;EPF Lausanne – EPFL,781436,10001574;226507;221366,,212101
141197,Pralong,William-François,Switzerland;Switzerland;Switzerland,University of Lausanne – LA;EPF Lausanne – EPF...,36406,40511,65404;64149;58682;58663;32376,


### Build network of PIs linked by projects

In [None]:
import networkx as nx
from pyvis.network import Network
from itertools import combinations

In [83]:
# Pre-processing of data: build a long table with one row per
# (person, role, project) link, used as the edge list of the network.
#
# Each grant-number column holds a ';'-separated string of grant numbers, or
# NaN when the person has no grant in that role.
grant_role_columns = [
    'ResponsibleApplicantGrantNumber',
    'CoApplicantGrantNumber',
    'ProjectPartnerGrantNumber',
]

df = EPFL_faculty_collab.copy()
for role_column in grant_role_columns:
    # Split BEFORE exploding: explode() only expands list-likes and leaves plain
    # strings untouched, so an un-split column would keep a value such as
    # '189596;137884' as one bogus project ID.
    df[role_column] = df[role_column].str.split(';')
    df = df.explode(role_column).reset_index(drop=True)

# Wide -> long: one row per (person, role, grant number)
melted_df = df.melt(
    id_vars=['Surname', 'FirstName', 'PersonNumber'],
    value_vars=grant_role_columns,
    var_name='ApplicantType',
    value_name='ProjectID',
)

# Exploding the three columns in sequence produces a per-person cross product,
# so every (person, role, project) link is repeated many times after the melt;
# keep each link once. Rows with a missing value (e.g. no grant in that role)
# are dropped.
melted_df = melted_df.dropna().drop_duplicates().reset_index(drop=True)
melted_df = melted_df.rename(columns={'PersonNumber': 'PersonID'})

# NOTE(review): ProjectID values are strings after the split; cast them before
# joining against a numeric grant-number column - confirm the dtype there.
network_data = melted_df

In [None]:
### Using pyvis (DEPRECATED)

# Person-project graph: one node per person, one node per project, and an edge
# for every row of network_data linking the two.
G = nx.Graph()

for person_id, project_id in zip(network_data['PersonID'], network_data['ProjectID']):
    G.add_node(person_id, label=f"Person {person_id}")
    G.add_node(project_id, label=f"Project {project_id}")
    G.add_edge(person_id, project_id)

# Hand the NetworkX graph over to a PyVis network for interactive rendering
net = Network(height="750px", width="100%", notebook=True)
net.from_nx(G)

# Write the interactive view to an HTML file
net.show("example.html", notebook=False)

example.html


## Load projects

In [32]:
# Read the SNF grant table (with abstracts), loading only the columns used in this notebook
Project_filename = 'GrantWithAbstracts.csv'
Project_columns_to_load = [
    # identification
    'GrantNumber', 'Title', 'FundingInstrumentPublished',
    # effective start and end of the grant
    'EffectiveGrantStartDate', 'EffectiveGrantEndDate',
    # amount granted
    'AmountGrantedAllSets',
    # abstract text and grant state
    'Abstract', 'State',
]

# The file is read with ';' as the field separator
Project_csv_path = dataset_FolderPath / 'SNF' / Project_filename
Project_df = pd.read_csv(
    Project_csv_path,
    sep=';',
    usecols=Project_columns_to_load,
)
Project_df

Unnamed: 0,GrantNumber,Title,FundingInstrumentPublished,EffectiveGrantStartDate,EffectiveGrantEndDate,AmountGrantedAllSets,Abstract,State
0,172205,IceCube Event Reconstruction,Early Postdoc.Mobility,2017-01-01T00:00:00Z,2018-06-30T00:00:00Z,74145.0,The IceCube observatory at the South Pole is a...,Completed
1,58440,Des images pour agir. Le visuel en urbanisme.,Publication grants,2000-03-07T00:00:00Z,2000-04-06T00:00:00Z,12000.0,,Completed
2,4578,Synthese und Biosynthese von 'Betalain-Merocya...,Project funding,1985-07-01T00:00:00Z,1987-09-30T00:00:00Z,174480.0,,Completed
3,106888,Les manuscrits romains de musique pour le clav...,Fellowships for prospective researchers,2004-12-01T00:00:00Z,2005-07-31T00:00:00Z,25470.0,,Completed
4,152511,Ramsey spectroscopy in Rb vapour cells and app...,SCOPES,2014-06-01T00:00:00Z,2018-10-31T00:00:00Z,104599.0,In this project the teams of Prof. Gaetano Mil...,Completed
...,...,...,...,...,...,...,...,...
88300,6025,Aspects cellulaires et sub-cellulaires de la r...,Project funding,1976-10-01T00:00:00Z,1978-09-30T00:00:00Z,584618.0,,Completed
88301,139309,Medical end-of-life decisions: prevalence and ...,NRP 67 End of Life,2013-03-01T00:00:00Z,2018-01-31T00:00:00Z,640027.0,BackgroundEthical issues in end-of-life decisi...,Completed
88302,194873,"Encountering, occupying and exceeding categori...",Doc.Mobility,2021-10-01T00:00:00Z,2022-09-30T00:00:00Z,51200.0,My Ph.D. dissertation investigates the Latin A...,Completed
88303,28493,Etudes de la flore et de la végétation du Chac...,Project funding,1990-10-01T00:00:00Z,1993-09-30T00:00:00Z,335662.0,,Completed
