In [12]:
import json
from itertools import combinations

import networkx as nx
import pandas as pd
from networkx.readwrite import json_graph
# Read the CSV and keep only the rows whose 'Authors with affiliations'
# field is populated; rows where it is missing are discarded.
df = pd.read_csv("./data_scopus.csv")
has_affiliations = df['Authors with affiliations'].notna()
df = df.loc[has_affiliations]

# Build the undirected co-authorship graph: every label found in the
# 'Authors with affiliations' column becomes a node, and every pair of distinct
# labels that occur in the same record (row) becomes an edge.
G = nx.Graph()


def _entry_label(author_info):
    """Return the node label for one ';'-separated entry of the column.

    The label is the text after the last comma of the entry, with surrounding
    whitespace removed (the whole entry if it contains no comma).
    """
    # NOTE(review): if entries are laid out as "name, affiliation, ..., country",
    # the last comma field is the tail of the affiliation rather than the
    # author's name -- confirm against the CSV before trusting the node labels.
    return author_info.split(',')[-1].strip()


def _record_labels(author_list):
    """Return the distinct, non-empty labels of one record in first-seen order."""
    labels = (_entry_label(author_info) for author_info in author_list)
    # Blank labels (e.g. produced by a trailing ';') are dropped so they do not
    # become a '' node. dict.fromkeys de-duplicates while preserving order, so a
    # label repeated within one record cannot produce a self-loop edge.
    return list(dict.fromkeys(label for label in labels if label))


# One list of raw ';'-separated entries per record.
authors = df['Authors with affiliations'].str.split(';')

for author_list in authors:
    record_labels = _record_labels(author_list)
    # Nodes are added explicitly so that records with a single label are still
    # represented even though they contribute no edges.
    G.add_nodes_from(record_labels)
    # Each entry is parsed once; combinations() yields every unordered pair.
    G.add_edges_from(combinations(record_labels, 2))

# Serialize the graph in node-link form and write it to a JSON file for
# visualization.
with open("author_network.json", 'w') as json_file:
    graph_payload = json_graph.node_link_data(G)
    json_file.write(json.dumps(graph_payload))

# Additionally store the same graph as GML so it can be loaded for further
# analysis.
nx.write_gml(G, "author_network.gml")
