# In [None]:
import csv
import json
import itertools
import networkx as nx
from networkx.readwrite import json_graph

# Path to the Scopus CSV export that the parsing step below reads.
# The graph object is created once, right before nodes and edges are
# added to it, so no placeholder graph is built here.
csv_path = 'data_scopus.csv'

# Helper that extracts, per author, the country and the affiliation text
# from one "Authors with affiliations" cell.
def get_author_country(author_affiliations):
    """Return parallel lists of countries and affiliations for one cell.

    The cell is split on ``;`` into one entry per author; blank entries
    are skipped.  For every remaining entry:

    * the country is the text after the last comma, or the string
      ``"None"`` when there is no comma or nothing follows it;
    * the affiliation is whatever follows the first two commas
      (presumably the surname and initials -- verify against the data),
      or the last comma-separated piece when the entry has fewer commas.

    Args:
        author_affiliations: The raw cell text.  ``None`` or an empty
            string yields two empty lists.

    Returns:
        A ``(countries, affiliations)`` tuple of equally long lists of
        stripped strings, in the order the entries appear in the cell.
    """
    # A short CSV row can leave the field as None; treat it like "".
    if not author_affiliations:
        return [], []

    countries = []
    affiliations = []

    for raw_entry in author_affiliations.split(';'):
        entry = raw_entry.strip()
        if not entry:
            continue

        # Country: the piece after the last comma, if there is one.
        _, comma, tail = entry.rpartition(',')
        country = tail.strip() if comma else ''
        countries.append(country or "None")

        # With maxsplit=2 the last piece is either "everything after the
        # name" (three pieces) or the fallback piece (fewer than three),
        # so a single expression covers both situations.
        affiliations.append(entry.split(',', 2)[-1].strip())

    return countries, affiliations


# Accumulators for the co-authorship graph.
#   nodes    -- (author_id, attribute_dict) tuples, one per unique author ID
#   edges    -- (author_id_a, author_id_b) tuples, one per unique co-author pair
#   node_ids -- author IDs in first-seen order
#   links    -- not filled by this step; kept so the name stays defined
nodes = []
links = []
node_ids = []
edges = []

# Hash-based companions to node_ids / edges.  Membership tests against the
# lists are linear, which makes the loop below quadratic on large exports.
_seen_node_ids = set()
_seen_edges = set()


def _field_at(values, index, placeholder="None"):
    """Return ``values[index]`` stripped, or *placeholder* if the list is too short."""
    return values[index].strip() if index < len(values) else placeholder


# Step 1: Parse the CSV file and collect nodes and edges.
# 'utf-8-sig' drops a leading byte-order mark (which would otherwise end up
# inside the first column name) and reads BOM-less files unchanged;
# newline='' is what the csv module asks for so quoted fields containing
# line breaks are parsed correctly.
with open(csv_path, mode='r', encoding='utf-8-sig', newline='') as file:
    reader = csv.DictReader(file)

    for row in reader:
        # DictReader fills missing trailing fields with None; `or ''`
        # keeps such short rows from crashing the string handling below.
        author_countries, author_affiliations = get_author_country(
            row['Authors with affiliations'] or ''
        )
        authors = (row['Authors'] or '').split(',')
        authors_id = [
            author_id.strip()
            for author_id in (row['Author(s) ID'] or '').split(';')
            if author_id.strip()
        ]

        # Skip publications for which no affiliation entry could be parsed.
        if not author_countries:
            continue

        # Step 2: Add a node for each author ID not seen before.  The three
        # per-author lists are matched by position; when one of them is
        # shorter than the ID list the missing value becomes "None"
        # instead of raising IndexError.
        for i, author_id in enumerate(authors_id):
            if author_id in _seen_node_ids:
                continue
            _seen_node_ids.add(author_id)
            node_ids.append(author_id)
            nodes.append((author_id, {
                'name': _field_at(authors, i),
                'country': _field_at(author_countries, i),
                'affiliation': _field_at(author_affiliations, i),
            }))

        # Step 3: Connect every pair of co-authors of this publication.
        # A frozenset key makes (a, b) and (b, a) the same edge.
        for author_pair in itertools.combinations(authors_id, 2):
            pair_key = frozenset(author_pair)
            if pair_key not in _seen_edges:
                _seen_edges.add(pair_key)
                edges.append(author_pair)

# Build the undirected co-authorship graph from the collected data:
# nodes carry their attribute dicts, edges are plain ID pairs.
G = nx.Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

# Export 1: GraphML.
graphml_path = 'author_network.graphml'
nx.write_graphml(G, graphml_path)

# Quick visual check: spring layout, small markers.
pos = nx.drawing.spring_layout(G)
nx.draw(G, pos=pos, node_size=40)

# Export 2: node-link JSON.
json_path = 'author_network.json'  # Destination for the JSON file
with open(json_path, 'w', encoding='utf-8') as json_file:
    json_file.write(json.dumps(json_graph.node_link_data(G)))

print("JSON file with author countries has been created successfully!")