In [1]:
# Importing the necessary libraries
import pandas as pd
import networkx as nx
from itertools import combinations
import json
from networkx.readwrite import json_graph


In [28]:
#Read The CSV FILE
Dt = pd.read_csv('data_scopus.csv')

In [29]:
#Remove the uncessary colums
Uncessary_columns = ['Conference name', 'Conference date', 'Abstract', 'Abbreviated Source Title']
Dt.drop(columns=Uncessary_columns, inplace=True)

In [30]:
# Remove the rows with missing values in sepecific columns
Specific_columns = ['Authors', 'Authors with affiliations', 'Year', 'Publisher']
Dt = Dt.dropna(subset=Specific_columns).copy()

In [31]:
#Fill NaN Values in 'Cited by' with 0
Dt['Cited by'].fillna(0, inplace=True)


In [32]:
#Display Top 5 rows
Dt.head(5)

Unnamed: 0,Title,Year,EID,Publisher,Authors,Author(s) ID,Authors with affiliations,Source title,Cited by
0,Virtual reality applications for the built env...,2020,2-s2.0-85086464158,Elsevier B.V.,"Zhang Y., Liu H., Kang S.-C., Al-Hussein M.",57202536919;53984550800;57215426514;6603541102;,"Zhang, Y., Department of Civil and Environment...",Automation in Construction,0.0
1,"Self-tracking while doing sport: Comfort, moti...",2020,2-s2.0-85082875828,Academic Press,"Rapp A., Tirabeni L.",23398572100;57191836654;,"Rapp, A., Computer Science Department, Univers...",International Journal of Human Computer Studies,0.0
2,"Bridge damage: Detection, IFC-based semantic e...",2020,2-s2.0-85078194587,Elsevier B.V.,"Isailović D., Stojanovic V., Trapp M., Richter...",57205293410;56421700900;24831175200;3619515940...,"Isailović, D., Department for Construction Pro...",Automation in Construction,0.0
3,VR system for spatio-temporal visualization of...,2019,2-s2.0-85075706132,Springer,"Okada K., Yoshida M., Itoh T., Czauderna T., S...",57201502480;57171240600;35373203100;2592569550...,"Okada, K., Ochanomizu University, Tokyo, Japan...",Multimedia Tools and Applications,0.0
4,DiseaSE: A biomedical text analytics system fo...,2019,2-s2.0-85074886243,Academic Press Inc.,"Abulaish M., Parwez M.A., Jahiruddin",6505934038;57202719622;35590173900;,"Abulaish, M., Department of Computer Science, ...",Journal of Biomedical Informatics,0.0


In [33]:
def first_author(x):
    try:
        Countries = [aff.split(",")[-1].strip() for aff in x.split(";")]
        return ','.join(Countries)
    except Exception as ex:
        print(x)
        


In [34]:
Dt['Author_Countries'] = Dt['Authors with affiliations'].apply(first_author)
Dt.head()

Unnamed: 0,Title,Year,EID,Publisher,Authors,Author(s) ID,Authors with affiliations,Source title,Cited by,Author_Countries
0,Virtual reality applications for the built env...,2020,2-s2.0-85086464158,Elsevier B.V.,"Zhang Y., Liu H., Kang S.-C., Al-Hussein M.",57202536919;53984550800;57215426514;6603541102;,"Zhang, Y., Department of Civil and Environment...",Automation in Construction,0.0,"Canada,United States,Canada,Canada"
1,"Self-tracking while doing sport: Comfort, moti...",2020,2-s2.0-85082875828,Academic Press,"Rapp A., Tirabeni L.",23398572100;57191836654;,"Rapp, A., Computer Science Department, Univers...",International Journal of Human Computer Studies,0.0,"Italy,Italy"
2,"Bridge damage: Detection, IFC-based semantic e...",2020,2-s2.0-85078194587,Elsevier B.V.,"Isailović D., Stojanovic V., Trapp M., Richter...",57205293410;56421700900;24831175200;3619515940...,"Isailović, D., Department for Construction Pro...",Automation in Construction,0.0,"Serbia,Germany,Germany,Germany,Serbia,Germany"
3,VR system for spatio-temporal visualization of...,2019,2-s2.0-85075706132,Springer,"Okada K., Yoshida M., Itoh T., Czauderna T., S...",57201502480;57171240600;35373203100;2592569550...,"Okada, K., Ochanomizu University, Tokyo, Japan...",Multimedia Tools and Applications,0.0,"Japan,Japan,Japan,Australia,Australia"
4,DiseaSE: A biomedical text analytics system fo...,2019,2-s2.0-85074886243,Academic Press Inc.,"Abulaish M., Parwez M.A., Jahiruddin",6505934038;57202719622;35590173900;,"Abulaish, M., Department of Computer Science, ...",Journal of Biomedical Informatics,0.0,"India,India,India"


In [35]:
import networkx as nx

# Create a graph
G = nx.Graph()

# Dictionary to store nodes with author_id as the key
nodes_dict = {}

# Iterate through the DataFrame
for _, row in Dt.iterrows():
    author_ids = row['Author(s) ID'].split(';')
    countries = row['Author_Countries'].split(',')
    names = row['Authors'].split(',')
    i = -1
    for author_id in author_ids:
        if author_id not in nodes_dict:
            nodes_dict[author_id] = {
                "Name": names[i].strip(),
                "country": countries[i],
                "num_publications": 0,
                "num_citations": 0,
                "degree": 0,
                "id": author_id
            }
       
        i += 1

        # Update node information
        nodes_dict[author_id]["num_publications"] += 1
        nodes_dict[author_id]["num_citations"] += row['Cited by']
        nodes_dict[author_id]["degree"] += len(author_ids) - 1

# Convert the dictionary values to a list
nodes = list(nodes_dict.values())

print(nodes)

[{'Name': 'Al-Hussein M.', 'country': 'Canada', 'num_publications': 1, 'num_citations': 0.0, 'degree': 4, 'id': '57202536919'}, {'Name': 'Zhang Y.', 'country': 'Canada', 'num_publications': 2, 'num_citations': 19.0, 'degree': 8, 'id': '53984550800'}, {'Name': 'Liu H.', 'country': 'United States', 'num_publications': 1, 'num_citations': 0.0, 'degree': 4, 'id': '57215426514'}, {'Name': 'Kang S.-C.', 'country': 'Canada', 'num_publications': 2, 'num_citations': 19.0, 'degree': 8, 'id': '6603541102'}, {'Name': 'Al-Hussein M.', 'country': 'Canada', 'num_publications': 341, 'num_citations': 4673.0, 'degree': 1133, 'id': ''}, {'Name': 'Tirabeni L.', 'country': 'Italy', 'num_publications': 2, 'num_citations': 50.0, 'degree': 4, 'id': '23398572100'}, {'Name': 'Rapp A.', 'country': 'Italy', 'num_publications': 1, 'num_citations': 0.0, 'degree': 2, 'id': '57191836654'}, {'Name': 'Döllner J.', 'country': 'Germany', 'num_publications': 1, 'num_citations': 0.0, 'degree': 6, 'id': '57205293410'}, {'Na

In [38]:
for node in nodes:
    G.add_node(node["id"], **node)

print(G.nodes(Dt=True))


TypeError: NodeView.__call__() got an unexpected keyword argument 'Dt'

In [24]:
edges = []

for _, row in Dt.iterrows():
    if ";" in row['Author(s) ID']:
        authors = row['Author(s) ID'][:-1].split(";")
        for i, author1 in enumerate(authors):
            for j in range(i + 1, len(authors)):
                edges.append((author1, authors[j]))
    else:
        continue

In [25]:
G.add_edges_from(edges)
print(G.edges(data=True))

[('57202536919', '53984550800', {}), ('57202536919', '57215426514', {}), ('57202536919', '6603541102', {}), ('53984550800', '57215426514', {}), ('53984550800', '6603541102', {}), ('53984550800', '57204697057', {}), ('53984550800', '55642611800', {}), ('57215426514', '6603541102', {}), ('6603541102', '57204697057', {}), ('6603541102', '55642611800', {}), ('23398572100', '57191836654', {}), ('23398572100', '8216952800', {}), ('57205293410', '56421700900', {}), ('57205293410', '24831175200', {}), ('57205293410', '36195159400', {}), ('57205293410', '6507346536', {}), ('57205293410', '6602981892', {}), ('56421700900', '24831175200', {}), ('56421700900', '36195159400', {}), ('56421700900', '6507346536', {}), ('56421700900', '6602981892', {}), ('24831175200', '36195159400', {}), ('24831175200', '6507346536', {}), ('24831175200', '6602981892', {}), ('36195159400', '6507346536', {}), ('36195159400', '6602981892', {}), ('6507346536', '6602981892', {}), ('57201502480', '57171240600', {}), ('57201

In [26]:
with open("publication_network.json",'w') as f:
  json.dump(json_graph.node_link_data(G),f)


nx.write_gml(G,"network.gml")