## Load Data

In [2]:
from scripts.preprocess_data import *
# Load, clean and normalize the df
df = load_data("dataset.json")
df = clean_data(df)
df = normalize_data(df)


In [19]:
import pandas as pd
import networkx as nx

relations = pd.read_csv("graph.csv")

## Co-citation and Biblographic Coupling

In [None]:
# Build a graph from edge list
G = nx.from_pandas_edgelist(relations, source='source', target='target')

# Co-citation: Nodes sharing common neighbors
co_citation = {}
for node in G.nodes:
    neighbors = list(G.neighbors(node))
    co_citation[node] = {n: len(set(neighbors) & set(G.neighbors(n))) for n in neighbors}

# Bibliographic coupling: Nodes sharing common connections
bib_coupling = {}
for node in G.nodes:
    connected = [n for n in G.nodes if G.has_edge(node, n)]
    bib_coupling[node] = {n: len(set(connected) & set([nb for nb in G.neighbors(n)])) for n in connected}

print("Co-Citation:", co_citation)
print("Bibliographic Coupling:", bib_coupling)


## Cycles & Acyclic Networks

In [None]:
is_acyclic = nx.is_directed_acyclic_graph(G)  # If directed
print("Acyclic:", is_acyclic)
cycles = list(nx.simple_cycles(G)) if not is_acyclic else []
print("Cycles:", cycles)


## Hypergraphs

In [20]:
import hypernetx as hnx

# Group targets by source
hyper_edges = relations.groupby('source')['target'].apply(list).to_dict()

# Create the hypergraph
H = hnx.Hypergraph(hyper_edges)

# Inspect the hypergraph
print("Hypergraph Nodes:", H.nodes)
print("Hypergraph Edges:", H.edges)



Hypergraph Nodes: <hypernetx.classes.hyp_view.HypergraphView object at 0x129c1e4e0>
Hypergraph Edges: <hypernetx.classes.hyp_view.HypergraphView object at 0x129c1cb90>


In [27]:
#print(hyper_edges)
print(H.edges[1084949174])

[1050463558, 1002148361, 1000741574, 1019020642, 1044910984, 1032965820, 1045532631, 1062570867, 1093616433, 1046494658, 1071753837, 1005890294, 1088712592, 1087450175, 1059839736, 1069823047, 1026118838, 1073919686, 1060428001, 1050607585, 1001691761, 1070498956, 1071870955, 1035595201, 1051971605, 1015062812, 1034483550, 1029385317, 1038596028, 1051729820, 1004123432, 1012378934, 1079898450, 1063959866, 1054238235, 1082653249, 1061759881, 1072398557, 1045545909, 1040044735, 1051921689, 1068393016, 1039859493, 1014561778, 1030635339, 1006065364]


In [23]:
import matplotlib.pyplot as plt
# List all nodes
print("Nodes in Hypergraph:")
for node in H.nodes:
    print(node)

# List all edges and their associated nodes
print("\nEdges in Hypergraph:")
for edge in H.edges:
    print(f"Edge {edge}: Members {H.edges[edge]}")

# Draw the hypergraph
#hnx.draw(H, with_edge_labels=True)
#plt.show()

Nodes in Hypergraph:
1050461344
1002884435
1036242859
1029214256
1006326035
1001084736
1023104358
1084536257
1051353372
1000365252
1093760914
1027640322
1054131785
1060922925
1028024076
1098182380
1077090669
1088290947
1049083372
1084792495
5902349869
1085657302
1005258925
1077585823
1010058843
1095144966
1053619964
1003240794
1088169096
1010626427
1093026435
1017004092
1025211230
1016134983
1074610708
1059380352
1037712021
1052250564
1054862743
1081298254
1033202867
1034241676
1028918733
1012293123
1099923757
1094999359
1067197288
1018891530
1043210746
1035626982
1095976791
1039861172
1004063287
1041120696
1026528730
1071859046
1053353253
1039536260
1002671341
1008168825
1044010125
1091359628
1096291640
1077477370
1096463502
1020058261
1075218379
1062888768
1085218331
1013745680
1050255454
1033666756
1028815652
1098047840
1051521309
1025107843
1094123605
1045130298
1070418672
1047005782
1007851165
1080023020
1080425450
1014921248
1041120762
1086710425
1019722343
1006427114
1014427304


AttributeError: 'NoneType' object has no attribute 'members'

In [22]:
print(relations.head())

       source      target  weight
0  1084949174  1050463558       1
1  1084949174  1002148361       1
2  1084949174  1000741574       1
3  1084949174  1019020642       1
4  1084949174  1044910984       1


## Degree and Density

In [None]:
degree = dict(G.degree())
density = nx.density(G)
print("Degree:", degree)
print("Density:", density)

## Centrality Measures

In [None]:
degree_centrality = nx.degree_centrality(G)
print("Degree Centrality:", degree_centrality)


In [None]:
eigenvector_centrality = nx.eigenvector_centrality(G)
print("Eigenvector Centrality:", eigenvector_centrality)


In [None]:
katz_centrality = nx.katz_centrality(G, alpha=0.1, beta=1.0)
pagerank = nx.pagerank(G)
print("Katz Centrality:", katz_centrality)
print("PageRank:", pagerank)


In [None]:
closeness_centrality = nx.closeness_centrality(G)
print("Closeness Centrality:", closeness_centrality)


In [None]:
betweenness_centrality = nx.betweenness_centrality(G)
print("Betweenness Centrality:", betweenness_centrality)
