In [23]:
import json
import networkx as nx
from collections import Counter

In [14]:
# Load data
with open("data/tmdb_movies.json", "r", encoding="utf-8") as f:
    movies = json.load(f)

with open("data/recommended_movie_data.json", "r", encoding="utf-8") as f:
    recommended_data = json.load(f)

# Build lookups
recommended_lookup = {movie["id"]: movie for movie in recommended_data}
movie_dict = {movie["id"]: movie for movie in movies}

# Merge recommended movies into a unified list (if not already present)
all_movies = {**movie_dict, **recommended_lookup}

In [15]:
G = nx.read_gexf("networks/movies.gexf")

centralities = {
    "degree": nx.degree_centrality(G),
    "betweenness": nx.betweenness_centrality(G),
    "closeness": nx.closeness_centrality(G),
    "eigenvector": nx.eigenvector_centrality(G),
    "pagerank": nx.pagerank(G),
}

In [19]:
for name, scores in centralities.items():
    top = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]
    print(f"{name.title()} Centrality Top 3:")

    for idx, (node_id, value) in enumerate(top, start=1):
        label = G.nodes[node_id].get('label', node_id)
        genres = G.nodes[node_id].get('genres', node_id)
        print(f"  {idx}. {label}: {value}  {genres}")

Degree Centrality Top 3:
  1. The Shawshank Redemption: 0.045489891135303266  Drama,Crime
  2. Joker: 0.04354587869362364  Drama,Crime,Thriller
  3. Interstellar: 0.04315707620528771  Science Fiction,Drama,Adventure
  4. Parasite: 0.03926905132192846  Drama,Comedy,Thriller
  5. The Godfather: 0.03693623639191291  Drama,Crime
Betweenness Centrality Top 3:
  1. 12 Angry Men: 0.02490521848287073  Drama
  2. Parasite: 0.022140816252467554  Drama,Comedy,Thriller
  3. Back to the Future: 0.019329033734161234  Science Fiction,Adventure,Comedy
  4. The Lives of Others: 0.017377478858977058  Drama,Thriller
  5. The Shawshank Redemption: 0.015954640118550616  Drama,Crime
Closeness Centrality Top 3:
  1. Fight Club: 0.24879556900546104  Drama
  2. Inception: 0.24460350673016173  Science Fiction,Adventure,Action
  3. Back to the Future: 0.24207848376497476  Science Fiction,Adventure,Comedy
  4. Pulp Fiction: 0.24131200835463712  Crime,Thriller
  5. The Dark Knight: 0.24061365737018542  Drama,Actio

In [30]:
for name, scores in centralities.items():
    top = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    genre_counter = Counter()
    for node_id, value in top:
        genres = G.nodes[node_id].get('genres', [])
        genre_counter.update(genres.split(","))
    total = sum(genre_counter.values())
    print(f"{name.title()} Centrality Top Genres (Normalized):")
    for genre, count in genre_counter.most_common(5):
        print(f"  {genre}: {count / total:.2f}")
    print()

Degree Centrality Top Genres (Normalized):
  Drama: 0.22
  Comedy: 0.11
  Thriller: 0.09
  Action: 0.08
  Romance: 0.07

Betweenness Centrality Top Genres (Normalized):
  Drama: 0.22
  Comedy: 0.11
  Thriller: 0.09
  Action: 0.08
  Romance: 0.07

Closeness Centrality Top Genres (Normalized):
  Drama: 0.22
  Comedy: 0.11
  Thriller: 0.09
  Action: 0.08
  Romance: 0.07

Eigenvector Centrality Top Genres (Normalized):
  Drama: 0.22
  Comedy: 0.11
  Thriller: 0.09
  Action: 0.08
  Romance: 0.07

Pagerank Centrality Top Genres (Normalized):
  Drama: 0.22
  Comedy: 0.11
  Thriller: 0.09
  Action: 0.08
  Romance: 0.07



In [4]:
C = nx.read_gexf("networks/countries.gexf")

centralities_c = {
    "degree": nx.degree_centrality(C),
    "betweenness": nx.betweenness_centrality(C),
    "closeness": nx.closeness_centrality(C),
    "eigenvector": nx.eigenvector_centrality(C),
    "pagerank": nx.pagerank(C),
}

In [12]:
for name, scores in centralities_c.items():
    top = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:5]
    print(f"{name.title()} Centrality Top 3:")
    for idx, (node_id, value) in enumerate(top, start=1):
        label = C.nodes[node_id].get('label', node_id)
        print(f"  {idx}. {label}: {value}")

Degree Centrality Top 3:
  1. us: 0.9875
  2. gb: 0.8625
  3. fr: 0.8250000000000001
  4. it: 0.7250000000000001
  5. ca: 0.7125
Betweenness Centrality Top 3:
  1. us: 0.23472577654238622
  2. gb: 0.09221497686386498
  3. fr: 0.08681991745772966
  4. it: 0.05998860316620501
  5. ca: 0.03887878009812863
Closeness Centrality Top 3:
  1. us: 0.9876543209876543
  2. gb: 0.8791208791208791
  3. fr: 0.851063829787234
  4. it: 0.7843137254901961
  5. ca: 0.7766990291262136
Eigenvector Centrality Top 3:
  1. us: 0.2289208071610199
  2. gb: 0.22264957373801847
  3. fr: 0.2174890384457987
  4. ca: 0.20659111353922135
  5. it: 0.20277710679002164
Pagerank Centrality Top 3:
  1. us: 0.30481265181651634
  2. gb: 0.13518687245120523
  3. fr: 0.0674944344465196
  4. de: 0.04900907871525968
  5. ca: 0.033889300526641866
