In [2]:
%pip install networkx

Collecting networkx
  Downloading networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m30.0 MB/s[0m  [33m0:00:00[0m
Installing collected packages: networkx
Successfully installed networkx-3.4.2
Note: you may need to restart the kernel to use updated packages.


In [8]:
from pathlib import Path
import sys
import importlib

# Root of the local Trust-Graph checkout. It has to be on sys.path so the
# project's `src` package can be imported from this notebook.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
PROJECT_ROOT = Path("/Users/sajanshergill/Machine-Learning-Projects/Trust-Graph")

# Prepend only when the root is not already first on sys.path, so re-running
# this cell does not stack duplicate entries while import priority is unchanged.
if not sys.path or sys.path[0] != str(PROJECT_ROOT):
    sys.path.insert(0, str(PROJECT_ROOT))

# Reload src.config before importing from it so that edits made to the module
# on disk are picked up without restarting the kernel.
import src.config
importlib.reload(src.config)
from src.config import Config

from src.data_loader import load_yelp_minimal

# Run configuration for this notebook. The dataset directory is derived from
# PROJECT_ROOT (defined earlier in this cell) instead of repeating the absolute
# path, so the two locations cannot drift apart if the checkout moves.
cfg = Config(
    data_dir=PROJECT_ROOT / "data" / "yelp",
    max_reviews=50_000,
    seed=42,
    burst_window_days=14,
)

# File names of the three Yelp academic dataset dumps, looked up inside
# cfg.data_dir by the loader.
_yelp_files = {
    "review_file": "yelp_academic_dataset_review.json",
    "user_file": "yelp_academic_dataset_user.json",
    "business_file": "yelp_academic_dataset_business.json",
}

# Load the review, user and business tables using the sampling options from cfg.
# NOTE(review): keep_text=False presumably drops the raw review text — confirm
# in src.data_loader.
reviews, users, businesses = load_yelp_minimal(
    data_dir=cfg.data_dir,
    max_reviews=cfg.max_reviews,
    seed=cfg.seed,
    keep_text=False,
    **_yelp_files,
)

print("Reviews loaded:", reviews.shape)


Reading yelp_academic_dataset_review.json: 6990280it [00:18, 384009.87it/s]
Reading yelp_academic_dataset_user.json: 1987897it [00:14, 137190.77it/s]
Reading yelp_academic_dataset_business.json: 150346it [00:01, 122793.86it/s]


Reviews loaded: (50000, 5)


In [9]:
import networkx as nx

from src.graph_builder import (
    build_bipartite_review_graph,
    bipartite_projections
)

In [10]:
# Construct the bipartite review graph from the loaded reviews, then derive
# the two one-mode projections from it.
G = build_bipartite_review_graph(reviews)
Guu, Gbb = bipartite_projections(G)

# Report node and edge counts (in that order) for each of the three graphs.
for _label, _graph in (
    ("Bipartite graph:", G),
    ("User–User graph:", Guu),
    ("Business–Business graph:", Gbb),
):
    print(_label, _graph.number_of_nodes(), _graph.number_of_edges())


Bipartite graph: 74562 49977
User–User graph: 44917 67572
Business–Business graph: 29645 8565


In [11]:
# Top connected reviewers (potential “operators”), ranked by weighted degree
# in the user–user projection.
# NOTE(review): the meaning of the "weight" edge attribute is decided by
# bipartite_projections — presumably shared-business counts; confirm there.
import heapq

TOP_N_USERS = 10  # number of highest-degree reviewers to display

# heapq.nlargest is documented as equivalent to sorted(..., reverse=True)[:n],
# but it avoids fully sorting every node just to keep a handful.
top_users = heapq.nlargest(
    TOP_N_USERS,
    Guu.degree(weight="weight"),
    key=lambda node_degree: node_degree[1],
)
top_users

[('u:tvsKwSmOQe4XgSu4aSspJQ', 76),
 ('u:kVCqtyHokO6qJA-I_X1KJA', 67),
 ('u:dnwg1hFul3KUZYMZTkKHQw', 67),
 ('u:-dgC1iRp_JqBTmn5d1wRhg', 66),
 ('u:pMu_29HFUtmRvPPjSYMo0g', 64),
 ('u:uWewwXuxfe3Q4Ca2I_l8zw', 64),
 ('u:FLGFtGbXaUo0obR6lnZA1Q', 64),
 ('u:omtZLg0ryIeaJfLTM7JPIA', 64),
 ('u:-yrJh-TX-Z2jde5VJz86lg', 64),
 ('u:yzkVncPHvL567WP6kIUdoQ', 64)]