In [7]:
pip install pandas numpy networkx scikit-learn scipy

Collecting pandas
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting numpy
  Downloading numpy-2.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Collecting networkx
  Downloading networkx-3.6.1-py3-none-any.whl.metadata (6.8 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.8.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (11 kB)
Collecting scipy
  Downloading scipy-1.16.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting joblib>=1.3.0 (from scikit-learn)
  Downloading joblib-1.5.3-py3-none-any.whl.metadata (5.5 kB)
Collecting threadpoolctl>=3.2.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.4

In [8]:
import os
import numpy as np
import pandas as pd
import networkx as nx
from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.feature_selection import mutual_info_classif
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.stats import ttest_rel, wilcoxon, f_oneway

In [9]:
# --- Input / output locations ------------------------------------------------
# Edge list consumed by load_filmtrust().
TRUST_PATH = "filmtrust_data/trust.txt"
# Directory that receives the result CSV.
OUTPUT_DIR = "outputs_filmtrust"

# --- Experiment settings -----------------------------------------------------
# Mixing weights w swept in the evaluation loop (w * AUC + (1 - w) * MI).
W_VALUES = [0.6, 0.7, 0.8, 0.9]
# Small constant added to denominators and log arguments to avoid 0-division / log(0).
EPS = 1e-15

# Create the output directory up front; a pre-existing directory is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [10]:
def load_filmtrust(path):
    """Load a FilmTrust trust edge list and binarise its labels.

    Each line of the file is expected to hold three whitespace-separated
    fields: ``[TrusterID] [TrusteeID] [Value]``. The file has no header row.

    Parameters
    ----------
    path : str or path-like
        Location of the trust file.

    Returns
    -------
    pandas.DataFrame
        Columns ``u`` (truster), ``v`` (trustee) and ``label``, where
        ``label`` is 1 when the raw value is >= 1 and 0 otherwise.
    """
    # A regex whitespace separator accepts tabs and runs of blanks. A literal
    # ' ' separator turns every extra blank into an empty field, which shifts
    # the values out of the u / v / label columns.
    df = pd.read_csv(path, sep=r"\s+", header=None, names=["u", "v", "label"])
    # Standardizing labels to binary
    df["label"] = (df["label"] >= 1).astype(int)
    return df

# Read the edge list once; the binarised label column is the target vector.
df = load_filmtrust(TRUST_PATH)
y = df["label"].to_numpy()
print(f"[OK] Loaded FilmTrust | edges: {len(df)}")

[OK] Loaded FilmTrust | edges: 1853


In [11]:
# Directed trust graph: one edge u -> v per row of df.
# Edges are inserted in bulk from the two ID columns instead of looping over
# df.iterrows(), which constructs a Series object for every row.
G = nx.DiGraph()
G.add_edges_from(zip(df["u"], df["v"]))
# Undirected copy of the same graph, used for the neighbourhood-based features.
UG = G.to_undirected()

In [12]:
# Build one feature record per labelled edge (u, v).
rows = []
# itertuples() yields lightweight namedtuples (fields u, v, label) rather than
# constructing a Series per row as iterrows() does.
for r in df.itertuples(index=False):
    u, v = r.u, r.v

    # Connectivity features
    if UG.has_node(u) and UG.has_node(v):
        cn = len(list(nx.common_neighbors(UG, u, v)))
    else:
        cn = 0

    try:
        jaccard = next(nx.jaccard_coefficient(UG, [(u, v)]))[2]
        adamic = next(nx.adamic_adar_index(UG, [(u, v)]))[2]
    except (nx.NetworkXError, StopIteration, ZeroDivisionError):
        # Adamic-Adar sums 1 / log(degree) over shared neighbours, so a shared
        # neighbour of degree 1 (possible when u == v) divides by zero.
        # Treat that like the other failure modes and fall back to 0.
        jaccard, adamic = 0, 0

    rows.append({
        "u": u, "v": v, "label": r.label,
        # Directed degree counts of both endpoints.
        "u_in": G.in_degree(u), "u_out": G.out_degree(u),
        "v_in": G.in_degree(v), "v_out": G.out_degree(v),
        "cn": cn, "jaccard": jaccard, "adamic": adamic,
        # Preferential attachment: product of total (in + out) degrees.
        "pa": G.degree(u) * G.degree(v)
    })

# Any missing feature value is replaced by 0.
feature_df = pd.DataFrame(rows).fillna(0)

In [13]:
# Feature groups: per-endpoint degree counts vs. pairwise link scores.
node_cols = ["u_in", "u_out", "v_in", "v_out"]
link_cols = ["jaccard", "adamic", "pa", "cn"]

# Scaling
# NOTE: feature_df is modified in place, so re-running this cell applies the
# transforms a second time on already-scaled values.
# Degree features: log1p first, then standardise to zero mean / unit variance.
feature_df[node_cols] = np.log1p(feature_df[node_cols])
feature_df[node_cols] = StandardScaler().fit_transform(feature_df[node_cols])

# Link features: log1p on "pa" only, then rescale all four columns to [0, 1].
feature_df["pa"] = np.log1p(feature_df["pa"])
feature_df[link_cols] = MinMaxScaler().fit_transform(feature_df[link_cols])

X = feature_df[node_cols + link_cols]

# Reliability logic
has_both_classes = len(np.unique(y)) > 1
if not has_both_classes:
    # If FilmTrust only has positive trust, we use default weights
    auc_scores = dict.fromkeys(X.columns, 1.0)
    mi_vals = np.ones(len(X.columns))
else:
    # Per-feature AUC (feature value used directly as the score) and mutual
    # information with the label.
    auc_scores = {}
    for col in X.columns:
        auc_scores[col] = roc_auc_score(y, X[col])
    mi_vals = mutual_info_classif(X, y, random_state=0)

# Min-max normalise the MI values across features and key them by column name.
mi_scaled = MinMaxScaler().fit_transform(mi_vals.reshape(-1, 1)).flatten()
mi_norm = dict(zip(X.columns, mi_scaled))

In [14]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)); works elementwise on NumPy arrays."""
    denom = 1 + np.exp(-x)
    return 1 / denom

# Sweep the AUC / MI mixing weight w and collect one result row per value.
# These do not depend on w, so they are computed once before the loop.
has_both_classes = len(np.unique(y)) > 1
node_block = X[node_cols].values
link_block = X[link_cols].values

metrics = []
for w in W_VALUES:
    # Composite reliability per feature: w * AUC + (1 - w) * normalised MI.
    comp = {f: w * auc_scores[f] + (1 - w) * mi_norm[f] for f in X.columns}

    # Normalise within each feature group (EPS guards an all-zero group).
    node_total = sum(comp[f] for f in node_cols) + EPS
    link_total = sum(comp[f] for f in link_cols) + EPS
    alpha = {k: v / node_total for k, v in comp.items() if k in node_cols}
    beta = {k: v / link_total for k, v in comp.items() if k in link_cols}

    # Weighted sum of both feature groups. The dict order follows X.columns,
    # i.e. node_cols followed by link_cols, so it lines up with the blocks.
    alpha_vec = np.array(list(alpha.values()))
    beta_vec = np.array(list(beta.values()))
    z = node_block @ alpha_vec + link_block @ beta_vec

    probs = sigmoid(z)
    # Per-edge binary cross-entropy; EPS keeps the log arguments positive.
    loss = -(y * np.log(probs + EPS) + (1 - y) * np.log(1 - probs + EPS))

    if has_both_classes:
        auc = roc_auc_score(y, probs)
        ap = average_precision_score(y, probs)
    else:
        # Only one class present: 0 is written as a placeholder, not a score.
        auc = ap = 0

    metrics.append([w, auc, ap, loss.mean()])

# Save
results = pd.DataFrame(metrics, columns=["w","AUC","AP","LogLoss"])
results.to_csv(f"{OUTPUT_DIR}/ft_results.csv", index=False)
print(f"=== PIPELINE COMPLETED ===")

=== PIPELINE COMPLETED ===
