In [5]:
from collections import deque, defaultdict
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans

In [None]:
# --- Load data
movies = pd.read_csv(
    "ml-1m/ml-1m/movies.dat", sep="::", header=None, engine="python",
    encoding="ISO-8859-1", names=["MovieID", "Title", "Genres"]
)
users = pd.read_csv(
    "ml-1m/ml-1m/users.dat", sep="::", header=None, engine="python",
    names=["UserID", "Gender", "Age", "Occupation", "Zip-code"]
)
ratings = pd.read_csv(
    "ml-1m/ml-1m/ratings.dat", sep="::", header=None, engine="python",
    names=["UserID", "MovieID", "Rating", "Timestamp"]
)

# Reward in [0,1]
ratings["reward"] = (ratings["Rating"] - 1) / 4.0

# Unique IDs
user_ids = ratings["UserID"].unique()
movie_ids = ratings["MovieID"].unique()
u2i = {u: i for i, u in enumerate(user_ids)}
m2i = {m: i for i, m in enumerate(movie_ids)}
i2m = {i: m for m, i in m2i.items()}

ratings["u_idx"] = ratings["UserID"].map(u2i)
ratings["m_idx"] = ratings["MovieID"].map(m2i)

n_users = len(user_ids)
n_items = len(movie_ids)

In [7]:
#############################################
# à comprendre


# --- Matrice sparse user-item (valeurs = reward) ---
R = sparse.coo_matrix(
    (ratings["reward"].values, (ratings["u_idx"].values, ratings["m_idx"].values)),
    shape=(n_users, n_items)
).tocsr()

# --- Features item via SVD ---
d = 30  # dimension latente (à tuner)
svd = TruncatedSVD(n_components=d, random_state=0)
item_factors = svd.fit_transform(R.T)  # (n_items, d)

# Normalisation (souvent utile en LinUCB)
item_factors = item_factors / (np.linalg.norm(item_factors, axis=1, keepdims=True) + 1e-12)

# --- Partition "dynamique" des items : clusters (super-bras) ---
K = 50  # nb de clusters items = nb de "bras"
kmeans = KMeans(n_clusters=K, random_state=0, n_init="auto")
item_cluster = kmeans.fit_predict(item_factors)

cluster_items = {c: np.where(item_cluster == c)[0] for c in range(K)}
cluster_centroids = kmeans.cluster_centers_
cluster_centroids = cluster_centroids / (np.linalg.norm(cluster_centroids, axis=1, keepdims=True) + 1e-12)


In [8]:
class DC3MABLike:
    def __init__(
        self,
        n_users: int,
        item_factors: np.ndarray,
        item_cluster: np.ndarray,
        cluster_items: dict,
        alpha: float = 0.6,      # exploration LinUCB
        lam: float = 1.0,        # regularisation ridge
        edge_init: str = "kmeans",  # "full" (cher) ou "kmeans"
        user_init_clusters: int = 30,
        split_beta: float = 1.0,  # agressivité de séparation des clusters users
        seed: int = 0
    ):
        self.rng = np.random.default_rng(seed)
        self.n_users = n_users
        self.X = item_factors
        self.d = item_factors.shape[1]

        self.item_cluster = item_cluster
        self.cluster_items = cluster_items
        self.K = len(cluster_items)

        self.alpha = alpha
        self.lam = lam
        self.split_beta = split_beta

        # --- Per-user LinUCB stats ---
        self.A = np.array([np.eye(self.d) * lam for _ in range(n_users)])  # (n_users, d, d)
        self.b = np.zeros((n_users, self.d))
        self.t = np.zeros(n_users, dtype=int)  # nb d'updates par user

        # --- Graphe utilisateurs (adjacency list) ---
        self.G = [set() for _ in range(n_users)]
        self._init_graph(edge_init=edge_init, user_init_clusters=user_init_clusters)

    def _init_graph(self, edge_init: str, user_init_clusters: int):
        if edge_init == "full":
            for u in range(self.n_users):
                self.G[u] = set(range(self.n_users)) - {u}
            return

        # init plus scalable: cluster users sur leur facteur latent (SVD côté users)
        # user factors approx: R * V (ici R * components_.T)
        # => on reconstruit vite un embedding users depuis item_factors
        # NOTE: si tu veux mieux, calcule explicitement un SVD user-side.
        # Ici: user_emb = R @ item_factors (proj)
        # (à passer en param si tu préfères)
        # fallback: random partition si trop lourd
        try:
            # approx user embedding: moyenne pondérée des items
            # simple et rapide
            user_emb = np.zeros((self.n_users, self.d))
            # (si R est accessible, tu peux l’injecter via une closure; sinon, random)
            # => on fait random ici pour rester autonome
            user_emb = self.rng.normal(size=(self.n_users, self.d))
            km = KMeans(n_clusters=user_init_clusters, random_state=0, n_init="auto")
            ucl = km.fit_predict(user_emb)
        except Exception:
            ucl = self.rng.integers(0, user_init_clusters, size=self.n_users)

        groups = defaultdict(list)
        for u, c in enumerate(ucl):
            groups[c].append(u)

        for _, us in groups.items():
            s = set(us)
            for u in us:
                self.G[u] = s - {u}

    def _theta_user(self, u: int) -> np.ndarray:
        Ainv = np.linalg.inv(self.A[u])
        return Ainv @ self.b[u]

    def _cluster_of(self, u: int):
        # BFS composante connexe
        seen = set([u])
        q = deque([u])
        while q:
            x = q.popleft()
            for y in self.G[x]:
                if y not in seen:
                    seen.add(y)
                    q.append(y)
        return list(seen)

    def _cluster_model(self, cluster_users):
        A_sum = np.eye(self.d) * 0.0
        b_sum = np.zeros(self.d)
        for v in cluster_users:
            A_sum += self.A[v]
            b_sum += self.b[v]
        Ainv = np.linalg.inv(A_sum + 1e-12*np.eye(self.d))
        theta = Ainv @ b_sum
        return theta, Ainv

    def recommend(self, u: int, L: int = 10, per_cluster: int = 2, candidate_clusters: int = 10):
        """
        Recommande une liste de L items.
        Stratégie:
        - score UCB sur les centroides de clusters items -> top candidate_clusters
        - puis à l’intérieur: pick per_cluster meilleurs items par UCB
        """
        cu = self._cluster_of(u)
        theta_c, Ainv_c = self._cluster_model(cu)

        # UCB sur centroides => choisir des "super-bras"
        cent = np.array([self.X[self.cluster_items[k]].mean(axis=0) for k in range(self.K)])
        cent = cent / (np.linalg.norm(cent, axis=1, keepdims=True) + 1e-12)

        mu = cent @ theta_c
        unc = np.sqrt(np.einsum("ij,jk,ik->i", cent, Ainv_c, cent))
        ucb = mu + self.alpha * unc

        topK = np.argsort(-ucb)[:candidate_clusters]

        recs = []
        for k in topK:
            items = self.cluster_items[k]
            if len(items) == 0:
                continue

            Xk = self.X[items]
            mu_i = Xk @ theta_c
            unc_i = np.sqrt(np.einsum("ij,jk,ik->i", Xk, Ainv_c, Xk))
            ucb_i = mu_i + self.alpha * unc_i

            best = items[np.argsort(-ucb_i)[:per_cluster]]
            for it in best:
                recs.append(int(it))
                if len(recs) >= L:
                    return recs[:L]

        # fallback si pas assez
        if len(recs) < L:
            fill = self.rng.choice(self.X.shape[0], size=L-len(recs), replace=False).tolist()
            recs.extend(fill)
        return recs[:L]

    def update(self, u: int, item: int, reward: float):
        x = self.X[item]
        self.A[u] += np.outer(x, x)
        self.b[u] += reward * x
        self.t[u] += 1

        # --- Dynamic user clustering: couper des edges si trop différent ---
        # Heuristique CLUB-like: compare theta_u vs theta_v + incertitudes
        theta_u = self._theta_user(u)

        # incertitude (simple): radius décroît ~ 1/sqrt(t)
        rad_u = self.split_beta / np.sqrt(max(self.t[u], 1))

        to_remove = []
        for v in list(self.G[u]):
            theta_v = self._theta_user(v)
            rad_v = self.split_beta / np.sqrt(max(self.t[v], 1))
            if np.linalg.norm(theta_u - theta_v) > (rad_u + rad_v):
                to_remove.append(v)

        for v in to_remove:
            self.G[u].discard(v)
            self.G[v].discard(u)


In [9]:
# Trier par temps
events = ratings.sort_values("Timestamp")[["u_idx","m_idx","reward"]].to_numpy()

bandit = DC3MABLike(
    n_users=n_users,
    item_factors=item_factors,
    item_cluster=item_cluster,
    cluster_items=cluster_items,
    alpha=0.6,
    lam=1.0,
    edge_init="kmeans",
    user_init_clusters=30,
    split_beta=1.2,
    seed=0
)

L = 10
cum_reward = 0.0
hits = 0
T = 200_000  # pour aller vite au début (1M events sinon long)

for t, (u, true_item, r) in enumerate(events[:T], start=1):
    recs = bandit.recommend(int(u), L=L, per_cluster=2, candidate_clusters=10)

    obs_r = float(r) if int(true_item) in recs else 0.0
    cum_reward += obs_r
    hits += int(int(true_item) in recs)

    bandit.update(int(u), int(true_item), obs_r)

    if t % 20000 == 0:
        print(f"t={t}  hit@{L}={hits/t:.4f}  cum_reward={cum_reward:.1f}")


t=20000  hit@10=0.0001  cum_reward=0.0


KeyboardInterrupt: 