<a href="https://colab.research.google.com/drive/1DBiEoUHc7YftJdDkbJaEc3-BOE5BCwNR" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>





In [1]:
!apt-get install -y swig cmake ffmpeg
!pip uninstall -y box2d-py
!pip install gymnasium[box2d] pygame

zsh:1: command not found: apt-get
[33mDEPRECATION: Loading egg at /opt/miniconda3/lib/python3.12/site-packages/pyparsing-3.1.1-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m[33mDEPRECATION: Loading egg at /opt/miniconda3/lib/python3.12/site-packages/ProDy-2.5.0-py3.12-macosx-11.0-arm64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0mzsh:1: no matches found: gymnasium[box2d]


In [None]:
# Colab Notebook Setup for Reinforcement Learning (DQN, PPO) with GPU Support

# Install required dependencies
!pip install stable-baselines3 torch pandas numpy requests matplotlib seaborn -q


[33mDEPRECATION: Loading egg at /opt/miniconda3/lib/python3.12/site-packages/pyparsing-3.1.1-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m[33mDEPRECATION: Loading egg at /opt/miniconda3/lib/python3.12/site-packages/ProDy-2.5.0-py3.12-macosx-11.0-arm64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m

In [24]:
import pandas as pd
import numpy as np
import zipfile
import requests
from io import BytesIO
from surprise import Dataset, Reader, SVD
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, ndcg_score
import random
import warnings
from collections import defaultdict

warnings.filterwarnings("ignore")

# -------------------------------
# Data Loading & Preprocessing
# -------------------------------
def load_dataset(max_users=1000):
    MOVIELENS_URL = "https://files.grouplens.org/datasets/movielens/ml-1m.zip"
    response = requests.get(MOVIELENS_URL)
    zip_file = zipfile.ZipFile(BytesIO(response.content))

    with zip_file.open("ml-1m/ratings.dat") as file:
        ratings = pd.read_csv(
            file,
            sep="::",
            names=["userId", "movieId", "rating", "timestamp"],
            engine="python",
            encoding="latin-1"
        )

    with zip_file.open("ml-1m/movies.dat") as file:
        movies = pd.read_csv(
            file,
            sep="::",
            names=["movieId", "title", "genres"],
            engine="python",
            encoding="latin-1"
        )

    df = ratings.merge(movies, on="movieId")
    df = df[df["rating"] >= 3.5]
    df = df[df["userId"].isin(df["userId"].unique()[:max_users])]
    df["userId"] = df["userId"].astype("category")
    df["movieId"] = df["movieId"].astype("category")
    return df

df = load_dataset()

# -------------------------------
# Train-Test Split with Reindexing
# -------------------------------
def split_and_reindex_data(df, test_size=0.1):
    user_activity = df.groupby("userId")["movieId"].nunique()
    activity_bins = pd.qcut(user_activity, q=4, labels=False)
    users = df["userId"].unique()
    train_users, test_users = train_test_split(
        users,
        test_size=test_size,
        random_state=42,
        stratify=activity_bins.loc[users].values
    )

    train_data = df[df["userId"].isin(train_users)].copy()
    test_data = df[df["userId"].isin(test_users)].copy()

    train_data["userId"] = train_data["userId"].cat.remove_unused_categories().cat.codes
    train_data["movieId"] = train_data["movieId"].cat.remove_unused_categories().cat.codes

    movie_counts = train_data["movieId"].value_counts()
    train_data = train_data[train_data["movieId"].isin(movie_counts[movie_counts > 20].index)]
    test_data = test_data[test_data["movieId"].isin(train_data["movieId"].unique())]

    return train_data, test_data

train_data, test_data = split_and_reindex_data(df)

# -------------------------------
# Multi-Armed Bandit (MAB)
# -------------------------------
class BayesianUCB:
    def __init__(self, movies, prior_strength=5):
        self.prior_strength = prior_strength
        self.attempts = defaultdict(lambda: prior_strength)
        self.successes = defaultdict(lambda: prior_strength * 0.7)

    def select_items(self, num_recs=5):
        total_attempts = sum(self.attempts.values())
        if total_attempts == 0:
            return random.sample(list(self.attempts.keys()), num_recs)
        scores = {
            movie: (self.successes[movie]/self.attempts[movie] +
                    np.sqrt(2 * np.log(total_attempts)/self.attempts[movie]))
            for movie in self.attempts
        }
        return list(sorted(scores, key=scores.get, reverse=True)[:num_recs])

    def update(self, movie, reward):
        self.attempts[movie] += 1
        self.successes[movie] += reward

mab = BayesianUCB(train_data["movieId"].unique())
for _, row in train_data.iterrows():
    mab.update(row["movieId"], 1 if row["rating"] >= 4 else 0)

# -------------------------------
# Markov Decision Process (MDP)
# -------------------------------
class SequentialRecommender:
    def __init__(self, window_size=2, decay=0.9):
        self.window_size = window_size
        self.decay = decay
        self.transitions = defaultdict(lambda: defaultdict(int))
        self.rewards = defaultdict(float)

    def update(self, session):
        for i in range(len(session) - 1):
            state = tuple(session[max(0, i - self.window_size + 1):i + 1])
            next_item = session[i + 1]
            self.transitions[state][next_item] += 1
            self.rewards[(state, next_item)] = self.decay * self.rewards.get((state, next_item), 0) + (1 - self.decay) * 1

    def recommend(self, current_session, num_recs=5):
        state = tuple(current_session[-self.window_size:])
        candidates = []
        if state in self.transitions:
            total = sum(self.transitions[state].values())
            for item, count in self.transitions[state].items():
                score = count / total + 0.1 * self.rewards.get((state, item), 0)
                candidates.append((item, score))
        candidates.sort(key=lambda x: -x[1])
        return [item for item, _ in candidates[:num_recs]]

mdp = SequentialRecommender()
for user in train_data["userId"].unique():
    session = train_data[train_data["userId"] == user].sort_values("timestamp")["movieId"].values
    if len(session) > 1:
        mdp.update(session)

# -------------------------------
# Q-Learning (Memory-Efficient)
# -------------------------------
class SimpleQLearning:
    def __init__(self, alpha=0.3, gamma=0.95, epsilon=0.1):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(float)

    def update(self, state, action, reward, next_state):
        max_future = max([self.q_table[(next_state, a)] for a in range(1000)], default=0)  # limit action space
        current = self.q_table[(state, action)]
        self.q_table[(state, action)] = current + self.alpha * (reward + self.gamma * max_future - current)

    def recommend(self, state, num_recs=5):
        if random.random() < self.epsilon:
            return random.sample(range(1000), num_recs)
        q_values = [(a, self.q_table[(state, a)]) for a in range(1000)]
        return [a for a, _ in sorted(q_values, key=lambda x: -x[1])[:num_recs]]

ql = SimpleQLearning()
for user in train_data["userId"].unique():
    session = train_data[train_data["userId"] == user].sort_values("timestamp")
    movie_ids = session["movieId"].values
    ratings = session["rating"].values
    if len(movie_ids) > 1:
        for i in range(len(movie_ids) - 1):
            state = movie_ids[i]
            next_state = movie_ids[i + 1]
            reward = ratings[i + 1] / 5
            ql.update(state, next_state, reward, next_state)

# -------------------------------
# Evaluation Framework
# -------------------------------
class RecommendationEvaluator:
    def __init__(self, test_data, train_items):
        self.test_users = test_data["userId"].unique()
        self.ground_truth = test_data.groupby("userId")["movieId"].apply(set).to_dict()
        self.train_items = set(train_items)

    def _get_relevant(self, user):
        return self.ground_truth.get(user, set())

    def evaluate(self, recommender, name):
        hits = []
        ndcg_scores = []

        for user in self.test_users:
            try:
                recs = recommender(user)[:5]
                valid_recs = [item for item in recs if item in self.train_items]
                if len(valid_recs) == 0:
                    continue
                relevant = self._get_relevant(user)
                if not relevant:
                    continue
                hits.append(len(set(valid_recs) & relevant) > 0)
                relevance_scores = [1 if item in relevant else 0 for item in valid_recs]
                true_relevance = [1] * len(relevance_scores)
                if len(relevance_scores) < 2:
                    ndcg_scores.append(0)
                else:
                    ndcg_scores.append(ndcg_score([true_relevance], [relevance_scores]))
            except Exception as e:
                print(f"Error evaluating {name} for user {user}: {e}")

        return {
            "hit_rate": np.mean(hits) if hits else 0,
            "ndcg": np.mean(ndcg_scores) if ndcg_scores else 0
        }

def mab_recommender(user):
    return mab.select_items(5)

def mdp_recommender(user):
    history = train_data[train_data["userId"] == user]["movieId"].tolist()
    return mdp.recommend(history, 5)

def ql_recommender(user):
    session = train_data[train_data["userId"] == user].sort_values("timestamp")["movieId"].tolist()
    return ql.recommend(session[-1], 5) if session else []

print("\nEvaluation Results:")
print("MAB:", RecommendationEvaluator(test_data, train_data["movieId"].unique()).evaluate(mab_recommender, "MAB"))
print("MDP:", RecommendationEvaluator(test_data, train_data["movieId"].unique()).evaluate(mdp_recommender, "MDP"))
print("QL:", RecommendationEvaluator(test_data, train_data["movieId"].unique()).evaluate(ql_recommender, "Q-Learning"))


Evaluation Results:
MAB: {'hit_rate': 0.010101010101010102, 'ndcg': 0.9999999999999999}
MDP: {'hit_rate': 0.13043478260869565, 'ndcg': 0.6086956521739131}
QL: {'hit_rate': 0.10989010989010989, 'ndcg': 0.978021978021978}
