# Modeule 9: HSTU Recommender Engines

This is a basic HSTU recommender engine that was trained on the 1 million MovieLens dataset.  This code was initially generated by ChatGPT (OpenAI 2025), before I debugged it and tested its expected behavior.

OpenAI. 2025. *Chat with ChatGPT about building an HSTU-based recommender engine using PyTorch and MovieLens 1M*. May 30, 2025. ChatGPT. [https://chat.openai.com/](https://chat.openai.com/).

In [15]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Load dataset
ratings = pd.read_csv("./data/ml-1m/ratings.dat", sep="::", engine="python",
                      names=["userId", "movieId", "rating", "timestamp"])

movies = pd.read_csv("./data/ml-1m/movies.dat", sep="::", engine="python",
                      names=["movieId", "movieTitle", "genres"],
                      encoding="latin-1")

# Encode userId and movieId
user_encoder = LabelEncoder()
movie_encoder = LabelEncoder()

ratings['userId'] = user_encoder.fit_transform(ratings['userId'])
ratings['movieId'] = movie_encoder.fit_transform(ratings['movieId'])

# Sort by timestamp for sequential modeling
ratings.sort_values(by=['userId', 'timestamp'], inplace=True)


In [16]:
def build_sessions(data, session_length=10):
    sessions = []
    targets = []
    for uid, user_data in data.groupby('userId'):
        movies = user_data['movieId'].tolist()
        for i in range(0, len(movies) - session_length):
            sessions.append(movies[i:i + session_length])
            targets.append(movies[i + session_length])  # next movie
    return sessions, targets

sessions, targets = build_sessions(ratings, session_length=10)


In [17]:
class MovieLensDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return torch.tensor(self.sequences[idx], dtype=torch.long), torch.tensor(self.targets[idx], dtype=torch.long)

#dataset = MovieLensDataset(sequences, targets)
#dataloader = DataLoader(dataset, batch_size=128, shuffle=True)


### Define the HSTU model

In [18]:
import torch.nn as nn

class HSTURecommender(nn.Module):
    def __init__(self, num_movies, embedding_dim=64, hidden_dim=128):
        super().__init__()
        self.embedding = nn.Embedding(num_movies, embedding_dim)
        
        # Session-level RNN
        self.session_rnn = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        
        # User-level summarization
        self.user_linear = nn.Linear(hidden_dim, hidden_dim)
        
        self.output = nn.Linear(hidden_dim, num_movies)

    def forward(self, x):
        emb = self.embedding(x)  # [batch, seq_len, emb_dim]
        _, h_n = self.session_rnn(emb)  # h_n: [1, batch, hidden_dim]
        h_user = self.user_linear(h_n.squeeze(0))  # [batch, hidden_dim]
        out = self.output(h_user)
        return out


### Create a test/train datasets and train the model

In [19]:
from sklearn.model_selection import train_test_split

# Split into train and test (90/10)
train_seqs, test_seqs, train_targets, test_targets = train_test_split(
    sessions, targets, test_size=0.1, random_state=42)

train_dataset = MovieLensDataset(train_seqs, train_targets)
test_dataset = MovieLensDataset(test_seqs, test_targets)

train_loader = DataLoader(train_dataset, batch_size=1024, num_workers=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1024, num_workers=8, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_hstu = HSTURecommender(num_movies=len(movie_encoder.classes_)).to(device)
optimizer = torch.optim.Adam(model_hstu.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(500):
    model_hstu.train()
    total_loss = 0
    #print(len(batch))
    for batch in train_loader:
        x, y = batch
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model_hstu(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch + 1}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 10, Loss: 5.3021
Epoch 20, Loss: 5.0716
Epoch 30, Loss: 4.9445
Epoch 40, Loss: 4.8635
Epoch 50, Loss: 4.8008
Epoch 60, Loss: 4.7552
Epoch 70, Loss: 4.7141
Epoch 80, Loss: 4.6778
Epoch 90, Loss: 4.6543
Epoch 100, Loss: 4.6307
Epoch 110, Loss: 4.6147
Epoch 120, Loss: 4.6103
Epoch 130, Loss: 4.5806
Epoch 140, Loss: 4.5850
Epoch 150, Loss: 4.5553
Epoch 160, Loss: 4.5463
Epoch 170, Loss: 4.5403
Epoch 180, Loss: 4.5299
Epoch 190, Loss: 4.5257
Epoch 200, Loss: 4.5204
Epoch 210, Loss: 4.5218
Epoch 220, Loss: 4.5296
Epoch 230, Loss: 4.5083
Epoch 240, Loss: 4.5033
Epoch 250, Loss: 4.4999
Epoch 260, Loss: 4.5160
Epoch 270, Loss: 4.5078
Epoch 280, Loss: 4.4906
Epoch 290, Loss: 4.4898
Epoch 300, Loss: 4.5057
Epoch 310, Loss: 4.4911
Epoch 320, Loss: 4.4840
Epoch 330, Loss: 4.4894
Epoch 340, Loss: 4.4914
Epoch 350, Loss: 4.4813
Epoch 360, Loss: 4.4991
Epoch 370, Loss: 4.4928
Epoch 380, Loss: 4.5191
Epoch 390, Loss: 4.4887
Epoch 400, Loss: 4.4744
Epoch 410, Loss: 4.4725
Epoch 420, Loss: 4.4809
E

In [20]:
import numpy as np

def dcg_at_k(r, k):
    r = np.asarray(r, dtype=np.float64)[:k]
    if r.size:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    dcg = dcg_at_k(r, k)
    ideal = dcg_at_k(sorted(r, reverse=True), k)
    return dcg / ideal if ideal > 0 else 0.

def precision_at_k(r, k):
    r = np.asarray(r, dtype=np.float64)[:k]
    return np.mean(r)

def recall_at_k(r, k, all_pos_num):
    r = np.asarray(r, dtype=np.float64)[:k]
    return np.sum(r) / all_pos_num if all_pos_num > 0 else 0.


In [21]:
def evaluate(model, dataloader, device, top_ks=[10, 100]):
    model.eval()
    ndcg_scores = {k: [] for k in top_ks}
    precision_scores = {k: [] for k in top_ks}
    recall_scores = {k: [] for k in top_ks}

    with torch.no_grad():
        for x, y_true in dataloader:
            x = x.to(device)
            y_true = y_true.to(device)

            logits = model(x)  # shape: [batch, num_items]
            topk_preds = torch.topk(logits, max(top_ks), dim=1).indices.cpu().numpy()

            for i in range(len(x)):
                true_item = y_true[i].item()
                for k in top_ks:
                    preds_k = topk_preds[i][:k]
                    r = [1 if true_item == p else 0 for p in preds_k]
                    ndcg_scores[k].append(ndcg_at_k(r, k))
                    precision_scores[k].append(precision_at_k(r, k))
                    recall_scores[k].append(recall_at_k(r, k, 1))  # only 1 positive item

    print("Evaluation Results:")
    for k in top_ks:
        print(f"@{k}: NDCG={np.mean(ndcg_scores[k]):.4f}, "
              f"Precision={np.mean(precision_scores[k]):.4f}, "
              f"Recall={np.mean(recall_scores[k]):.4f}")


In [22]:
def recommend_for_user(model, user_sequence, movie_encoder, top_k=10):
    model.eval()
    input_seq = torch.tensor(user_sequence[-10:], dtype=torch.long).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(input_seq)
        topk = torch.topk(logits, top_k).indices.squeeze(0).cpu().tolist()
    movie_ids = movie_encoder.inverse_transform(topk)
    return movie_ids


### Evaluate the HSTU model using the test dataset

In [23]:
# Evaluate model
evaluate(model_hstu, test_loader, device)

# Recommend for a specific user sequence
sample_seq = train_seqs[0]  # last 10 movieIds
recommendations = recommend_for_user(model_hstu, sample_seq, movie_encoder)
print("Recommended Movie IDs:", movies[movies.movieId.isin(recommendations)].iloc[:, :-1])


Evaluation Results:
@10: NDCG=0.1137, Precision=0.0211, Recall=0.2112
@100: NDCG=0.1920, Precision=0.0060, Recall=0.6045
Recommended Movie IDs:       movieId                         movieTitle
462       466        Hot Shots! Part Deux (1993)
680       688        Operation Dumbo Drop (1995)
1545     1586                   G.I. Jane (1997)
2184     2253                        Toys (1992)
2333     2402  Rambo: First Blood Part II (1985)
2335     2404                   Rambo III (1988)
2342     2411                    Rocky IV (1985)
2746     2815                  Iron Eagle (1986)
3372     3441                    Red Dawn (1984)
3697     3766           Missing in Action (1984)


## DLMR RecSys

This is a deep-learning recommender model that uses neural collaborative filtering. It was also trained on the 1 million MovieLens dataset.  The code for this was initially generated by ChatGPT (OpenAI 2025), before I then debugged it and confirmed it works as expected.

OpenAI. 2025. Chat with ChatGPT about implementing a DLRM-based recommender system using PyTorch and MovieLens 1M. May 30, 2025. ChatGPT. https://chat.openai.com/.

In [24]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load MovieLens 1M dataset
df = pd.read_csv('./data/ml-1m/ratings.dat', sep='::', engine='python',
                 names=['userId', 'movieId', 'rating', 'timestamp'])

# Binarize ratings (implicit feedback setup)
df['rating'] = (df['rating'] >= 4).astype(int)

# Encode user and item IDs
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
df['user'] = user_encoder.fit_transform(df['userId'])
df['item'] = item_encoder.fit_transform(df['movieId'])

num_users = df['user'].nunique()
num_items = df['item'].nunique()


In [25]:
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

class MovieLensPairs(Dataset):
    def __init__(self, df):
        self.users = df['user'].values
        self.items = df['item'].values
        self.labels = df['rating'].values

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

# Split data
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)
train_ds = MovieLensPairs(train_df)
test_ds = MovieLensPairs(test_df)

train_loader = DataLoader(train_ds, batch_size=1024, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=1024)


### Define and train the NCF model

In [26]:
class NCF(nn.Module):
    def __init__(self, num_users, num_items, embedding_dim=64):
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)

        self.mlp = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, users, items):
        user_emb = self.user_embedding(users)   # [B, D]
        item_emb = self.item_embedding(items)   # [B, D]
        x = torch.cat([user_emb, item_emb], dim=1)  # [B, 2D]
        return self.mlp(x).squeeze(1)  # [B]



model_ncf = NCF(num_users, num_items).to(device)
optimizer = optim.Adam(model_ncf.parameters(), lr=1e-3)
criterion = nn.BCEWithLogitsLoss()

for epoch in range(250):
    model_ncf.train()
    for users, items, labels in train_loader:
        users = users.to(device)
        items = items.to(device)
        labels = labels.float().to(device)

        logits = model_ncf(users, items)  # shape: [batch_size]

        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}: loss = {loss.item():.4f}")

Epoch 10: loss = 0.5092
Epoch 20: loss = 0.4125
Epoch 30: loss = 0.3037
Epoch 40: loss = 0.3335
Epoch 50: loss = 0.3161
Epoch 60: loss = 0.2891
Epoch 70: loss = 0.1977
Epoch 80: loss = 0.1686
Epoch 90: loss = 0.1313
Epoch 100: loss = 0.1932
Epoch 110: loss = 0.2030
Epoch 120: loss = 0.1081
Epoch 130: loss = 0.1051
Epoch 140: loss = 0.0841
Epoch 150: loss = 0.1606
Epoch 160: loss = 0.1219
Epoch 170: loss = 0.1334
Epoch 180: loss = 0.1916
Epoch 190: loss = 0.0693
Epoch 200: loss = 0.0370
Epoch 210: loss = 0.0929
Epoch 220: loss = 0.0979
Epoch 230: loss = 0.0787
Epoch 240: loss = 0.0881
Epoch 250: loss = 0.0616


In [27]:
import numpy as np
import torch

def dcg_at_k(r, k):
    r = np.asarray(r, dtype=np.float64)[:k]
    if r.size:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    return 0.

def ndcg_at_k(r, k):
    dcg = dcg_at_k(r, k)
    ideal = dcg_at_k(sorted(r, reverse=True), k)
    return dcg / ideal if ideal > 0 else 0.

def precision_at_k(r, k):
    r = np.asarray(r)[:k]
    return np.mean(r)

def recall_at_k(r, k, all_positives):
    r = np.asarray(r)[:k]
    return np.sum(r) / all_positives if all_positives > 0 else 0.


def evaluate_full_ranking(model, user_item_dict, all_items, device, top_ks=[10, 100]):
    model.eval()

    ndcg_all = {k: [] for k in top_ks}
    prec_all = {k: [] for k in top_ks}
    recall_all = {k: [] for k in top_ks}

    with torch.no_grad():
        for user, true_items in user_item_dict.items():
            user_tensor = torch.LongTensor([user] * len(all_items)).to(device)
            item_tensor = torch.LongTensor(all_items).to(device)

            scores = model(user_tensor, item_tensor)  # [num_items]
            scores = scores.cpu().numpy()

            ranked_items = np.argsort(-scores)  # indices of all_items

            hits = np.isin(all_items, list(true_items)).astype(int)
            ranked_hits = hits[ranked_items]  # reordered according to model scores

            for k in top_ks:
                ndcg_all[k].append(ndcg_at_k(ranked_hits, k))
                prec_all[k].append(precision_at_k(ranked_hits, k))
                recall_all[k].append(recall_at_k(ranked_hits, k, all_positives=len(true_items)))

    print("\nEvaluation Metrics:")
    for k in top_ks:
        print(f"Top-{k} → NDCG: {np.mean(ndcg_all[k]):.4f}, Precision: {np.mean(prec_all[k]):.4f}, Recall: {np.mean(recall_all[k]):.4f}")


from collections import defaultdict

def build_user_item_dict_from_df(df):
    user_item_dict = defaultdict(set)
    for user, item, label in zip(df['user'].values, df['item'].values, df['rating'].values):
        if label >= 1.0:  # assuming ratings ≥ 3 are positive interactions
            user_item_dict[int(user)].add(int(item))
    return dict(user_item_dict)

user_item_dict = build_user_item_dict_from_df(test_df)
all_items = sorted(list(set(train_df['item']).union(set(test_df['item']))))
evaluate_full_ranking(model_ncf, user_item_dict, all_items, device=device, top_ks=[10, 100])



Evaluation Metrics:
Top-10 → NDCG: 0.0008, Precision: 0.0008, Recall: 0.0007
Top-100 → NDCG: 0.0076, Precision: 0.0019, Recall: 0.0216


In [28]:
def recommend_ncf(model, user_id, user_encoder, item_encoder, top_k=10, device=device):
    model.eval()

    # Encode the user and create item indices
    user_idx = torch.tensor([user_encoder.transform([user_id])[0]], dtype=torch.long).to(device)
    all_item_indices = torch.arange(len(item_encoder.classes_), dtype=torch.long).to(device)

    # Expand user to match number of items
    user_tensor = user_idx.expand(all_item_indices.shape[0])

    with torch.no_grad():
        scores = model(user_tensor, all_item_indices)  # shape: [num_items]
        top_items = torch.topk(scores, top_k).indices.cpu().tolist()

    # Decode recommended item indices to original item IDs
    return item_encoder.inverse_transform(top_items)

recommendations = recommend_ncf(
    model_ncf,
    user_id=5,
    user_encoder=user_encoder,
    item_encoder=item_encoder,  
    top_k=10,
    device=device
)

print("Recommended Movie IDs:",
      movies[movies.movieId.isin(recommendations)].iloc[:, :-1])

Recommended Movie IDs:       movieId                                       movieTitle
40         41                               Richard III (1995)
52         53                                  Lamerica (1994)
884       896                                Wild Reeds (1994)
1782     1851                 Leather Jacket Love Story (1997)
2434     2503                          Apple, The (Sib) (1998)
2871     2940                                     Gilda (1946)
2912     2981            Brother, Can You Spare a Dime? (1975)
3020     3089  Bicycle Thief, The (Ladri di biciclette) (1948)
3610     3679      Decline of Western Civilization, The (1981)
3810     3880              Ballad of Ramblin' Jack, The (2000)
