<a href="https://colab.research.google.com/github/prasannashrestha011/NeuralNetworkFromScratch/blob/main/MovieRecommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [46]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [47]:
import pandas as pd

# Ratings: tab-separated file with one (user, movie, rating, timestamp) row per line.
data_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.data"
columns_data = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_csv(data_url, sep='\t', names=columns_data)

# Movies: pipe-separated, latin-1 encoded; five descriptive columns followed by 19 genre flags.
movies_url = "https://files.grouplens.org/datasets/movielens/ml-100k/u.item"

columns_movies = ['movie_id','title','release_date','video_release','imdb_url'] + [f'genre_{i}' for i in range(19)]
movies = pd.read_csv(movies_url, sep='|', names=columns_movies, encoding='latin-1')

# Merging ratings with movie names
df = ratings.merge(movies[['movie_id','title']], on='movie_id')

# Shift the 1-based IDs to 0-based so they can be used directly as embedding indices.
df['user_id'] = df['user_id'] - 1
df['movie_id'] = df['movie_id'] - 1

# Size the embedding tables from the largest index rather than the number of
# distinct IDs: nunique() is too small whenever an ID is missing from the range,
# which would make nn.Embedding raise an index-out-of-range error.
n_users = int(df["user_id"].max()) + 1
n_movies = int(df["movie_id"].max()) + 1

print(df.head())

# Fixed random_state keeps the 80/20 split identical across runs.
train_df,test_df=train_test_split(df,test_size=0.2,random_state=42)

   user_id  movie_id  rating  timestamp                       title
0      195       241       3  881250949                Kolya (1996)
1      185       301       3  891717742    L.A. Confidential (1997)
2       21       376       1  878887116         Heavyweights (1994)
3      243        50       2  880606923  Legends of the Fall (1994)
4      165       345       1  886397596         Jackie Brown (1997)


In [48]:
class MovieDataset(Dataset):
    """Expose the rows of a ratings frame as (user, movie, rating) tensor triples.

    The frame must provide ``user_id``, ``movie_id`` and ``rating`` columns.
    The two ID columns are stored as ``torch.long`` tensors and the rating
    column as ``torch.float32``.
    """

    def __init__(self, df) -> None:
        self.users = self._column(df, "user_id", torch.long)
        self.movie_id = self._column(df, "movie_id", torch.long)
        self.rating = self._column(df, "rating", torch.float32)

    @staticmethod
    def _column(frame, name, dtype):
        """Copy one column of ``frame`` into a tensor of the requested dtype."""
        return torch.tensor(frame[name].values, dtype=dtype)

    def __len__(self):
        """Number of rating rows held by the dataset."""
        return self.users.size(0)

    def __getitem__(self, idx):
        """Return the ``(user, movie, rating)`` entries stored at position ``idx``."""
        columns = (self.users, self.movie_id, self.rating)
        return tuple(column[idx] for column in columns)

In [49]:
# Wrap each split in a MovieDataset so a DataLoader can batch it.
train_dataset = MovieDataset(train_df)
test_dataset = MovieDataset(test_df)

# Only the training loader shuffles; the test loader iterates in stored order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [50]:
class MovieRecommender(nn.Module):
    """Predict a rating from learned user and movie embeddings.

    Each user index and movie index is looked up in its own embedding table;
    the two vectors are concatenated and passed through a small MLP
    (``2 * emb_size -> 64 -> 32 -> 1`` with ReLU between the linear layers).

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        emb_size: width of each embedding vector.
    """

    def __init__(self, n_users, n_movies, emb_size=32):
        super().__init__()
        self.user_emb = nn.Embedding(num_embeddings=n_users, embedding_dim=emb_size)
        self.movie_emb = nn.Embedding(num_embeddings=n_movies, embedding_dim=emb_size)

        # Hidden widths of the MLP head; every hidden Linear is followed by a ReLU,
        # and a final Linear projects down to a single output value.
        widths = (emb_size * 2, 64, 32)
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        layers.append(nn.Linear(widths[-1], 1))
        self.fc = nn.Sequential(*layers)

    def forward(self, user, movie):
        """Return the MLP output for paired ``user`` / ``movie`` index tensors.

        The embeddings are concatenated along dimension 1, so for 1-D index
        tensors of length ``B`` the result has shape ``(B, 1)``.
        """
        joined = torch.cat([self.user_emb(user), self.movie_emb(movie)], dim=1)
        return self.fc(joined)

In [54]:
# Seed PyTorch so weight initialisation and DataLoader shuffling repeat across runs.
torch.manual_seed(42)

model = MovieRecommender(n_users, n_movies)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# The loss printed below is measured on the training batches only;
# test_loader is not evaluated in this cell.
for epoch in range(30):  # Increase epochs for better results
    model.train()
    total_loss = 0
    for user, movie, rating in train_loader:
        optimizer.zero_grad()
        pred = model(user, movie)
        # Drop only the trailing feature dimension: a bare squeeze() would also
        # collapse a batch of size 1 to a 0-d tensor and mismatch the target shape.
        loss = criterion(pred.squeeze(1), rating)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Average of the per-batch mean losses for this epoch.
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}")


Epoch 1, Loss: 1.1520
Epoch 2, Loss: 0.9216
Epoch 3, Loss: 0.8748
Epoch 4, Loss: 0.8407
Epoch 5, Loss: 0.8040
Epoch 6, Loss: 0.7656
Epoch 7, Loss: 0.7259
Epoch 8, Loss: 0.6865
Epoch 9, Loss: 0.6488
Epoch 10, Loss: 0.6182
Epoch 11, Loss: 0.5854
Epoch 12, Loss: 0.5618
Epoch 13, Loss: 0.5372
Epoch 14, Loss: 0.5147
Epoch 15, Loss: 0.4955
Epoch 16, Loss: 0.4798
Epoch 17, Loss: 0.4615
Epoch 18, Loss: 0.4467
Epoch 19, Loss: 0.4348
Epoch 20, Loss: 0.4156
Epoch 21, Loss: 0.4103
Epoch 22, Loss: 0.3976
Epoch 23, Loss: 0.3907
Epoch 24, Loss: 0.3829
Epoch 25, Loss: 0.3697
Epoch 26, Loss: 0.3612
Epoch 27, Loss: 0.3528
Epoch 28, Loss: 0.3494
Epoch 29, Loss: 0.3428
Epoch 30, Loss: 0.3340


In [62]:
model.eval()
user_id = 10  # Example user (index into the user embedding table)

# Pair this one user with every movie index so all movies are scored in one forward pass.
all_movies = torch.arange(n_movies)
user_tensor = torch.full((n_movies,), user_id, dtype=torch.long)

with torch.no_grad():
    scores = model(user_tensor, all_movies)
print(scores)

# Exclude movies this user has already rated so the list only contains new titles.
seen_movies = torch.tensor(
    df.loc[df["user_id"] == user_id, "movie_id"].unique(), dtype=torch.long
)
candidate_scores = scores.squeeze(1).clone()
candidate_scores[seen_movies] = float("-inf")

# Get top 5 recommended movies
top5 = torch.topk(candidate_scores, 5).indices
print("Top 5 recommended movie IDs:", top5.tolist())


tensor([[2.6551],
        [2.5377],
        [4.3461],
        ...,
        [2.5579],
        [4.1332],
        [3.1815]])
Top 5 recommended movie IDs: [1404, 1635, 514, 317, 222]
