In [30]:
import fastai
from fastai import learner
from fastai.losses import MSELossFlat
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from fastai.data.core import DataLoaders
import numpy as np
from sklearn.model_selection import train_test_split

import polars as pl

In [31]:
df_movies = pl.read_excel("../data/movies.xlsx")
df_movies = df_movies.sort("movie_id")
df_movies.head()

movieId,movie_id,title,rating
i64,i64,str,i64
1,0,"""Toy Story (1995)""",
2,1,"""Jumanji (1995)""",
3,2,"""Grumpier Old Men (1995)""",
4,3,"""Waiting to Exhale (1995)""",
5,4,"""Father of the Bride Part II (1…",


In [32]:
movie_embds = np.load("../data/movie_embeddings.npy")

In [33]:
# Adjust rating to get weight
df_movies = df_movies.with_columns((pl.col("rating") - 2.5).alias("weight"))

df_movies.head()

movieId,movie_id,title,rating,weight
i64,i64,str,i64,f64
1,0,"""Toy Story (1995)""",,
2,1,"""Jumanji (1995)""",,
3,2,"""Grumpier Old Men (1995)""",,
4,3,"""Waiting to Exhale (1995)""",,
5,4,"""Father of the Bride Part II (1…",,


In [34]:
mask = df_movies["weight"].is_null().not_().to_numpy()

In [37]:
personal_embds = (
    movie_embds[mask, :] * df_movies.filter(mask)["weight"].to_numpy().reshape(-1, 1)
).sum(axis=0)

personal_embds = personal_embds / personal_embds.sum()

In [45]:
pref_order = np.argsort(-1 * np.matmul(movie_embds, personal_embds.reshape(-1, 1)).flatten())

pref_order[:10]

array([9395, 9598, 1462, 8121, 9299, 2677, 1278, 7706, 2450, 8347])

In [46]:
df_movies.filter(pl.col("movie_id").is_in(pref_order[:10]))

movieId,movie_id,title,rating,weight
i64,i64,str,i64,f64
1487,1278,"""Selena (1997)""",,
1711,1462,"""Midnight in the Garden of Good…",,
2872,2450,"""Excalibur (1981)""",,
3120,2677,"""Distinguished Gentleman, The (…",,
84601,7706,"""Unknown (2011)""",,
55232,8121,"""Resident Evil: Extinction (200…",,
122924,8347,"""X-Men: Apocalypse (2016)""",,
95449,9299,"""Magic Mike (2012)""",,
128975,9395,"""Hot Tub Time Machine 2 (2015)""",,
32019,9598,"""Be Cool (2005)""",,
