# In [None]:
import sqlite3
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
import tensorflow_recommenders as tfrs
class RecommenderModel(tfrs.Model):
    """Rating-prediction model built from a user tower and a movie tower.

    Each tower maps a raw id/title to an embedding. The two embeddings are
    combined into one predicted rating per example, and the task compares
    those predictions with the observed ratings.

    Args:
        user_model: Model mapping ``features["user_id"]`` to embeddings.
        movie_model: Model mapping ``features["movie_title"]`` to embeddings.
        task: Task layer called with ``labels`` and ``predictions`` keyword
            arguments (e.g. ``tfrs.tasks.Ranking``); returns the loss.
        rating_model: Optional model applied to the concatenated
            ``[user, movie]`` embeddings to produce the rating. When omitted,
            the rating is the dot product of the two embeddings.
    """

    def __init__(self, user_model, movie_model, task, rating_model=None):
        super().__init__()
        self.movie_model: tf.keras.Model = movie_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task
        self.rating_model = rating_model

    def _predict_ratings(self, features):
        """Return one predicted rating per example as a rank-1 tensor."""
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.user_model(features["user_id"])
        # And pick out the movie features and pass them into the movie model.
        movie_embeddings = self.movie_model(features["movie_title"])

        if self.rating_model is None:
            # Matrix-factorisation style score: <user, movie>.
            return tf.reduce_sum(user_embeddings * movie_embeddings, axis=-1)

        scores = self.rating_model(
            tf.concat([user_embeddings, movie_embeddings], axis=-1)
        )
        # Flatten e.g. (batch, 1) head output so it lines up with the labels.
        return tf.reshape(scores, [-1])

    def compute_loss(self, features, training=False):
        """Compute the task loss for one batch of ``features``.

        ``features`` must contain the keys ``"user_id"``, ``"movie_title"``
        and ``"rating"``.
        """
        predictions = self._predict_ratings(features)
        labels = tf.cast(features["rating"], predictions.dtype)

        # Pass by keyword: handing the embeddings and the ratings to the task
        # positionally would make it treat the user embeddings as labels, the
        # movie embeddings as predictions and the ratings as sample weights.
        return self.task(labels=labels, predictions=predictions)

# Connect to your SQLite database
conn = sqlite3.connect('my_letterboxd_data.db')

# Load ratings data
query = """
SELECT u.username, u.movie_name, u.rating
FROM users u
"""
try:
    ratings_df = pd.read_sql(query, conn)
finally:
    # Close the connection even if the query fails
    conn.close()

# Drop incomplete rows BEFORE casting: astype(str) would otherwise turn a
# missing username / movie name into the literal string "None" (or "nan"),
# which would then be treated as a real user or movie.
ratings_df.dropna(subset=['username', 'movie_name', 'rating'], inplace=True)

# Ensure correct types
ratings_df['rating'] = ratings_df['rating'].astype(float)
ratings_df['username'] = ratings_df['username'].astype(str)
ratings_df['movie_name'] = ratings_df['movie_name'].astype(str)
# Summary statistics of the ratings that are actually kept
print(ratings_df['rating'].describe())
# Convert to TensorFlow dataset
ratings = tf.data.Dataset.from_tensor_slices({
    "user_id": ratings_df["username"].values,
    "movie_title": ratings_df["movie_name"].values,
    "rating": ratings_df["rating"].values,
})

# Split the dataset into train and test
tf.random.set_seed(42)
num_ratings = len(ratings_df)
# The shuffle buffer must hold the whole dataset for a uniform shuffle; with a
# smaller fixed buffer the take/skip split below would draw the test set
# mostly from the last rows of the table. max(..., 1) keeps the buffer size
# valid (it must be positive) when the table is empty.
shuffled = ratings.shuffle(
    max(num_ratings, 1), seed=42, reshuffle_each_iteration=False
)

# 80 % of the rows for training, the remainder for evaluation
train_size = int(0.8 * num_ratings)
train = shuffled.take(train_size).batch(256).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
test = shuffled.skip(train_size).batch(256).cache().prefetch(buffer_size=tf.data.AUTOTUNE)
# Vocabularies: every distinct user name and movie title seen in the data
unique_user_ids = ratings_df["username"].unique().tolist()
unique_movie_titles = ratings_df["movie_name"].unique().tolist()

# Width of the user and movie embedding vectors
embedding_dimension = 64


def _embedding_tower(vocabulary, dimension):
    """Build a string -> integer index -> embedding tower for ``vocabulary``."""
    lookup = layers.StringLookup(vocabulary=vocabulary, mask_token=None)
    # One extra row beyond the vocabulary, as in the original sizing
    # (presumably for StringLookup's out-of-vocabulary index).
    embedding = layers.Embedding(len(vocabulary) + 1, dimension)
    return tf.keras.Sequential([lookup, embedding])


# User tower
user_model = _embedding_tower(unique_user_ids, embedding_dimension)
print('hello')
# Movie tower
movie_model = _embedding_tower(unique_movie_titles, embedding_dimension)

# Ranking task: mean squared error loss, RMSE reported as the metric
rating_loss = tf.keras.losses.MeanSquaredError()
rating_metrics = [tf.keras.metrics.RootMeanSquaredError()]
task = tfrs.tasks.Ranking(loss=rating_loss, metrics=rating_metrics)
print('hello')

# Assemble the model and compile it with Adagrad at a fixed learning rate
model = RecommenderModel(user_model, movie_model, task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.001))

# Train for 10 epochs, evaluating on the held-out split after each epoch
model.fit(train, epochs=10, validation_data=test, verbose=1)


