In [None]:
# This is just a rough outline for now!

In [None]:
# Imports and setup

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Concatenate, Dense, Dropout
from sklearn.model_selection import train_test_split

In [None]:
# Loading and pre-processing data

# Expected columns: userId, movieId, rating
df = pd.read_csv('ratings.csv')

# Binarize ratings into implicit-feedback labels (for implicit NCF):
# a rating above 3.5 becomes 1, anything else becomes 0.
df['rating'] = df['rating'].gt(3.5).astype('int64')

# Distinct raw IDs, in order of first appearance
user_ids = df['userId'].unique().tolist()
movie_ids = df['movieId'].unique().tolist()

# Raw ID -> contiguous 0-based index
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
movie2movie_encoded = dict(zip(movie_ids, range(len(movie_ids))))

df['user'] = df['userId'].map(user2user_encoded)
df['movie'] = df['movieId'].map(movie2movie_encoded)

num_users = len(user2user_encoded)
num_movies = len(movie2movie_encoded)

# Features are (user index, movie index) pairs; the target is the binary label
X = df[['user', 'movie']].to_numpy()
y = df['rating'].to_numpy()

# Hold out 20% of the rows; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Building NCF Model

# Each input carries one encoded index per sample
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))

# Embedding tables: one row per encoded user / movie
embedding_size = 50
user_embedding = Embedding(num_users, embedding_size)(user_input)
movie_embedding = Embedding(num_movies, embedding_size)(movie_input)

# Drop the length-1 axis so each sample is a flat embedding vector
user_vec = Flatten()(user_embedding)
movie_vec = Flatten()(movie_embedding)

# Join the user and movie vectors into one feature vector
concat = Concatenate()([user_vec, movie_vec])

# MLP tower: Dense(128) -> Dropout -> Dense(64) -> Dropout, then a sigmoid head
x = concat
for hidden_units in (128, 64):
    x = Dense(hidden_units, activation='relu')(x)
    x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)  # sigmoid output for the binary (implicit) label

# Assemble and compile the two-input model
model = Model(inputs=[user_input, movie_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Training the model

# Split the (user, movie) index columns into the model's two inputs
train_inputs = [X_train[:, 0], X_train[:, 1]]
val_inputs = [X_test[:, 0], X_test[:, 1]]

# The held-out split serves as validation data during training
history = model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=256,
    epochs=10,
    verbose=1,
    validation_data=(val_inputs, y_test),
)

In [None]:
# Making Recommendations

def recommend_movies(user_id, model, top_n=10):
    """Return the `top_n` highest-scoring movie IDs for one user.

    Scores every encoded movie for the user with `model` and ranks the movies
    by predicted score, best first. Nothing is filtered out, so movies the
    user has already rated can appear in the result.

    Args:
        user_id: Original (un-encoded) user ID; must be a key of
            `user2user_encoded`.
        model: Object whose `predict([user_indices, movie_indices], verbose=0)`
            returns one score per (user, movie) pair.
        top_n: Maximum number of movies to return. If it exceeds the number
            of movies, all movies are returned.

    Returns:
        List of original movie IDs ordered from highest to lowest score.

    Raises:
        KeyError: If `user_id` has no entry in `user2user_encoded`.
        ValueError: If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if user_id not in user2user_encoded:
        raise KeyError(f"Unknown user_id {user_id!r}: no encoded index for this user")
    if top_n == 0:
        # Nothing requested; skip the (potentially expensive) prediction pass.
        return []

    user_idx = user2user_encoded[user_id]
    movie_indices = np.arange(num_movies)

    # Predict scores for all movies for this user (user index repeated per movie)
    predictions = model.predict([np.full(num_movies, user_idx), movie_indices], verbose=0)
    scores = predictions.flatten()

    # Rank descending, then truncate. Slicing with [:top_n] after the reversal
    # avoids the `[-top_n:]` pitfall where top_n == 0 selects the whole array.
    top_indices = scores.argsort()[::-1][:top_n]

    # Map back to original movie IDs
    return [movie_ids[i] for i in top_indices]