# In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, Reshape, Lambda, Add, Concatenate, Multiply, Subtract
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split


def tensor_sum(name='tensor_sum'):
    """Return a Keras ``Lambda`` layer that sums its input along axis 1.

    The reduced axis is kept (``keepdims=True``), so the output has the same
    rank as the input with size 1 on axis 1.

    Args:
        name: Name given to the created layer.
    """
    # Kept as a lambda expression (not a named def) so the layer wraps the
    # same kind of callable as before.
    reduce_axis_one = lambda tensor: K.sum(tensor, axis=1, keepdims=True)
    return Lambda(reduce_axis_one, name=name)

def tensor_square(name='tensor_square'):
    """Return a Keras ``Lambda`` layer that squares its input element-wise.

    Args:
        name: Name given to the created layer.
    """
    # Kept as a lambda expression (not a named def) so the layer wraps the
    # same kind of callable as before.
    square_elements = lambda tensor: K.square(tensor)
    return Lambda(square_elements, name=name)


class MovieRecommenderFM5:
    """Rating model (Keras, FM-style) paired with a popularity-based candidate source.

    ``fit`` trains the network on the full user x movie grid produced by
    ``df2xy_and_split``; ``recommend_movies`` asks ``self.topPopular`` for
    candidate movies and re-ranks them by the network's predicted score.

    Attributes created outside ``__init__``:
        user_ages_lookup: ``{uid: age}``, built by ``df2xy_and_split``.
        movie_genres_lookup: ``{mid: {genre: flag}}``, built by
            ``df2xy_and_split``.
    """

    # Genre indicator columns read from the ratings frame, in the order they
    # are fed to the ``genres_input`` layer. Single source of truth for the
    # preprocessing, the model input width and the recommendation path.
    GENRE_COLUMNS = (
        'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
        'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
        'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
    )

    def __init__(self, n_uid, n_mid, k):
        """Create the candidate recommender and build/compile the Keras model.

        Args:
            n_uid: Largest user id; the user embeddings get ``n_uid + 1`` rows.
            n_mid: Largest movie id; the movie embeddings get ``n_mid + 1`` rows.
            k: Width of the second-order embeddings.
        """
        # TopPopRecommender is not defined in this part of the file; it must
        # already be in scope when the class is instantiated.
        self.topPopular = TopPopRecommender()
        self.n_uid = n_uid
        self.n_mid = n_mid
        self.k = k
        # Hold-out split; populated by fit() when kaggle_mode is False.
        self.valid_x = None
        self.valid_y = None
        self.model = self._create_model()

    def fm_1d(self, uid_input, mid_input, age_input, genres_input, n_uid, n_mid):
        """Build the first-order (additive) part of the network.

        Args:
            uid_input: Keras input tensor of user ids.
            mid_input: Keras input tensor of movie ids.
            age_input: Keras input tensor holding the user's age.
            genres_input: Keras input tensor of genre indicators.
            n_uid: Largest user id (embedding has ``n_uid + 1`` rows).
            n_mid: Largest movie id (embedding has ``n_mid + 1`` rows).

        Returns:
            The tensor produced by adding the four per-feature terms.
        """
        # One learned scalar per user id and per movie id.
        uid_embed_1d = Flatten()(Embedding(n_uid + 1, 1, name='uid_embed_1d')(uid_input))
        mid_embed_1d = Flatten()(Embedding(n_mid + 1, 1, name='mid_embed_1d')(mid_input))

        # Single learned weight applied to the age value (no bias).
        age_dense = Dense(1, use_bias=False, name='age_dense')(age_input)

        # NOTE(review): the genre indicators are added raw (no learned weight)
        # and are wider than the three size-1 terms, so this relies on Add
        # accepting size-1 terms alongside the wider one -- confirm the
        # resulting shape is what is intended here.
        linear_terms = Add()([uid_embed_1d, mid_embed_1d, age_dense, genres_input])
        return linear_terms

    def fm_2d(self, uid_input, mid_input, age_input, genres_input, n_uid, n_mid, k):
        """Build the second-order (interaction) part of the network.

        Args:
            uid_input: Keras input tensor of user ids.
            mid_input: Keras input tensor of movie ids.
            age_input: Keras input tensor holding the user's age.
            genres_input: Keras input tensor of genre indicators.
            n_uid: Largest user id (embedding has ``n_uid + 1`` rows).
            n_mid: Largest movie id (embedding has ``n_mid + 1`` rows).
            k: Width of each embedding.

        Returns:
            A tensor with one value per sample:
            ``(sum of features)^2 - sum of (features^2)`` over the
            concatenated feature vector.
        """
        # k-wide embeddings for the user and the movie, flattened to 2-D.
        uid_embed_2d = Flatten()(Embedding(n_uid + 1, k, name='uid_embed_2d')(uid_input))
        mid_embed_2d = Flatten()(Embedding(n_mid + 1, k, name='mid_embed_2d')(mid_input))

        # Project the age value to a k-wide vector (no bias).
        age_embed_2d = Dense(k, use_bias=False, name='age_embed_2d')(age_input)

        # All second-order features side by side in one flat vector.
        concatenated_features = Concatenate()([uid_embed_2d, mid_embed_2d, age_embed_2d, genres_input])

        # Square of the sum over the whole concatenated vector.
        squared_sum_features = Lambda(lambda x: K.square(K.sum(x, axis=1, keepdims=True)))(concatenated_features)

        # Sum of the element-wise squares over the whole concatenated vector.
        sum_squared_features = Lambda(lambda x: K.sum(K.square(x), axis=1, keepdims=True))(concatenated_features)

        # (sum x)^2 - sum(x^2): twice the sum of all pairwise products of the
        # concatenated entries.
        # NOTE(review): this pools every scalar (embedding components and raw
        # genre flags) into a single sum and omits the usual 0.5 factor, which
        # differs from the textbook per-latent-dimension FM term -- confirm
        # this is intended before changing it (it affects saved weights).
        interaction_term = Subtract()([squared_sum_features, sum_squared_features])

        return interaction_term

    def _create_model(self):
        """Assemble and compile the Keras model.

        Returns:
            A compiled ``Model`` taking ``[uid, mid, age, genres]`` inputs and
            producing a single sigmoid output, trained with MSE loss and Adam
            (learning rate 0.001).
        """
        uid_input = Input(shape=(1,), dtype='int32', name='uid_input')
        mid_input = Input(shape=(1,), dtype='int32', name='mid_input')
        age_input = Input(shape=(1,), name='age_input')
        # One indicator per entry of GENRE_COLUMNS.
        genres_input = Input(shape=(len(self.GENRE_COLUMNS),), name='genres_input')

        fm_1d_output = self.fm_1d(uid_input, mid_input, age_input, genres_input, self.n_uid, self.n_mid)
        fm_2d_output = self.fm_2d(uid_input, mid_input, age_input, genres_input, self.n_uid, self.n_mid, self.k)

        # Sum of the first- and second-order parts.
        combined_output = Add()([fm_1d_output, Flatten()(fm_2d_output)])

        # NOTE(review): sigmoid bounds the prediction to (0, 1) while the
        # training target is the raw `rating` column (0 for unrated pairs) --
        # confirm ratings are scaled to [0, 1] upstream.
        final_output = Dense(1, activation='sigmoid')(combined_output)

        model = Model(inputs=[uid_input, mid_input, age_input, genres_input], outputs=final_output)
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

        return model

    def _build_pair_frame(self, df):
        """Build the full user x movie grid with rating, age and genre columns.

        Also refreshes ``self.user_ages_lookup`` and
        ``self.movie_genres_lookup`` from ``df``.

        Args:
            df: Frame with columns ``uid``, ``mid``, ``rating``, ``age`` and
                every name in ``GENRE_COLUMNS``.

        Returns:
            DataFrame with one row per (uid, mid) combination, ordered with
            users outermost, holding ``rating`` (0 where the pair is absent
            from ``df``), ``age`` and the genre columns.
        """
        genre_cols = list(self.GENRE_COLUMNS)

        self.user_ages_lookup = df[['uid', 'age']].drop_duplicates().set_index('uid')['age'].to_dict()
        genre_table = df[['mid'] + genre_cols].drop_duplicates().set_index('mid')
        self.movie_genres_lookup = genre_table.to_dict('index')

        # Cartesian product of the distinct users and movies, users outermost.
        uids = df['uid'].unique()
        mids = df['mid'].unique()
        um_pairs = pd.DataFrame({
            'uid': np.repeat(uids, len(mids)),
            'mid': np.tile(mids, len(uids)),
        })

        # Attach known ratings; pairs without a rating get 0.
        um_pairs = um_pairs.merge(df[['uid', 'mid', 'rating']], on=['uid', 'mid'], how='left')
        # Assign the result back: an inplace fillna on a column selection is
        # not guaranteed to update the parent frame.
        um_pairs['rating'] = um_pairs['rating'].fillna(0)

        um_pairs['age'] = um_pairs['uid'].map(self.user_ages_lookup).fillna(0)

        # Every mid in the grid comes from df, so each one has a row in
        # genre_table and this join fills all genre columns in one pass.
        um_pairs = um_pairs.join(genre_table, on='mid')

        return um_pairs

    @staticmethod
    def _to_model_inputs(matrix):
        """Split a ``[uid, mid, age, genres...]`` matrix into the model's four inputs."""
        return [matrix[:, 0:1], matrix[:, 1:2], matrix[:, 2:3], matrix[:, 3:]]

    def df2xy_and_split(self, df, test_size=0.2):
        """Turn a ratings frame into model inputs/targets and split them randomly.

        Every (user, movie) combination present in ``df`` becomes a sample;
        combinations without a rating get target 0.

        Args:
            df: Frame with columns ``uid``, ``mid``, ``rating``, ``age`` and
                every name in ``GENRE_COLUMNS``.
            test_size: Passed through to ``train_test_split``.

        Returns:
            ``(train_x, train_y, test_x, test_y)`` where each ``*_x`` is the
            list ``[uid, mid, age, genres]`` of 2-D arrays expected by the
            model and each ``*_y`` is a 1-D array of ratings.
        """
        um_pairs = self._build_pair_frame(df)

        X = um_pairs[['uid', 'mid', 'age'] + list(self.GENRE_COLUMNS)].values
        y = um_pairs['rating'].values

        train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=test_size)

        return self._to_model_inputs(train_x), train_y, self._to_model_inputs(test_x), test_y

    def fit(self, df, pop_source, epochs=30, batch_size=2048, patience=3, kaggle_mode=False):
        """Fit the popularity recommender and train the network.

        Args:
            df: Ratings frame, see ``df2xy_and_split``.
            pop_source: Data handed to ``self.topPopular.fit``.
            epochs: Maximum number of training epochs.
            batch_size: Training batch size.
            patience: Epochs without ``val_loss`` improvement before stopping.
            kaggle_mode: When true, (almost) all samples are used for training
                and no hold-out split is stored for ``evaluate_model``.

        Returns:
            The history object returned by ``Model.fit``.
        """
        self.topPopular.fit(pop_source)
        if kaggle_mode:
            # A tiny test fraction keeps essentially everything for training;
            # the leftover test rows are discarded.
            train_x, train_y, _, _ = self.df2xy_and_split(df, test_size=0.00001)
        else:
            train_x, train_y, test_x, test_y = self.df2xy_and_split(df)
            self.valid_x = test_x
            self.valid_y = test_y

        # restore_best_weights leaves the model on the best val_loss epoch
        # instead of the last (already degraded) one; the checkpoint file still
        # receives the best weights as before.
        early_stop = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        model_ckp = ModelCheckpoint(
            filepath='deepfm_.weights.h5',
            monitor='val_loss',
            save_weights_only=True,
            save_best_only=True,
        )

        train_history = self.model.fit(
            train_x,
            train_y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.1,
            callbacks=[model_ckp, early_stop],
        )
        return train_history

    def evaluate_model(self):
        """Print MSE and RMSE of the model on the hold-out split stored by ``fit``.

        Raises:
            AttributeError: If no hold-out split is available (``fit`` has not
                run with ``kaggle_mode=False``).
        """
        valid_x = getattr(self, 'valid_x', None)
        valid_y = getattr(self, 'valid_y', None)
        if valid_x is None or valid_y is None:
            raise AttributeError(
                "No validation split available: call fit(..., kaggle_mode=False) before evaluate_model()."
            )

        predictions = self.model.predict(valid_x)
        mse = mean_squared_error(valid_y, predictions)
        print("MSE:", mse)
        print("RMSE:", np.sqrt(mse))

    def recommend_movies(self, user_id, top_n=25, n_candidates=200):
        """Return the ``top_n`` candidate movies with the highest predicted score.

        Candidates come from ``self.topPopular.predict_top`` (presumably the
        most popular movies the user has not seen yet -- confirm against
        ``TopPopRecommender``) and are re-ranked by the network.

        Args:
            user_id: User to recommend for. Unknown users get age 0.
            top_n: Number of movie ids to return.
            n_candidates: Number of candidates requested from
                ``self.topPopular``.

        Returns:
            Array of at most ``top_n`` movie ids, best first. Empty when there
            are no candidates.
        """
        user_age = self.user_ages_lookup.get(user_id, 0)

        unseen_movie_ids = np.array(self.topPopular.predict_top(user_id, at=n_candidates, remove_seen=True))
        if unseen_movie_ids.size == 0:
            # Nothing to score; avoid calling the model with empty inputs.
            return unseen_movie_ids

        n_rows = len(unseen_movie_ids)
        no_genres = {}
        # One row per candidate; movies (or genres) missing from the lookup
        # contribute zeros.
        genres_array = np.array([
            [self.movie_genres_lookup.get(mid, no_genres).get(genre, 0) for genre in self.GENRE_COLUMNS]
            for mid in unseen_movie_ids
        ])

        predict_x = [
            np.full((n_rows, 1), user_id),
            unseen_movie_ids.reshape(-1, 1),
            np.full((n_rows, 1), fill_value=user_age),
            genres_array,
        ]

        predicted_ratings = self.model.predict(predict_x).flatten()

        # Highest predicted score first.
        top_n_movie_indices = np.argsort(-predicted_ratings)[:top_n]
        return unseen_movie_ids[top_n_movie_indices]

    
