In [2]:
import functools
from pathlib import Path

import streamlit as st
from st_aggrid import AgGrid, GridUpdateMode
from st_aggrid.shared import JsCode
from st_aggrid.grid_options_builder import GridOptionsBuilder
import pandas as pd
import plotly.express as px
from typing import List
import re
from datetime import datetime
from sklearn import metrics, preprocessing


import numpy as np
# from surprise import Reader, Dataset, SVD
# from sklearn import metrics, preprocessing
from tensorflow.keras import models, layers, utils
from tensorflow.keras.models import load_model

In [3]:
# Products
dtf_products = pd.read_excel("data_movies.xlsx", sheet_name="products")

dtf_products = dtf_products[~dtf_products["genres"].isna()]
dtf_products["product"] = range(0,len(dtf_products))
dtf_products["name"] = dtf_products["title"].apply(lambda x: re.sub("[\(\[].*?[\)\]]", "", x).strip())
dtf_products["date"] = dtf_products["title"].apply(lambda x: int(x.split("(")[-1].replace(")","").strip()) 
                                                            if "(" in x else np.nan)

## add features
dtf_products["date"] = dtf_products["date"].fillna(9999)
dtf_products["old"] = dtf_products["date"].apply(lambda x: 1 if x < 2000 else 0)

# Users
dtf_users = pd.read_excel("data_movies.xlsx", sheet_name="users").head(2000)

dtf_users["user"] = dtf_users["userId"].apply(lambda x: x-1)

dtf_users["timestamp"] = dtf_users["timestamp"].apply(lambda x: datetime.fromtimestamp(x))
dtf_users["daytime"] = dtf_users["timestamp"].apply(lambda x: 1 if 6<int(x.strftime("%H"))<20 else 0)
dtf_users["weekend"] = dtf_users["timestamp"].apply(lambda x: 1 if x.weekday() in [5,6] else 0)

dtf_users = dtf_users.merge(dtf_products[["movieId","product","name"]], how="left")
dtf_users = dtf_users.rename(columns={"rating":"y"})

dtf_products_use = dtf_products[["product","name","old","genres"]].set_index("product")
dtf_context = dtf_users[["user","product","daytime","weekend"]]

dtf_ratings = dtf_users[["user","product","y"]]



In [65]:
dtf_users.shape

(10000, 9)

In [None]:
tmp = dtf_ratings.copy()
dtf_up_pivot = tmp.pivot_table(index="user", columns="product", values="y")
missing_cols = list(set(dtf_products_use.index) - set(dtf_up_pivot.columns))
for col in missing_cols:
    dtf_up_pivot[col] = np.nan
dtf_up_pivot = dtf_up_pivot[sorted(dtf_up_pivot.columns)]

dtf_up_normed = pd.DataFrame(preprocessing.MinMaxScaler(feature_range=(0.5,1)).fit_transform(dtf_up_pivot.values), columns=dtf_up_pivot.columns, index=dtf_up_pivot.index)

In [48]:
dtf_up_normed

product,0,1,2,3,4,5,6,7,8,9,...,9731,9732,9733,9734,9735,9736,9737,9738,9739,9740
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.8,,0.750,,,0.750,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,0.8,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,,0.833333,,,,0.875,,,,,...,,,,,,,,,,
62,1.0,,,,,,,,,0.666667,...,,,,,,,,,,
63,0.8,,0.625,,,0.875,,,,,...,,,,,,,,,,
64,,,,,,,,,,,...,,,,,,,,,,


In [69]:
tags = [i.split("|") for i in dtf_products_use["genres"].unique()]
columns = list(set([i for lst in tags for i in lst]))
columns.remove('(no genres listed)')
for col in columns:
    dtf_products_use[col] = dtf_products_use["genres"].apply(lambda x: 1 if col in x else 0)

features = dtf_products_use.drop(["genres","name"], axis=1).columns
context = dtf_context.drop(["user","product"], axis=1).columns

# Recommend unrated movies to users:
unrated_df = dtf_up_normed.stack(dropna=False).reset_index().rename(columns={0:"y"}).sample(frac = 0.2)

## add features
unrated_df = unrated_df.merge(dtf_products_use.drop(["genres","name"], axis=1), how="left", left_on="product", right_index=True)

# add context
unrated_df[context] = 0 #--> simulate production for a weekday night


In [70]:
unrated_df.shape

(128581, 25)

In [76]:
# filename = 'finalized_model.sav'
# loaded_rs = tf.saved_model.load(filename)
loaded_model = load_model('model.h5')


In [86]:
unrated_to_recommend = unrated_df[unrated_df["y"].isna()]
unrated_to_recommend["yhat"] = loaded_model.predict([unrated_to_recommend["user"], unrated_to_recommend["product"], unrated_to_recommend[features], unrated_to_recommend[context]])

user_selections = 4
recommended_title = unrated_to_recommend[unrated_to_recommend["user"] == user_selections].sort_values(by=['yhat'],ascending=False).head(5)
final_result = recommended_title[["user","product"]].merge(dtf_products_use.reset_index()[["product","name","genres"]], how="inner", on="product")
final_result



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,user,product,name,genres
0,4,7212,Avatar,Action|Adventure|Sci-Fi|IMAX
1,4,7402,Dragon Ball Z: Dead Zone,Action|Adventure|Animation|Fantasy|Sci-Fi
2,4,31,Twelve Monkeys,Mystery|Sci-Fi|Thriller
3,4,7726,Abduction,Action|Drama|Mystery|Thriller
4,4,706,2001: A Space Odyssey,Adventure|Drama|Sci-Fi
