## Install libraries

In [None]:
# Install the experiment-tracking (mlfoundry) and deployment (servicefoundry) clients.
# Only servicefoundry is pinned to an exact version; mlfoundry resolves to the latest release.
!pip install -U mlfoundry
!pip install -U servicefoundry==0.1.69

## Load datasets

In [None]:
# load our datasets as dataframes
import pandas as pd

# All three CSV files are served from the same GitHub repository.
DATA_ROOT = 'https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master'

# Read order is unchanged: movie metadata, keywords, then the ratings.
movie_meta_df, keywords_df, ratings_df = (
    pd.read_csv(f'{DATA_ROOT}/{csv_name}')
    for csv_name in ('movies_metadata.csv', 'keywords.csv', 'ratings_small.csv')
)

## Clean dataset

In [None]:
# Work on a derived frame so movie_meta_df itself stays untouched:
# remove the listed columns, then discard every row that still has a missing value.
columns_to_drop = ['belongs_to_collection', 'homepage', 'tagline', 'poster_path', 'overview', 'imdb_id', 'spoken_languages']
meta_df = movie_meta_df.drop(columns=columns_to_drop).dropna()

In [None]:
import ast

column_changes = ['production_companies', 'production_countries', 'genres']

# Field of the FIRST list entry that is kept for each JSON-like column.
first_entry_field = {
    'production_companies': 'name',
    'production_countries': 'iso_3166_1',
    'genres': 'name',
}


def _first_entry_value(raw_cell, field):
    """Parse a stringified list of dicts and return ``field`` of its first entry.

    Args:
        raw_cell: Cell content as stored in the CSV, i.e. the textual form of a
            Python literal (parsed with ``ast.literal_eval``).
        field: Key to read from the first entry.

    Returns:
        The value stored under ``field`` in the first entry, or ``None`` when
        the parsed literal is empty.
    """
    entries = ast.literal_eval(raw_cell)
    if len(entries) < 1:
        return None
    return next(iter(entries))[field]


# One shrunk value per row of meta_df, in row order, for each column.
# The three columns previously had three copy-pasted loops differing only in the key.
json_shrinker_dict = {
    col: [_first_entry_value(raw_cell, first_entry_field[col]) for raw_cell in meta_df[col]]
    for col in column_changes
}

In [None]:
# Swap each JSON-like column for its shrunk single-value version, then drop
# the rows for which a shrunk value is missing (None).
shrunk_columns = {col: json_shrinker_dict[col] for col in column_changes}
meta_df = meta_df.assign(**shrunk_columns).dropna()
meta_df.head()

## Scale features for clustering

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Numeric columns that are rescaled and later used for clustering.
feature_columns = ['budget', 'popularity', 'revenue', 'runtime', 'vote_average', 'vote_count']
feature_frame = meta_df[feature_columns]

scalar = MinMaxScaler()
scaled = scalar.fit_transform(feature_frame)
# Re-wrap the scaled array so it keeps meta_df's row index and the column names.
scaled_df = pd.DataFrame(scaled, index=feature_frame.index, columns=feature_frame.columns)


## Set up MLFoundry to log params and metrics

In [None]:
import mlfoundry

# Log in to MLFoundry and create the client that the later cells use to open runs.
mlfoundry.login()
client = mlfoundry.get_client()
# Project name passed to every client.create_run(...) call in this notebook.
MLF_PROJECT_NAME= "movie-clustering-jul-29-1"

## Find optimal number of clusters and log to MLFoundry

In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score


# Select the six feature columns explicitly so the sweep never sees any other
# column that scaled_df may carry.
clustering_features = ['budget', 'popularity', 'revenue', 'runtime', 'vote_average', 'vote_count']
clustering_df = scaled_df[clustering_features]

candidate_cluster_counts = (5, 10, 15, 20, 25)

# One MLFoundry run per candidate cluster count: fit, score, log.
for num_cluster in candidate_cluster_counts:
    run = client.create_run(project_name=MLF_PROJECT_NAME, run_name=f"num-clusters-{num_cluster}")

    cluster_fit = KMeans(n_clusters=num_cluster, random_state=0)
    cluster_fit.fit(clustering_df)

    # Silhouette over all samples (no sub-sampling), Euclidean distance.
    score = silhouette_score(clustering_df, cluster_fit.labels_, metric='euclidean')

    run.log_params({'num_clusters': num_cluster, 'centers': cluster_fit.cluster_centers_})
    run.log_metrics({'silhouette_score': score})

## Apply KMeans using optimal number of clusters

In [None]:
def apply_kmeans(df, clusters):
    """Cluster the rows of ``df`` with KMeans and attach the labels to ``df``.

    The frame is modified in place (two columns are added or overwritten) and
    also returned.

    Args:
        df: DataFrame whose columns are the clustering features.
        clusters: Number of clusters to fit.

    Returns:
        The same ``df`` object with:
          * ``cluster_label``  - integer label per row
          * ``cluster_string`` - the label formatted as ``"c<label>"``
    """
    # Leave out label columns written by a previous call; otherwise re-running
    # on the same frame would feed the old labels (one of them a string column)
    # back into the fit.
    features = df.drop(columns=['cluster_label', 'cluster_string'], errors='ignore')

    kmeans = KMeans(n_clusters=clusters, random_state=0)
    cluster_labels = kmeans.fit(features).labels_

    df['cluster_label'] = cluster_labels
    df['cluster_string'] = ["c{}".format(label) for label in cluster_labels]

    return df

# Adds cluster_label / cluster_string columns to scaled_df itself and displays the result.
# 15 is the cluster count chosen for the final fit (per the section heading, the optimum from the sweep).
apply_kmeans(scaled_df, 15)

## Train a collaborative filtering model using `implicit`

In [None]:
# Categorical ids give every distinct movie / user a dense integer code
# (via .cat.codes), which is what the sparse ratings matrix is indexed by.
ratings_df = ratings_df.astype({'movieId': 'category', 'userId': 'category'})

In [None]:
# No version is pinned here, so this installs whatever release of implicit is current.
# NOTE(review): the cells below index the result of model.recommend(...) with [0]; confirm the installed version returns a sequence whose first item is the item codes.
!pip install implicit

In [None]:
from scipy.sparse import coo_matrix
import implicit

In [None]:
# Sparse ratings matrix: rows are user category codes, columns are movie category codes,
# and each stored value is the corresponding rating.
r = coo_matrix((ratings_df['rating'], (ratings_df['userId'].cat.codes, ratings_df['movieId'].cat.codes)))

In [None]:
import implicit

# Alternating-least-squares model with 25 latent factors, fitted on the ratings matrix r.
# NOTE(review): no random seed is passed, so results may differ between runs — confirm whether that matters here.
model = implicit.als.AlternatingLeastSquares(factors=25)
model.fit(r)

In [None]:
# Raw userId -> categorical code, i.e. the row of the ratings matrix r for that user.
user_category_to_code = {
    category: code
    for code, category in enumerate(ratings_df.userId.cat.categories)
}

def get_movie_names_from_movie_category_codes(movie_cat_codes):
  """Translate movie category codes into movie titles, keeping the given order.

  Args:
    movie_cat_codes: Iterable of integer codes into
      ``ratings_df['movieId'].cat.categories`` (e.g. ranked model output).

  Returns:
    List of ``original_title`` values from ``movie_meta_df``, ordered like
    ``movie_cat_codes``. Codes whose movie id has no metadata row are skipped;
    an id with several metadata rows contributes every one of its titles.
  """
  movie_categories = ratings_df['movieId'].cat.categories
  # dict.fromkeys de-duplicates while preserving the ranking order.
  wanted_ids = list(dict.fromkeys(str(movie_categories[code]) for code in movie_cat_codes))

  # Compare ids as strings on both sides so the lookup does not depend on the
  # dtype read_csv inferred for the metadata 'id' column.
  meta_ids = movie_meta_df['id'].astype(str)
  hits = meta_ids.isin(wanted_ids)

  titles_by_id = {}
  for movie_id, title in zip(meta_ids[hits], movie_meta_df.loc[hits, 'original_title']):
    titles_by_id.setdefault(movie_id, []).append(title)

  # A plain isin() mask would return titles in metadata order and lose the ranking.
  return [title for movie_id in wanted_ids for title in titles_by_id.get(movie_id, [])]

def get_recommendation_for_user(user_id):
  """Return the titles of the movies the ALS model recommends for ``user_id``.

  Args:
    user_id: A user id exactly as it appears in ``ratings_df['userId']``.

  Returns:
    List of movie titles, as produced by
    ``get_movie_names_from_movie_category_codes``.

  Raises:
    KeyError: If ``user_id`` is not a key of ``user_category_to_code``.
  """
  user_cat_code = user_category_to_code[user_id]
  # The model is handed this user's own row of the ratings matrix.
  user_ratings_row = r.tocsr().getrow(user_cat_code)
  # recommend(...)[0] holds the recommended movie category codes.
  recommended_codes = model.recommend(user_cat_code, user_ratings_row)[0]
  return get_movie_names_from_movie_category_codes(recommended_codes)
  

In [None]:
get_recommendation_for_user(11)

## Save the model and log it to MLFoundry

In [None]:
# Persist the fitted ALS model to disk; the logging cell that follows uploads it
# under the file name recommendation-model.npz.
model.save('recommendation-model')


In [None]:
# Upload the saved model as an artifact of a dedicated "cf-model" run.
# The path is relative to the working directory instead of the hard-coded
# '/content/...' location, so the cell also works where the notebook's working
# directory is not /content.
# NOTE(review): assumes model.save('recommendation-model') wrote
# recommendation-model.npz into the current working directory — confirm.
run = client.create_run(project_name=MLF_PROJECT_NAME, run_name="cf-model")
run.log_artifact('recommendation-model.npz')


## Deploy the app using ServiceFoundry

In [None]:
import servicefoundry.core as sfy
# Log in to ServiceFoundry before gathering requirements and deploying below.
sfy.login()

In [None]:
%%writefile predict.py
import mlfoundry
import pandas as pd
from scipy.sparse import coo_matrix
import implicit

movie_meta_df = pd.read_csv('https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/movies_metadata.csv')
ratings_df = pd.read_csv("https://raw.githubusercontent.com/srihari-tf/recommender-system-tfy/master/ratings_small.csv")

# Normalise metadata ids to strings once. The functions below look movies up
# with string ids (isin([str(id) ...])) and convert back with int(movie_id),
# whereas the filter used to compare 'id' against the integer movieId values;
# the two comparisons could not both match the same rows.
movie_meta_df['id'] = movie_meta_df['id'].astype(str)

# only keep movies in ratings dataset
rated_movie_ids = set(ratings_df['movieId'].astype(str))
movie_meta_df = movie_meta_df[movie_meta_df['id'].isin(rated_movie_ids)]

# Categorical ids provide the dense integer codes that index the ratings matrix.
ratings_df['movieId'] = ratings_df['movieId'].astype("category")
ratings_df['userId'] = ratings_df['userId'].astype("category")
# Rows are user codes, columns are movie codes, values are ratings.
r = coo_matrix((ratings_df['rating'], (ratings_df['userId'].cat.codes, ratings_df['movieId'].cat.codes)))

# Raw id -> categorical code lookups (keys keep the dtype of the ratings columns).
user_category_to_code = {category: code for code, category in enumerate(ratings_df.userId.cat.categories)}
movie_category_to_code = {category: code for code, category in enumerate(ratings_df.movieId.cat.categories)}

# NOTE(review): '<api-key>' is a placeholder; supply the real key at deploy time
# (preferably from the environment) rather than committing it to the file.
client = mlfoundry.get_client(api_key='<api-key>')
# Fetch the model artifact logged by the notebook's "cf-model" run and load it.
run = client.get_run('truefoundry/user-truefoundry/movie-clustering-jul-29-1/cf-model')
local_path = run.download_artifact('recommendation-model.npz')
model = implicit.als.AlternatingLeastSquares(factors=25).load(local_path)

def search_movie(name):
  """Find movies whose original title contains ``name`` (case-insensitive).

  ``name`` is matched as literal text, not as a regular expression, so input
  containing characters such as ``(`` or ``+`` cannot raise a regex error.
  Rows without a title never match.

  Args:
    name: Text to look for inside ``original_title``.

  Returns:
    List of ``{'original_title': ..., 'id': ...}`` dicts, one per matching
    row of ``movie_meta_df``; empty when nothing matches.
  """
  title_matches = movie_meta_df['original_title'].str.contains(name, case=False, regex=False, na=False)
  return movie_meta_df.loc[title_matches, ['original_title', 'id']].to_dict('records')

def find_similar_movie(movie_name):
  """Look up a movie by (partial) title and list titles the model rates as similar.

  Args:
    movie_name: Text passed to ``search_movie``; its first hit is used.

  Returns:
    ``[]`` when the search finds nothing; otherwise a tuple
    ``(matched_title, similar_titles)`` where ``similar_titles`` is a list of
    movie titles.

  Raises:
    KeyError: If the matched movie's id is not a key of ``movie_category_to_code``.
  """
  matches = search_movie(movie_name)
  if not matches:
    return []

  best_match = matches[0]
  movie_cat_code = movie_category_to_code[int(best_match['id'])]
  # similar_items(...)[0] holds the movie category codes of the similar items.
  similar_codes = model.similar_items(movie_cat_code)[0]
  return best_match['original_title'], get_movie_names_from_movie_category_codes(similar_codes)
 
def get_movie_names_from_movie_category_codes(movie_cat_codes):
  """Translate movie category codes into movie titles, keeping the given order.

  Args:
    movie_cat_codes: Iterable of integer codes into
      ``ratings_df['movieId'].cat.categories`` (e.g. ranked model output).

  Returns:
    List of ``original_title`` values from ``movie_meta_df``, ordered like
    ``movie_cat_codes``. Codes whose movie id has no metadata row are skipped;
    an id with several metadata rows contributes every one of its titles.
  """
  movie_categories = ratings_df['movieId'].cat.categories
  # dict.fromkeys de-duplicates while preserving the ranking order.
  wanted_ids = list(dict.fromkeys(str(movie_categories[code]) for code in movie_cat_codes))

  # Compare ids as strings on both sides so the lookup does not depend on the
  # dtype of the metadata 'id' column.
  meta_ids = movie_meta_df['id'].astype(str)
  hits = meta_ids.isin(wanted_ids)

  titles_by_id = {}
  for movie_id, title in zip(meta_ids[hits], movie_meta_df.loc[hits, 'original_title']):
    titles_by_id.setdefault(movie_id, []).append(title)

  # A plain isin() mask would return titles in metadata order and lose the ranking.
  return [title for movie_id in wanted_ids for title in titles_by_id.get(movie_id, [])]

def get_recommendation_for_user(user_id):
  """Return the titles of the movies the ALS model recommends for ``user_id``.

  Args:
    user_id: User id as an int or anything ``int()`` accepts (e.g. a numeric string).

  Returns:
    List of movie titles, as produced by
    ``get_movie_names_from_movie_category_codes``.

  Raises:
    KeyError: If the id is not a key of ``user_category_to_code``.
    ValueError: If ``user_id`` cannot be converted with ``int()``.
  """
  user_cat_code = user_category_to_code[int(user_id)]
  # The model is handed this user's own row of the ratings matrix.
  user_ratings_row = r.tocsr().getrow(user_cat_code)
  # recommend(...)[0] holds the recommended movie category codes.
  recommended_codes = model.recommend(user_cat_code, user_ratings_row)[0]
  return get_movie_names_from_movie_category_codes(recommended_codes)

In [None]:
# Ask ServiceFoundry for the requirements of predict.py (the file written by the cell above);
# the result is handed to sfy.Service when deploying.
reqs = sfy.gather_requirements("predict.py")

In [None]:
# Build a service from predict.py plus the gathered requirements and deploy it.
# "auto-service" is the service name; "v1:local:my-ws" is the target workspace
# (presumably specific to the author's account — replace with your own).
auto_service = sfy.Service("predict.py", reqs, sfy.Parameters(
    name="auto-service",
    workspace="v1:local:my-ws"
))
auto_service.deploy()

# APIs generated
![](https://github.com/srihari-tf/recommender-system-tfy/raw/master/assets/apis.png)

# Fetching similar movies
![](https://github.com/srihari-tf/recommender-system-tfy/raw/master/assets/get_reco.png)