<a href="https://colab.research.google.com/github/rreichhard/Data_Science_Portfolio/blob/main/Movie%20Recommender%20Hybrid/Movie_Recommender_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Citation

#### Dataset retrived from: F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19. https://doi.org/10.1145/2827872




In [156]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [157]:
# Import libraries

import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

In [158]:
ratings_df = pd.read_csv('/content/drive/My Drive/ratings.csv')
links_df = pd.read_csv('/content/drive/My Drive/links.csv')
movies_df = pd.read_csv('/content/drive/My Drive/movies.csv')
# tags_df = pd.read_csv('tags.csv')

In [159]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [160]:
ratings_df.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [161]:
links_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [162]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [163]:
df = pd.merge(ratings_df, movies_df, how='left', on=['movieId'])

In [164]:
df = df[['userId','movieId','rating','genres']]

In [165]:
df.head()

Unnamed: 0,userId,movieId,rating,genres
0,1,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,Comedy|Romance
2,1,6,4.0,Action|Crime|Thriller
3,1,47,5.0,Mystery|Thriller
4,1,50,5.0,Crime|Mystery|Thriller


In [166]:
df.dtypes

userId       int64
movieId      int64
rating     float64
genres      object
dtype: object

In [167]:
df['userId'] = df['userId'].astype(str)
df['movieId'] = df['movieId'].astype(str)

In [168]:
df.dtypes

userId      object
movieId     object
rating     float64
genres      object
dtype: object

In [169]:
# df = df[df['rating'] > 3.0]

In [170]:
# df.head()

In [171]:
import itertools

movie_genre = [x.split("|") for x in df['genres']]
all_genres = sorted(list(set(itertools.chain.from_iterable(movie_genre))))

In [172]:
dataset = Dataset()

In [173]:
dataset.fit(users=df['userId'],
           items=df['movieId'])

In [174]:
num_users, num_topics = dataset.interactions_shape()
print(f'Num users: {num_users}, num_topics: {num_topics}.')

Num users: 610, num_topics: 9724.


In [175]:
dataset.fit_partial(users=df['userId'],
           items=df['movieId'],
            item_features=all_genres)

In [176]:
(interactions, _) = dataset.build_interactions(df.iloc[:, 0:3].values)

In [177]:
item_features = dataset.build_item_features((x, y) for x, y in zip(df.movieId, movie_genre))

In [178]:
labels = np.array(df['movieId'])

In [179]:
model = LightFM(loss='bpr')

In [180]:
(train, test) = random_train_test_split(interactions=interactions, test_percentage=0.2)

In [181]:
model.fit(train, item_features=item_features, epochs=10)

<lightfm.lightfm.LightFM at 0x7fe4af9c0580>

In [182]:
## model performnce evaluation

train_precision = precision_at_k(model, train,item_features=item_features, k=10).mean()
test_precision = precision_at_k(model, test,item_features=item_features, k=10).mean()

train_auc = auc_score(model, train, item_features=item_features).mean()
test_auc = auc_score(model, test, item_features=item_features).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))


Precision: train 0.13, test 0.03.
AUC: train 0.74, test 0.70.


In [183]:
model = LightFM(loss='warp')

In [184]:
model.fit_partial(train, item_features=item_features, epochs=10)

<lightfm.lightfm.LightFM at 0x7fe4b3522f20>

In [185]:
## model performnce evaluation

train_precision = precision_at_k(model, train,item_features=item_features, k=10).mean()
test_precision = precision_at_k(model, test,item_features=item_features, k=10).mean()

train_auc = auc_score(model, train, item_features=item_features).mean()
test_auc = auc_score(model, test,item_features=item_features).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))


Precision: train 0.22, test 0.05.
AUC: train 0.89, test 0.87.


In [186]:
# import requests,json,csv,os

In [187]:
# api_key = ''
# query = 'https://api.themoviedb.org/3/movie/'+'464052'+'?api_key='+api_key+'&language=en-US'

In [188]:
# response = requests.get(query)
# array = response.json()
# print("Print each key-value pair from JSON Response")
# for key, value in array.items():
#   print(key, ":", value)

In [189]:
# def get_data(api_key, movie_id):
#     query = 'https://api.themoviedb.org/3/movie/'+movie_id+'?api_key='+api_key+'&language=en-US'
#     response = requests.get(query)
#     if response.status_code==200:
#         array = response.json()
#         text = json.dumps(array)
#         return text
#     else:
#         return ('error')

In [190]:
# movie_list = ['464052','508442']
# for movie in movie_list:
#     text = get_data(api_key, movie)
#     if text == "error":
#         break
#     print(text)

In [191]:
# type(text)

In [192]:
# def sample_recommendation(model, data, user_ids):

#     n_users, n_items = data.shape

#     #build a structure to store user scores for each item
#     all_scores = np.empty(shape=(0,n_items))

#     #iterate through the group and build the scores
#     for user_id in user_ids:
#         #known_positives = labels[data.tocsr()[user_id].indices]

#         scores = model.predict(user_id,np.arange(n_items),item_features)

#         top_items_for_user = labels[np.argsort(-scores)]
#         print("Top Recommended Movies For User: ", user_id)
#         for x in top_items_for_user[:3]:
#             print("     %s" % x)

#         #vertically stack the user scores (items are columns)
#         all_scores = np.vstack((all_scores, scores))
#         #print(all_top_items)

#     #compute the average rating for each item in the group
#     item_averages = np.mean(all_scores.astype(np.float), axis=0)
#     top_items_for_group = labels[np.argsort(-item_averages)]

#     print("Top Recommended Movies for Group:")

#     for x in top_items_for_group[:3]:
#         print("     %s" % x)


In [193]:
# #fetch user_ids of users in group
# group = [3,26,451,23,24,25]


In [194]:

# #sample recommendations for the group
# sample_recommendation(model, interactions, group)