<a href="https://colab.research.google.com/github/rreichhard/Data_Science_Portfolio/blob/main/Movie%20Recommender%20Hybrid/Movie_Recommender_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Citation

#### Dataset retrived from: F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4: 19:1–19:19. https://doi.org/10.1145/2827872




In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
pip install lightfm



In [35]:
# Import libraries

import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.data import Dataset

In [4]:
ratings_df = pd.read_csv('/content/drive/My Drive/ratings.csv')
links_df = pd.read_csv('/content/drive/My Drive/links.csv')
movies_df = pd.read_csv('/content/drive/My Drive/movies.csv')
# tags_df = pd.read_csv('tags.csv')

In [5]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
ratings_df.dtypes

userId         int64
movieId        int64
rating       float64
timestamp      int64
dtype: object

In [7]:
links_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [8]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [9]:
df = pd.merge(ratings_df, movies_df, how='left', on=['movieId'])

In [10]:
df = df[['userId','movieId','rating','genres']]

In [11]:
df.head()

Unnamed: 0,userId,movieId,rating,genres
0,1,1,4.0,Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,Comedy|Romance
2,1,6,4.0,Action|Crime|Thriller
3,1,47,5.0,Mystery|Thriller
4,1,50,5.0,Crime|Mystery|Thriller


In [12]:
df.dtypes

userId       int64
movieId      int64
rating     float64
genres      object
dtype: object

In [13]:
df['userId'] = df['userId'].astype(str)
df['movieId'] = df['movieId'].astype(str)

In [14]:
df.dtypes

userId      object
movieId     object
rating     float64
genres      object
dtype: object

In [15]:
df.shape

(100836, 4)

In [16]:
ratings_df.shape

(100836, 4)

In [24]:
import itertools

movie_genre = [x.split("|") for x in df['genres']]
all_genres = sorted(list(set(itertools.chain.from_iterable(movie_genre))))

In [25]:
dataset = Dataset()

In [26]:
dataset.fit(users=df['userId'],
           items=df['movieId'])

In [27]:
num_users, num_topics = dataset.interactions_shape()
print(f'Num users: {num_users}, num_topics: {num_topics}.')

Num users: 610, num_topics: 9724.


In [28]:
dataset.fit_partial(users=df['userId'],
           items=df['movieId'],
            item_features=all_genres)

In [29]:
(interactions, _) = dataset.build_interactions(df.iloc[:, 0:3].values)

In [30]:
item_features = dataset.build_item_features((x, y) for x, y in zip(df.movieId, movie_genre))

In [32]:
labels = np.array(df['movieId'])

In [33]:
model = LightFM(loss='warp')

In [36]:
(train, test) = random_train_test_split(interactions=interactions, test_percentage=0.2)

In [37]:
model.fit(train, item_features=item_features, epochs=5)

<lightfm.lightfm.LightFM at 0x7d74541ef880>

In [44]:
import requests,json,csv,os

In [53]:
api_key = '241c75c800196217b1b87be05d7e3cc0'
query = 'https://api.themoviedb.org/3/movie/'+'464052'+'?api_key='+api_key+'&language=en-US'

In [54]:
response = requests.get(query)
array = response.json()
print("Print each key-value pair from JSON Response")
for key, value in array.items():
  print(key, ":", value)

Print each key-value pair from JSON Response
adult : False
backdrop_path : /egg7KFi18TSQc1s24RMmR9i2zO6.jpg
belongs_to_collection : {'id': 468552, 'name': 'Wonder Woman Collection', 'poster_path': '/8AQRfTuTHeFTddZN4IUAqprN8Od.jpg', 'backdrop_path': '/n9KlvCOBFDmSyw3BgNrkUkxMFva.jpg'}
budget : 200000000
genres : [{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}]
homepage : https://www.warnerbros.com/movies/wonder-woman-1984
id : 464052
imdb_id : tt7126948
original_language : en
original_title : Wonder Woman 1984
overview : A botched store robbery places Wonder Woman in a global battle against a powerful and mysterious ancient force that puts her powers in jeopardy.
popularity : 66.106
poster_path : /8UlWHLMpgZm9bx6QYh0NFoq67TZ.jpg
production_companies : [{'id': 174, 'logo_path': '/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png', 'name': 'Warner Bros. Pictures', 'origin_country': 'US'}, {'id': 114152, 'logo_path': None, 'name': 'The Stone Quarry', 'origin_cou

In [55]:
def get_data(api_key, movie_id):
    query = 'https://api.themoviedb.org/3/movie/'+movie_id+'?api_key='+api_key+'&language=en-US'
    response = requests.get(query)
    if response.status_code==200:
        array = response.json()
        text = json.dumps(array)
        return text
    else:
        return ('error')

In [58]:
movie_list = ['464052','508442']
for movie in movie_list:
    text = get_data(api_key, movie)
    if text == "error":
        break
    print(text)

{"adult": false, "backdrop_path": "/egg7KFi18TSQc1s24RMmR9i2zO6.jpg", "belongs_to_collection": {"id": 468552, "name": "Wonder Woman Collection", "poster_path": "/8AQRfTuTHeFTddZN4IUAqprN8Od.jpg", "backdrop_path": "/n9KlvCOBFDmSyw3BgNrkUkxMFva.jpg"}, "budget": 200000000, "genres": [{"id": 28, "name": "Action"}, {"id": 12, "name": "Adventure"}, {"id": 14, "name": "Fantasy"}], "homepage": "https://www.warnerbros.com/movies/wonder-woman-1984", "id": 464052, "imdb_id": "tt7126948", "original_language": "en", "original_title": "Wonder Woman 1984", "overview": "A botched store robbery places Wonder Woman in a global battle against a powerful and mysterious ancient force that puts her powers in jeopardy.", "popularity": 66.106, "poster_path": "/8UlWHLMpgZm9bx6QYh0NFoq67TZ.jpg", "production_companies": [{"id": 174, "logo_path": "/IuAlhI9eVC9Z8UQWOIDdWRKSEJ.png", "name": "Warner Bros. Pictures", "origin_country": "US"}, {"id": 114152, "logo_path": null, "name": "The Stone Quarry", "origin_countr

In [59]:
type(text)

str

In [41]:
def sample_recommendation(model, data, user_ids):

    n_users, n_items = data.shape

    #build a structure to store user scores for each item
    all_scores = np.empty(shape=(0,n_items))

    #iterate through the group and build the scores
    for user_id in user_ids:
        #known_positives = labels[data.tocsr()[user_id].indices]

        scores = model.predict(user_id,np.arange(n_items),item_features)

        top_items_for_user = labels[np.argsort(-scores)]
        print("Top Recommended Movies For User: ", user_id)
        for x in top_items_for_user[:3]:
            print("     %s" % x)

        #vertically stack the user scores (items are columns)
        all_scores = np.vstack((all_scores, scores))
        #print(all_top_items)

    #compute the average rating for each item in the group
    item_averages = np.mean(all_scores.astype(np.float), axis=0)
    top_items_for_group = labels[np.argsort(-item_averages)]

    print("Top Recommended Movies for Group:")

    for x in top_items_for_group[:3]:
        print("     %s" % x)


In [42]:
#fetch user_ids of users in group
group = [3,26,451,23,24,25]


In [43]:

#sample recommendations for the group
sample_recommendation(model, interactions, group)

Top Recommended Movies For User:  3
     2858
     8907
     333
Top Recommended Movies For User:  26
     247
     500
     2485
Top Recommended Movies For User:  451
     3508
     1676
     2641
Top Recommended Movies For User:  23
     1263
     89745
     260
Top Recommended Movies For User:  24
     1263
     89745
     99114
Top Recommended Movies For User:  25
     5891
     432
     7169
Top Recommended Movies for Group:
     1263
     89745
     1198
     1291
     260


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  item_averages = np.mean(all_scores.astype(np.float), axis=0)
