In [1]:
cd ../

/Users/scottcronin/gh/recommender_deployed


In [41]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import pandas as pd
import numpy as np
import scipy.sparse as scs
from lightfm import LightFM
from tqdm import tqdm, tqdm_notebook
import time
from app.preprocess import Preprocessor
from app.models import FM



import os
import tmdbsimple as tmdb
tmdb.API_KEY = os.environ['TMDB_API_KEY']

In [3]:
interactions = pd.read_csv('data/ratings.dat',
                           sep='::', engine='python',
                           header=None,
                           names=['uid', 'iid', 'rating', 'timestamp'],
                           usecols=['uid', 'iid', 'rating'],
                          )
display(interactions.sample(5))
print('Shape: {:>9,} x {}'.format(*interactions.shape))

Unnamed: 0,uid,iid,rating
6081280,43413,3033,3.0
8097261,58074,7162,4.0
3912973,28041,1275,5.0
8742337,62572,25,4.0
5738520,40992,1483,3.0


Shape: 10,000,054 x 3


In [7]:
links = pd.read_csv('data/links.csv')
display(links.sample(5))
print('Shape: {:>9,} x {}'.format(*links.shape))

Unnamed: 0,movieId,imdbId,tmdbId
41557,165727,3760966,333665.0
6890,7001,77745,11850.0
24273,113715,3605002,259761.0
7945,8628,114371,54022.0
32496,140269,338109,80219.0


Shape:    45,843 x 3


In [8]:
pp = Preprocessor(min_rating=4.0)
csr = pp.fit_transform(interactions)

In [10]:
lfm = LightFM(no_components=30, loss='warp', learning_rate=0.05)
fm = FM(pp, lfm)

# Generate posters for each movieId in dataset

In [38]:
movieIds = pd.DataFrame(interactions.iid.unique(), columns=['movieId'])
m = movieIds.merge(links[['movieId', 'tmdbId']], how='left').dropna().astype('int64')
m.head(4)

Unnamed: 0,movieId,tmdbId
0,122,11066
1,185,1642
2,231,8467
3,292,6950


In [45]:
posters = []

for i, movie in tqdm_notebook(m.iterrows(), total=10634):
    time.sleep(0.5)
    try:
        _id = movie['tmdbId']
        poster_path = tmdb.Movies(_id).info()['poster_path']
    except:
        poster_path = 'error'    
    posters.append(poster_path)




In [46]:
posters[-5:]

['/fJKn2IEgvtOU7LwO7zNHkLDzc3m.jpg',
 '/23UKOJp4gKPuJR3vjp3ueRUJJYr.jpg',
 '/mEKMsQgaDHjB0XAwL4odTJChJfe.jpg',
 None,
 '/vgV4BPVeMLCa4sEvmT7PTt7fx4N.jpg']

In [49]:
m['poster_path'] = posters

In [50]:
m.to_csv('data/movie_poster_urls.csv', index=False)

In [54]:
m['url_base'] = 'https://image.tmdb.org/t/p/w600_and_h900_bestv2/'

In [55]:
m['poster_url'] = m['url_base'] + m['poster_path']

In [59]:
for url in m.sample(5).poster_url.tolist():
    print(url)

https://image.tmdb.org/t/p/w600_and_h900_bestv2//4viJcRFgF4cCPnJWgpvb3CRd2pK.jpg
https://image.tmdb.org/t/p/w600_and_h900_bestv2//hXa5ArW1Llu4SOqPWRQ7dzCDyOH.jpg
https://image.tmdb.org/t/p/w600_and_h900_bestv2//nacr1Xj8tJroyVJKzPtdtgApphj.jpg
https://image.tmdb.org/t/p/w600_and_h900_bestv2//yyCXkzmi8jChDD6qSmhD2QVs1E.jpg
https://image.tmdb.org/t/p/w600_and_h900_bestv2//mB0KF5T2s6raTjiV676Umd8ciE0.jpg
