In [78]:
import boto3
import json 
from datetime import datetime
import random 
import pandas as pd 
from recommender import Recommender
from helpers import fill_id, df_to_id_list, prep_data

import warnings;
warnings.filterwarnings('ignore')


class PythonPredictor:
    """Predictor that turns a user's stored Letterboxd ratings into movie recommendations.

    The predictor wraps a ``Recommender`` built from a local model file.  Each
    call to :meth:`predict` opens a database connection through the
    recommender, reads the user's ratings, runs the model, and stores the
    result in the ``recommendations`` table unless a row with the same
    recommendation id already exists.
    """

    # Keys of each recommendation record, in the positional order in which
    # this class reads the fields of every row returned by the model.
    RESULT_FIELDS = ('Title', 'Year', 'IMDB URL', 'Average Rating',
                     'Number of Votes', 'Similarity Score', 'IMDB ID')

    # NOTE(review): fixed placeholder id. Once one row with this id exists,
    # every later call takes the "Already recommended" branch. Replace with a
    # generated id when the schema for that is decided.
    RECOMMENDATION_ID = 1234

    def __init__(self, config=None):
        """Build the recommender from the local model file.

        Args:
            config: Optional configuration mapping (from cortex.yaml).  It is
                currently not read; the default is ``None`` rather than a
                shared mutable ``{}``.
        """
        # To fetch the model from S3 instead of using the local copy:
        # s3 = boto3.client("s3")
        # s3.download_file(config["bucket"], config["key"], "w2v_limitingfactor_v3.51.model")
        self.model = Recommender('models/w2v_limitingfactor_v3.51.model')

    def predict(self, payload=None):
        """Recommend movies for one user and store the result. Called once per request.

        Args:
            payload: The user id whose rows are selected from
                ``user_letterboxd_ratings``.

        Returns:
            ``("Already recommended", recommendation_json)`` when a row with
            the recommendation id already exists, otherwise
            ``("Recommendation committed to DB with id:", recommendation_id)``
            after inserting and committing the new row.

        Raises:
            Any exception raised by the database driver, ``prep_data`` or the
            model propagates unchanged; the cursor and connection are closed
            first.
        """
        self.model.connect_db()
        user_id = payload

        # Everything after connect_db() runs under try/finally so a failure in
        # data preparation or inference cannot leave the connection open.
        try:
            cursor = self.model.cursor_dog

            # Load the user's ratings; rows with missing values are dropped.
            cursor.execute("SELECT date, name, year, letterboxd_uri, rating FROM user_letterboxd_ratings WHERE user_id=%s;", (user_id,))
            ratings = pd.DataFrame(
                cursor.fetchall(),
                columns=['Date', 'Name', 'Year', 'Letterboxd URI', 'Rating'],
            ).dropna()

            # Watched / watchlist data is not loaded here, so both are None.
            good_list, bad_list, hist_list, val_list, ratings_dict = prep_data(
                ratings, watched_df=None, watchlist_df=None, good_threshold=3, bad_threshold=2)

            predictions = self.model.predict(
                good_list, bad_list, hist_list, val_list, ratings_dict,
                n=20, harshness=4, rec_movies=True, scoring=True)

            # One dict per recommended movie.  An empty prediction list now
            # serialises to "[]" instead of raising IndexError.
            recommendation_json = json.dumps(
                [dict(zip(self.RESULT_FIELDS, row)) for row in predictions])

            recommendation_id = self.RECOMMENDATION_ID
            cursor.execute(
                "SELECT EXISTS(SELECT 1 FROM recommendations where recommendation_id=%s);",
                (recommendation_id,))
            already_stored = cursor.fetchall()[0][0]
            if already_stored:
                return "Already recommended", recommendation_json

            cursor.execute(
                "INSERT INTO recommendations(user_id, recommendation_id, recommendation_json, date) VALUES (%s, %s, %s, %s);",
                (user_id, recommendation_id, recommendation_json, datetime.now()))
            self.model.connection.commit()
            return "Recommendation committed to DB with id:", recommendation_id
        finally:
            # Close the connection even if closing the cursor fails.
            try:
                self.model.cursor_dog.close()
            finally:
                self.model.connection.close()

In [79]:
# Instantiate the predictor; __init__ builds the Recommender from the local model file.
predictor = PythonPredictor()

In [80]:
# Run the full pipeline for user 1111: load ratings from the DB, recommend, then store or report the result.
predictor.predict(1111) # works with Letterboxd data; the code can be changed to work with IMDb, but the results are weird

Connected!


('Already recommended',
 '[{"Title": "Mad Max: Fury Road", "Year": 2015, "IMDB URL": "https://www.imdb.com/title/tt1392190/", "Average Rating": 8.1, "Number of Votes": 815496, "Similarity Score": 0.697939932346344, "IMDB ID": "1392190"}, {"Title": "It", "Year": 2017, "IMDB URL": "https://www.imdb.com/title/tt1396484/", "Average Rating": 7.3, "Number of Votes": 417872, "Similarity Score": 0.658385157585144, "IMDB ID": "1396484"}, {"Title": "Enemy", "Year": 2013, "IMDB URL": "https://www.imdb.com/title/tt2316411/", "Average Rating": 6.9, "Number of Votes": 151426, "Similarity Score": 0.6434247493743896, "IMDB ID": "2316411"}, {"Title": "Ex Machina", "Year": 2014, "IMDB URL": "https://www.imdb.com/title/tt0470752/", "Average Rating": 7.7, "Number of Votes": 441653, "Similarity Score": 0.6405469179153442, "IMDB ID": "0470752"}, {"Title": "The Witch", "Year": 2015, "IMDB URL": "https://www.imdb.com/title/tt4263482/", "Average Rating": 6.9, "Number of Votes": 172468, "Similarity Score": 0.62

In [43]:
# Load a local Letterboxd ratings export for comparison with the SQL-backed ratings.
# NOTE(review): this rebinds `ratings`, the same name the SQL cell uses — which frame
# later cells see depends on execution order.
ratings = pd.read_csv('exported_data/letterboxd/riley/ratings_ten.csv',  engine='python')
ratings

Unnamed: 0,Date,Name,Year,Letterboxd URI,Rating
0,2020-02-26,Suspiria,2018,https://letterboxd.com/film/suspiria-2018/,2.5
1,2020-02-26,Very Ralph,2019,https://letterboxd.com/film/very-ralph/,5.0
2,2020-02-26,Enter the Void,2009,https://letterboxd.com/film/enter-the-void/,3.5
3,2020-02-26,Dogtooth,2009,https://letterboxd.com/film/dogtooth/,3.0
4,2020-02-26,Prisoners,2013,https://letterboxd.com/film/prisoners/,5.0
5,2020-02-26,Under the Skin,2013,https://letterboxd.com/film/under-the-skin-2013/,3.0
6,2020-02-26,Melancholia,2011,https://letterboxd.com/film/melancholia/,3.0
7,2020-02-26,Beyond the Black Rainbow,2010,https://letterboxd.com/film/beyond-the-black-r...,2.5
8,2020-02-26,I Saw the Devil,2010,https://letterboxd.com/film/i-saw-the-devil/,3.5


In [21]:
# Fetch user 1111's ratings straight from the database.
# predict() closes the cursor and connection before returning, so open a fresh
# connection here instead of relying on whatever state the kernel has left over.
predictor.model.connect_db()
try:
    predictor.model.cursor_dog.execute("SELECT date, name, year, letterboxd_uri, rating FROM user_letterboxd_ratings WHERE user_id=%s;", (1111,))
    ratings_sql = predictor.model.cursor_dog.fetchall()
finally:
    # Always release the connection, even if the query fails.
    predictor.model.cursor_dog.close()
    predictor.model.connection.close()

# Same column labels predict() uses; rows with missing values are dropped.
ratings = pd.DataFrame(ratings_sql, columns=['Date', 'Name', 'Year', 'Letterboxd URI', 'Rating'])
ratings = ratings.dropna()

In [27]:
# Show the dtype pandas inferred for each ratings column.
ratings.dtypes

Date               object
Name               object
Year                int64
Letterboxd URI     object
Rating            float64
dtype: object

In [67]:
# Check by hand whether a recommendation row with the placeholder id already exists.
predictor.model.connect_db()
recommendation_id = 1234
query = "SELECT EXISTS(SELECT 1 FROM recommendations where recommendation_id=%s);"
try:
    predictor.model.cursor_dog.execute(query, (recommendation_id,))
    boolean = predictor.model.cursor_dog.fetchall()
finally:
    # Release the connection even if the query raises.
    predictor.model.cursor_dog.close()
    predictor.model.connection.close()
boolean

Connected!


[(True,)]

In [74]:
# True when the EXISTS query reported no matching row (the flag is a plain bool).
not boolean[0][0]

False

In [26]:
# NOTE(review): no payload is passed, so predict() uses its default and user_id is None.
# The recorded run of this cell raised a TypeError; pass a real user id as in predictor.predict(1111).
predictor.predict() # works with Letterboxd data; the code can be changed to work with IMDb, but the results are weird

Connected!


TypeError: cannot unpack non-iterable numpy.float64 object

In [39]:
import psycopg2
import os 
from dotenv import load_dotenv
load_dotenv()

# Smoke-test the DB credentials read from the environment / .env file.
# The connect call sits inside the try block so that a bad host or password is
# reported by the handler below instead of escaping as a raw traceback.
connection = None
try:
    connection = psycopg2.connect(
        database  = os.getenv("DB_NAME"),
        user      = os.getenv("DB_USER"),
        password  = os.getenv("DB_PASSWORD"),
        host      = os.getenv("DEV"),
        port      = os.getenv("PORT")
    )
    c = connection.cursor()
    print("Connected!")
    c.close()
except Exception as e:
    print("Connection problem chief!\n")
    print(e)
finally:
    # Close the connection whether or not the cursor step succeeded.
    if connection is not None:
        connection.close()

Connected!


In [30]:
# Inspect the recommender's cursor handle; the repr includes its `closed` flag.
predictor.model.cursor_dog

<connection object at 0x0000015FA8E8B378; dsn: 'user=postgres password=xxx dbname=postgres host=groadb-prod.cbayt2opbptw.us-east-1.rds.amazonaws.com port=5432', closed: 1>

In [16]:
# Inspect the recommender's connection; the repr includes the DSN and its `closed` flag.
# NOTE(review): the recorded output exposes the database hostname — clear it before sharing.
predictor.model.connection

<connection object at 0x000001332F11EBF8; dsn: 'user=postgres password=xxx dbname=postgres host=groadb-dev.cbayt2opbptw.us-east-1.rds.amazonaws.com port=5432', closed: 1>

In [None]:
# Manually close the cursor — presumably cleanup after a predict() call that raised
# before reaching its own close calls.
predictor.model.cursor_dog.close()

In [14]:
# Manually close the database connection (companion to the cursor cleanup cell).
predictor.model.connection.close()