In [1]:
import numpy as np
import pandas as pd
import heapq
from math import floor

In [2]:
from surprise import Reader, Dataset
from surprise import KNNWithMeans
from surprise.model_selection import cross_validate

In [3]:
import pickle
import os
os_path = os.path.join(os.path.expanduser('~'))
PATH = os_path+'/mldl/models/'
filename = PATH+'movielens_light_recommender_model.sav'
ml_recommender = pickle.load(open(filename, 'rb'))

In [4]:
genres = ['Action','Adventure','Animation',
          'Children','Comedy','Crime',
          'Documentary','Drama','Fantasy',
          'Film-Noir','Horror','Musical',
          'Mystery','Romance','Sci-Fi',
          'Thriller','War','Western']

In [5]:
movies = pd.read_csv('datasets/movies.dat', delimiter='::')
ratings = pd.read_csv('datasets/ratings.dat', delimiter='::').drop_duplicates(['movieid', 'userid'], keep='last')
users = pd.read_csv('datasets/users.dat', delimiter='::')

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


### Inference

The main function to run the model and get inferences

In [12]:
def get_r(user_id):
    # Select which system to use. Due to memory constraints, item based is the only viable option
    recommender_system = ml_recommender
    # N will represent how many items to recommend
    N = 2000
    
    # The setting to a set and back to list is a failsafe.
    rated_items = list(set(ratings.loc[ratings['userid'] == user_id]['movieid'].tolist()))
    ratings_list = movies['movieid'].values.tolist()
    reduced_ratings = ratings.loc[ratings['movieid'].isin(ratings_list)].copy()
    
    # Self explanitory name
    all_item_ids = list(set(reduced_ratings['movieid'].tolist()))
    
    # New_items just represents all the items not rated by the user
    new_items = [x for x in all_item_ids if x not in rated_items]
    
    # Estimate ratings for all unrated items
    predicted_ratings = {}
    for item_id in new_items:
        predicted_ratings[item_id] = recommender_system.predict(user_id, item_id).est
        pass
    
    # Get the item_ids for the top ratings
    recommended_ids = heapq.nlargest(N, predicted_ratings, key=predicted_ratings.get)
    recommended_ids = sorted(recommended_ids)
    
    # predicted_ratings
    recommended_df = movies.loc[movies['movieid'].isin(recommended_ids)].copy()
    #recommended_df.insert(1, 'pred_rating', np.zeros(len(recommended_ids)))
    recommended_df.insert(1, 'pred_rating', 0)
    
    # recommended_df = movies.copy()
    for idx,item_id in enumerate(recommended_ids):
        recommended_df.iloc[idx, recommended_df.columns.get_loc('pred_rating')] = int(predicted_ratings[item_id])
        pass
    return recommended_df.head(N).sort_values('pred_rating', ascending=False)

def set_up_ml(user_id,genre_list):
    ml_list = get_r(user_id)
    cols = ml_list.columns
    f_list = pd.DataFrame(columns = cols)
    
    items = genre_list.split(',')
    s_ = ''
    for i in items:
        str_ = f'(?=.*{i})'
        s_ += str_
    s_
    
    for j in range(0,len(items)):
        b_list = ml_list.copy()
        b_list = b_list[b_list['genres'].str.contains(items[j])]
        f_list = f_list.append(b_list, sort=False, ignore_index=True)
    f_list = f_list[f_list['genres'].str.contains(fr'^\b{s_}\b',regex=True)]
    #f_list.pop('movieid')
    #f_list.pop('pred_rating')
    #f_list = f_list.reset_index(drop=True)
    #f_list = f_list.T.reset_index(drop=True).T
    return f_list

def mk_tbl(rows):
    #this is for creating dynamic tables
    rows.pop()
    return arr

### Get a Recommendation Based on Genres

The final code that will be impletented in a cleaner fashion through the browser interface.

In [13]:
user_id = 4168
genre_list = 'Sci-Fi,Horror'
table_list = set_up_ml(user_id,genre_list)

In [14]:
table_list = table_list[table_list['genres'].str.contains('Horror')]

In [15]:
table_list.head(10)

Unnamed: 0,movieid,pred_rating,title,genres
2,1692,3,Alien Escape (1995),Horror|Sci-Fi
3,1690,3,Alien: Resurrection (1997),Action|Horror|Sci-Fi
11,1215,3,Army of Darkness (1993),Action|Adventure|Comedy|Horror|Sci-Fi
12,1214,3,Alien (1979),Action|Horror|Sci-Fi|Thriller
35,1334,3,"Blob, The (1958)",Horror|Sci-Fi
38,1320,3,Alien� (1992),Action|Horror|Sci-Fi|Thriller
58,2256,3,Parasite (1982),Horror|Sci-Fi
63,1862,3,Species II (1998),Horror|Sci-Fi
66,1762,3,Deep Rising (1998),Action|Horror|Sci-Fi
71,1924,3,Plan 9 from Outer Space (1958),Horror|Sci-Fi


In [32]:
for i in range(len(table_list)) : 
  print(table_list['title'],table_list['genres']) 

6                                    Jumanji (1995)
17                           Pagemaster, The (1994)
20                               Dragonheart (1996)
21                                 Space Jam (1996)
26              20,000 Leagues Under the Sea (1954)
30                  Escape to Witch Mountain (1975)
36     Willy Wonka and the Chocolate Factory (1971)
54             Kid in King Arthur's Court, A (1995)
55        Star Wars: Episode IV - A New Hope (1977)
58                   Kids of the Round Table (1995)
59               Indian in the Cupboard, The (1995)
67                NeverEnding Story III, The (1994)
84                                   Jumanji (1995)
88                           Pagemaster, The (1994)
89                               Dragonheart (1996)
90                                 Space Jam (1996)
91              20,000 Leagues Under the Sea (1954)
92                  Escape to Witch Mountain (1975)
94     Willy Wonka and the Chocolate Factory (1971)
96          

In [120]:
table_list.shape

(8, 2)

In [106]:
l = table_list.shape[0]
l

8

In [117]:
for i in range(0,table_list.shape[0]):
    for r in table_list:
        print(table_list.loc[i,r])

Nutty Professor, The (1996)
Comedy|Fantasy|Romance|Sci-Fi
20,000 Leagues Under the Sea (1954)
Adventure|Children's|Fantasy|Sci-Fi
E.T. the Extra-Terrestrial (1982)
Children's|Drama|Fantasy|Sci-Fi
Star Wars: Episode IV - A New Hope (1977)
Action|Adventure|Fantasy|Sci-Fi
Nutty Professor, The (1996)
Comedy|Fantasy|Romance|Sci-Fi
20,000 Leagues Under the Sea (1954)
Adventure|Children's|Fantasy|Sci-Fi
E.T. the Extra-Terrestrial (1982)
Children's|Drama|Fantasy|Sci-Fi
Star Wars: Episode IV - A New Hope (1977)
Action|Adventure|Fantasy|Sci-Fi


In [93]:
table_list.iloc[1,0]

'20,000 Leagues Under the Sea (1954)'

In [33]:
t_list = table_list.to_numpy()

In [36]:
t_list[1]

array(['Pagemaster, The (1994)',
       "Action|Adventure|Animation|Children's|Fantasy"], dtype=object)

In [38]:
for i in t_list:
    print(t_list[i])

TypeError: 'int' object is not iterable

In [23]:
j = len(t_list)-1
for i,r in t_list:
    print(i,' BS ',r)
    #for i in range(0,11):
        #print(i)
        #print(table_list.iloc[r,i])

Jumanji (1995)  BS  Adventure|Children's|Fantasy
Pagemaster, The (1994)  BS  Action|Adventure|Animation|Children's|Fantasy
Dragonheart (1996)  BS  Action|Adventure|Fantasy
Space Jam (1996)  BS  Adventure|Animation|Children's|Comedy|Fantasy
20,000 Leagues Under the Sea (1954)  BS  Adventure|Children's|Fantasy|Sci-Fi
Escape to Witch Mountain (1975)  BS  Adventure|Children's|Fantasy
Willy Wonka and the Chocolate Factory (1971)  BS  Adventure|Children's|Comedy|Fantasy
Kid in King Arthur's Court, A (1995)  BS  Adventure|Children's|Comedy|Fantasy|Romance
Star Wars: Episode IV - A New Hope (1977)  BS  Action|Adventure|Fantasy|Sci-Fi
Kids of the Round Table (1995)  BS  Adventure|Children's|Fantasy
Indian in the Cupboard, The (1995)  BS  Adventure|Children's|Fantasy
NeverEnding Story III, The (1994)  BS  Adventure|Children's|Fantasy
Jumanji (1995)  BS  Adventure|Children's|Fantasy
Pagemaster, The (1994)  BS  Action|Adventure|Animation|Children's|Fantasy
Dragonheart (1996)  BS  Action|Adventure|

In [164]:
str(t_list[0,0])

'Cemetery Man (Dellamorte Dellamore) (1994)'

In [135]:
for index, row in table_list.iterrows():
    print(row['title'], row['genres'])

Cemetery Man (Dellamorte Dellamore) (1994) Comedy|Horror
Frighteners, The (1996) Comedy|Horror
Dracula: Dead and Loving It (1995) Comedy|Horror
From Dusk Till Dawn (1996) Action|Comedy|Crime|Horror|Thriller
Serial Mom (1994) Comedy|Crime|Horror
Tales from the Hood (1995) Comedy|Horror
Cemetery Man (Dellamorte Dellamore) (1994) Comedy|Horror
Frighteners, The (1996) Comedy|Horror
Dracula: Dead and Loving It (1995) Comedy|Horror
From Dusk Till Dawn (1996) Action|Comedy|Crime|Horror|Thriller
Serial Mom (1994) Comedy|Crime|Horror
Tales from the Hood (1995) Comedy|Horror


In [107]:
table_list

Unnamed: 0,movieid,pred_rating,title,genres
1,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
3,799,3,"Frighteners, The (1996)",Comedy|Horror
20,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
21,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller
25,532,3,Serial Mom (1994),Comedy|Crime|Horror
28,330,3,Tales from the Hood (1995),Comedy|Horror
38,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
65,799,3,"Frighteners, The (1996)",Comedy|Horror
235,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
239,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller


In [68]:
df = table_list
gt = 'Horror|Comedy'

In [69]:
table_list[table_list['genres'].str.contains(gt)].head(2)

Unnamed: 0,movieid,pred_rating,title,genres
0,742,3,Thinner (1996),Horror|Thriller
1,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror


In [70]:
df[(df['genres'].str.contains('Horror')) & (df['genres'].str.contains('Comedy'))]

Unnamed: 0,movieid,pred_rating,title,genres
1,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
3,799,3,"Frighteners, The (1996)",Comedy|Horror
20,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
21,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller
25,532,3,Serial Mom (1994),Comedy|Crime|Horror
28,330,3,Tales from the Hood (1995),Comedy|Horror
38,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
65,799,3,"Frighteners, The (1996)",Comedy|Horror
235,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
239,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller


In [92]:
s = ''
for i in items:
    str_ = f'(?=.*{i})'
    s += str_
s

'(?=.*Horror)(?=.*Comedy)'

In [78]:
base = r'^{}'
expr = '(?=.*{})'
words = ['Horror','Comedy']  # example
base.format(''.join(expr.format(w) for w in words))

'^(?=.*Horror)(?=.*Comedy)'

In [101]:
df[df['genres'].str.contains(fr'^\b{s}\b',regex=True)]

Unnamed: 0,movieid,pred_rating,title,genres
1,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
3,799,3,"Frighteners, The (1996)",Comedy|Horror
20,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
21,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller
25,532,3,Serial Mom (1994),Comedy|Crime|Horror
28,330,3,Tales from the Hood (1995),Comedy|Horror
38,735,3,Cemetery Man (Dellamorte Dellamore) (1994),Comedy|Horror
65,799,3,"Frighteners, The (1996)",Comedy|Horror
235,12,3,Dracula: Dead and Loving It (1995),Comedy|Horror
239,70,3,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller


In [87]:
df[df['genres'].str.contains(r'\b(?:{})\b'.format(words))]

Unnamed: 0,movieid,pred_rating,title,genres
34,1,3,Toy Story (1995),Animation|Children's|Comedy
55,810,3,Kazaam (1996),Children's|Comedy|Fantasy
63,837,3,Matilda (1996),Children's|Comedy
64,801,3,Harriet the Spy (1996),Children's|Comedy
76,586,3,Home Alone (1990),Children's|Comedy
78,588,3,Aladdin (1992),Animation|Children's|Comedy|Musical
83,575,3,"Little Rascals, The (1994)",Children's|Comedy
86,551,3,"Nightmare Before Christmas, The (1993)",Children's|Comedy|Musical
93,569,3,Little Big League (1994),Children's|Comedy
103,673,3,Space Jam (1996),Adventure|Animation|Children's|Comedy|Fantasy
