In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the movie metadata and the user ratings.
# Later cells read `movies` through its 'movieId' and 'Title' columns and
# `ratings` through 'userId', 'movieId' and 'rating'.
movies = pd.read_csv('../Data/movies_sml.csv')
ratings = pd.read_hdf('../Data/ratings_hdf.h5')


In [5]:
# Load the movies table that carries a 'bag_of_words' text column.
# The content-based filter below vectorizes that column to compare movies.
movies_bow = pd.read_csv('../Data/movies_bow.csv')


In [None]:
# Load the precomputed predicted ratings.
# The frame has an 'id' column that is matched against the user id; the other
# columns are treated as per-movie predictions and joined on 'movieId' below.
preds = pd.read_hdf('../Data/predsfin_hdf.h5')

In [None]:
# Peek at the first rows to check the layout of the predictions frame.
preds.head()

In [32]:


def recommend_movies(predictions, userId, movies, original_ratings, num_recommendations):
    """Recommend the highest-predicted movies a user has not rated yet.

    Parameters
    ----------
    predictions : pandas.DataFrame
        One row per user. Must contain an 'id' column with the user id; every
        other column label is a movieId and holds that user's predicted rating.
    userId : scalar
        Value looked up in ``predictions['id']`` and ``original_ratings.userId``.
    movies : pandas.DataFrame
        Movie metadata with at least 'movieId' and 'Title' columns.
    original_ratings : pandas.DataFrame
        Ratings with at least 'userId', 'movieId' and 'rating' columns.
    num_recommendations : int
        Maximum number of recommendations to return.

    Returns
    -------
    (already_rated, rec_movies) : tuple of pandas.Series
        Titles the user has already rated (highest rating first) and titles of
        the recommended movies (highest predicted rating first).

    Raises
    ------
    IndexError
        If ``userId`` does not appear in ``predictions['id']``.
    """
    # Look the user up in the frame that was passed in, not in a notebook global.
    user_rows = predictions.loc[predictions['id'] == userId]
    if user_rows.empty:
        raise IndexError(f"userId {userId!r} not found in predictions['id']")

    # Remove the 'id' entry by label *before* sorting. Slicing off the first
    # element after a descending sort only removes 'id' when the id is larger
    # than every predicted rating; for small ids it would discard the best
    # prediction and leave 'id' behind as if it were a movie.
    user_predictions = user_rows.iloc[0].drop('id').sort_values(ascending=False)

    # Build the join table from plain lists so 'movieId' gets a natural dtype
    # instead of inheriting the object dtype of the mixed column labels.
    sorted_user_predictions = pd.DataFrame({
        'movieId': user_predictions.index.tolist(),
        'Predictions': user_predictions.to_numpy(),
    })

    # Get the user's data and merge in the movie information.
    user_data = original_ratings[original_ratings.userId == userId]
    user_full = (
        user_data
        .merge(movies, how='left', on='movieId')
        .sort_values(['rating'], ascending=False)
    )

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    recommendations = (
        movies[~movies['movieId'].isin(user_full['movieId'])]
        .merge(sorted_user_predictions, how='left', on='movieId')
        .sort_values('Predictions', ascending=False)
        .iloc[:num_recommendations]
    )
    rec_movies = recommendations['Title']

    # The merge suffixes 'Title' to 'Title_x' only when the ratings frame has
    # its own 'Title' column; otherwise the title comes straight from `movies`.
    title_column = 'Title_x' if 'Title_x' in user_full.columns else 'Title'
    already_rated = user_full[title_column]
    return already_rated, rec_movies

In [33]:
# Top 20 collaborative-filtering recommendations for user 37, plus the titles that user already rated.
already_rated, predictions = recommend_movies(preds, 37, movies, ratings, 20)

In [34]:
# Recommended titles (second value returned by recommend_movies above).
predictions

1605                                      American Beauty
770                       One Flew Over the Cuckoo's Nest
773                                                Psycho
1356                                  Edward Scissorhands
2158                                              Memento
379                                         Jurassic Park
3066    Pirates of the Caribbean: The Curse of the Bla...
2401                                         Donnie Darko
725                                        Reservoir Dogs
2875                                          City of God
1521                                         Notting Hill
1014                                    L.A. Confidential
1122                                   As Good as It Gets
3155                                  Lost in Translation
787                                    Young Frankenstein
458                                  Beauty and the Beast
3247                                        Love Actually
302           

###### Test the content-based system first to see whether any additional files are needed

In [10]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Initialize the vectorizer and generate the token-count matrix from each
# movie's bag-of-words text. The lookups further down index the similarity
# matrix by movies_bow row position, so rows are expected to line up with movies_bow.

count = CountVectorizer()
count_matrix = count.fit_transform(movies_bow['bag_of_words'])

In [12]:
# Generate the cosine similarity matrix: every movie's count vector compared
# against every other movie's. cosine_sim[i] is later read as "similarity of
# movie i to all movies".

cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [13]:
# Map each Title to its row index in movies_bow. get_recommendations uses this
# lookup to turn a title into a row of the similarity matrix.
# NOTE: titles are not unique (the sample output lists 'Around the World in 80 Days'
# twice), so indexing this Series with such a title yields several entries.

indices = pd.Series(movies_bow.index, index = movies_bow['Title'])

#Create recommendation function

def get_recommendations(title, cosine_sim=cosine_sim, top_n=25):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Movie title, looked up in the notebook-level ``indices`` Series.
    cosine_sim : array-like
        Pairwise similarity matrix; row ``i`` holds the similarity of
        ``movies_bow`` row ``i`` to every row. Defaults to the matrix that
        exists when this function is defined.
    top_n : int, default 25
        Number of similar movies to return.

    Returns
    -------
    pandas.Series
        ``movies_bow['Title']`` entries of the ``top_n`` most similar movies,
        most similar first; ties keep their row order.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    # Get the index of the movie that matches the title
    idx = indices[title]
    # Titles are not unique, in which case the lookup returns several rows;
    # use the first match so idx is always a single position.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Similarity of every movie to the query movie
    scores = np.asarray(cosine_sim[idx])

    # Stable descending order: equal scores stay in ascending row order,
    # matching sorted(..., reverse=True) on an enumerated list.
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the query movie by position instead of assuming it sorts first;
    # another movie with an identical bag of words can tie with it at 1.0.
    ranked = ranked[ranked != idx][:top_n]

    # Return the titles of the most similar movies
    return movies_bow['Title'].iloc[ranked]

In [14]:
# Content-based check: movies whose bag of words is closest to this title.
get_recommendations('Rumble in the Bronx')

10841                   Kung Fu Yoga
4038                       Who Am I?
3401                     Crime Story
660                       Supercop 2
679      Around the World in 80 Days
680      Around the World in 80 Days
1127                       Mr. Magoo
1393                       Rush Hour
1565                      Black Mask
1970                   Shanghai Noon
2391                     Rush Hour 2
3983                     City Hunter
4852                     Thunderbolt
5184                     Rush Hour 3
9400                    Dragon Blade
10585                      Skiptrace
10678                Railroad Tigers
1209                    Mr. Nice Guy
2993                Shanghai Knights
4294                        Gorgeous
4536                New Police Story
6246               Shinjuku Incident
6658              Little Big Soldier
4850                    Swordsman II
0                          Toy Story
Name: Title, dtype: object

#### Combined Systems

In [15]:
# Content-based recommendations section.
# Title -> movies_bow row index lookup (same construction as in the
# content-system test above, rebuilt here for the combined walkthrough).
indices = pd.Series(movies_bow.index, index = movies_bow['Title'])

indices.head()

Title
Toy Story                      0
Jumanji                        1
Grumpier Old Men               2
Waiting to Exhale              3
Father of the Bride Part II    4
dtype: int64

In [16]:
# Row position of the seed movie. If the title occurred more than once in
# movies_bow this lookup would return several entries rather than one.
idx = indices['American History X']

In [17]:
# Pair every movie's row position with its similarity to the seed movie.
sim_scores = [(position, score) for position, score in enumerate(cosine_sim[idx])]


In [18]:
# Order the (position, score) pairs from most to least similar.
sim_scores.sort(key=lambda pair: pair[1], reverse=True)

In [19]:
# Keep the 50 entries after the first one; the first is presumed to be the
# seed movie itself (similarity 1.0) — verify when other movies tie with it.
# NOTE: re-running this cell without re-running the sort slices the list again.
sim_scores = sim_scores[1:51]

In [20]:
# Keep only the row positions of the selected movies.
movie_indices = [position for position, _score in sim_scores]

In [21]:
# movieIds of the content-based candidates, as a one-column frame so it can be
# merged with the collaborative-filtering predictions.
content_results = movies_bow['movieId'].iloc[movie_indices].to_frame()

content_results

Unnamed: 0,movieId
396,487
935,1461
4250,31045
4252,31083
1929,3536
9581,137900
104,113
228,268
467,589
741,1057


In [22]:
# Collaborative Filtering Sections

# get user predictions

user = 37  # User ID starts at 1, not 0

# Row label of this user in preds; later used as the name of the prediction column.
user_row_number = preds.loc[preds['id'] == user].index[0]

# Get and sort the user's predictions.
# The 'id' entry is dropped by label *before* sorting: slicing off the first
# element after a descending sort only removes 'id' when the id is larger than
# every predicted rating, and for small ids would discard the top prediction.
# The row is selected through the boolean mask rather than iloc[label], which
# is only correct while the index is the default 0..n-1 range.
sorted_user_predictions = (
    preds.loc[preds['id'] == user].iloc[0]
    .drop('id')
    .sort_values(ascending=False)
    .to_frame()        # column keeps the row label (user_row_number) as its name
    .reset_index()
)



In [42]:
# Highest predicted ratings for the user; the value column is named after the user's row label.
sorted_user_predictions.head()

Unnamed: 0,movieId,4
0,1,4.743518
1,260,4.319458
2,527,4.237125
3,2028,3.716957
4,356,3.411886


In [28]:
# Get the movies the user originally rated
user_data = ratings[ratings.userId == user]

#combine the movies the user originally rated with the movies dataset and only keeping the movies
#the user rated

user_full = (user_data.merge(movies, how = 'left', left_on = 'movieId', right_on = 'movieId').
                     sort_values(['rating'], ascending=False))

In [29]:
print(user_full.columns)

# Rename first, then select. rename() ignores a missing 'Title_x', so this cell
# can be re-run on its own; selecting 'Title_x' before renaming raised a
# KeyError on the second run because the column had already been renamed.
user_full = user_full.rename(columns={'Title_x': 'Title'})[
    ['userId', 'movieId', 'Title', 'rating', 'genres', 'Actors', 'Director', 'Plot', 'Poster']
]


user_full.head()

Index(['userId', 'num_user_rated', 'movieId', 'Title_x', 'rating', 'Title_y',
       'genres', 'Actors', 'Director', 'Plot', 'Poster'],
      dtype='object')


Unnamed: 0,userId,movieId,Title,rating,genres,Actors,Director,Plot,Poster
39,37,1213,Goodfellas,5.0,Crime|Drama,"Robert De Niro, Ray Liotta, Joe Pesci, Lorrain...",Martin Scorsese,The story of Henry Hill and his life in the mo...,https://m.media-amazon.com/images/M/MV5BY2NkZj...
42,37,2997,Being John Malkovich,5.0,Comedy|Drama|Fantasy,"John Cusack, Cameron Diaz, Ned Bellamy, Eric W...",Spike Jonze,A puppeteer discovers a portal that leads lite...,https://m.media-amazon.com/images/M/MV5BYmUxY2...
105,37,27790,Millions,4.5,Children|Comedy|Crime|Drama|Fantasy,"Alex Etel, Lewis McGibbon, James Nesbitt, Dais...",Danny Boyle,"Ethics, being human and the soul come to the f...",https://m.media-amazon.com/images/M/MV5BNTI3Y2...
36,37,778,Trainspotting,4.5,Comedy|Crime|Drama,"Ewan McGregor, Ewen Bremner, Jonny Lee Miller,...",Danny Boyle,"Renton, deeply immersed in the Edinburgh drug ...",https://m.media-amazon.com/images/M/MV5BMzA5Zj...
21,37,150,Apollo 13,4.0,Adventure|Drama|IMAX,"Tom Hanks, Bill Paxton, Kevin Bacon, Gary Sinise",Ron Howard,NASA must devise a strategy to return Apollo 1...,https://m.media-amazon.com/images/M/MV5BNjEzYj...


In [30]:

# Size of the collaborative-filtering candidate pool kept below; the content-based
# picks are later matched against this pool.
num_recommendations = 1000

In [39]:
# Same pipeline as the tail of recommend_movies, run at notebook level.

# Take the movies the user has not rated, attach the user's predicted rating
# (stored under the user's row label), and keep the num_recommendations best.
recommendations = (
    movies[~movies['movieId'].isin(user_full['movieId'])]
    .merge(sorted_user_predictions, how='left', left_on='movieId', right_on='movieId')
    .rename(columns={user_row_number: 'Predictions'})
    .sort_values('Predictions', ascending=False)
    .head(num_recommendations)
)


In [40]:
# Look up the predicted rating of each content-based candidate in
# `recommendations` and keep the five with the highest prediction.
# NOTE(review): dropna() removes rows with a missing value in *any* column, so
# besides candidates without a prediction it also drops ones lacking e.g. a
# Plot or Poster — confirm that is intended.
movie_recs = (
    content_results
    .merge(recommendations, how='left', on='movieId')
    .sort_values('Predictions', ascending=False)
    .dropna()
    .head()
)

In [41]:
# Final hybrid picks: content-similar movies ranked by the user's predicted rating.
movie_recs

Unnamed: 0,movieId,Title,genres,Actors,Director,Plot,Poster,Predictions
36,2,Jumanji,Adventure|Children|Fantasy,"Robin Williams, Jonathan Hyde, Kirsten Dunst, ...",Joe Johnston,When two kids find and play a magical board ga...,https://m.media-amazon.com/images/M/MV5BZTk2Zm...,0.725185
8,589,Terminator 2: Judgment Day,Action|Sci-Fi,"Arnold Schwarzenegger, Linda Hamilton, Edward ...",James Cameron,"A cyborg, identical to the one who failed to k...",https://m.media-amazon.com/images/M/MV5BMGU2Nz...,0.553445
40,6,Heat,Action|Crime|Thriller,"Al Pacino, Robert De Niro, Val Kilmer, Jon Voight",Michael Mann,A group of professional bank robbers start to ...,https://m.media-amazon.com/images/M/MV5BNDc0YT...,0.340626
26,94959,Moonrise Kingdom,Comedy|Drama|Romance,"Bruce Willis, Edward Norton, Bill Murray, Fran...",Wes Anderson,A pair of young lovers flee their New England ...,https://m.media-amazon.com/images/M/MV5BMTEwMT...,0.279297
42,7,Sabrina,Comedy|Romance,"Humphrey Bogart, Audrey Hepburn, William Holde...",Billy Wilder,A playboy becomes interested in the daughter o...,https://m.media-amazon.com/images/M/MV5BYmFlNT...,0.199099
