### MoviEeze - Data Analysis and Model Creation
* Todd McCullough [Git](https://github.com/tamccullough)

In [1]:
import heapq
import re
from math import floor

import numpy as np
import pandas as pd

##### Import Surprise
[Surprise](http://surpriselib.com/) is a Python scikit for building and analyzing recommender systems that deal with explicit rating data.

In [2]:
from surprise import Reader, Dataset
from surprise import KNNWithMeans
from surprise.model_selection import cross_validate

In [19]:
import pickle

# Load the pickled recommender model used by get_r() below.
# NOTE(review): pickle.load can execute arbitrary code while deserialising,
# so only ever point this at a model file you produced yourself.
filename = 'model/moviEeze_recommender_model_2010_up.sav'
# The context manager closes the file handle as soon as the model is loaded.
with open(filename, 'rb') as model_file:
    ml_model = pickle.load(model_file)

##### Import Data

In [20]:
# Read the three CSV extracts into DataFrames, in the order movies, ratings, links.
movies, ratings, links = (
    pd.read_csv(csv_path)
    for csv_path in (
        'datasets/movies_2010.csv',
        'datasets/ratings_2010.csv',
        'datasets/links_2010.csv',
    )
)

In [21]:
# One row per film holding its genre string, plus a constant 1 so that a
# later groupby(...).count() yields how often each genre combination occurs.
genres = movies[['genres']].assign(count=1)

In [22]:
# Count films per genre combination, most frequent first.
genres_g = (
    genres
    .groupby('genres')
    .count()
    .reset_index()
    .sort_values('count', ascending=False)
)
# Keep the 28 most common genre combinations as a NumPy array of strings.
genres_image_list = genres_g['genres'].head(28).values

In [23]:
# Split every pipe-delimited genre combination into its individual genre
# names, e.g. 'Comedy|Romance' -> ['Comedy', 'Romance'].
a = [item.split('|') for item in genres_image_list]

In [24]:
# Collapse each genre combination into a single lowercase key with no
# separator, e.g. ['Comedy', 'Romance'] -> 'comedyromance'. A one-element
# list joins to that element, so no special case is needed.
b = [''.join(parts).lower() for parts in a]

In [33]:
# Pair every genre key with the image file name derived from it
# ('<genre>.jpg') and persist the lookup table as CSV.
genre_image_list = (
    pd.DataFrame(b, columns=['genre'])
    .assign(image=lambda frame: frame['genre'] + '.jpg')
)
genre_image_list.to_csv('datasets/genre_image_list.csv', index=False)

### Inference

The main function to run the model and get inferences

In [10]:
def get_r(user_id):
    """Predict ratings for the films `user_id` has not rated and return the best ones.

    Reads the module-level `ml_model`, `ratings` and `movies` objects.

    Parameters
    ----------
    user_id
        Value matched against ``ratings['userId']`` and passed on to
        ``ml_model.predict``.

    Returns
    -------
    pandas.DataFrame
        The rows of ``movies`` for the (at most ``N``) unrated films with the
        highest predicted rating. An integer ``pred_rating`` column is inserted
        at position 1 and the frame is sorted by it, highest first.
    """
    # Select which system to use. Due to memory constraints, item based is the only viable option
    recommender_system = ml_model
    # N represents how many items to recommend
    N = 2000

    # A set removes duplicate ratings and makes the membership test below O(1).
    rated_items = set(ratings.loc[ratings['userId'] == user_id, 'movieId'])
    known_movie_ids = movies['movieId'].values.tolist()
    reduced_ratings = ratings.loc[ratings['movieId'].isin(known_movie_ids)]

    # Every distinct film that has at least one rating and is present in `movies`
    all_item_ids = list(set(reduced_ratings['movieId'].tolist()))

    # new_items are all the items not rated by the user
    new_items = [x for x in all_item_ids if x not in rated_items]

    # Estimate ratings for all unrated items
    predicted_ratings = {
        item_id: recommender_system.predict(user_id, item_id).est
        for item_id in new_items
    }

    # Get the item_ids for the top ratings
    recommended_ids = heapq.nlargest(N, predicted_ratings, key=predicted_ratings.get)

    # The estimate is truncated to a whole number, as before.
    truncated_ratings = {
        item_id: int(predicted_ratings[item_id]) for item_id in recommended_ids
    }

    recommended_df = movies.loc[movies['movieId'].isin(recommended_ids)].copy()
    # Look each rating up by movieId instead of by row position: the rows keep
    # the order of `movies`, which need not be sorted by movieId, so a
    # positional write can attach a rating to the wrong film.
    pred_column = recommended_df['movieId'].map(truncated_ratings).astype('int64').values
    recommended_df.insert(1, 'pred_rating', pred_column)

    return recommended_df.head(N).sort_values('pred_rating', ascending=False)

def cap_str(item):
    """Return `item` with its first character upper-cased and the rest lower-cased."""
    return item.capitalize()

def reg_frame(f_list,items):
    """Keep only the rows of `f_list` whose ``genres`` text mentions every requested genre.

    Parameters
    ----------
    f_list : pandas.DataFrame
        Frame with a string ``genres`` column.
    items : iterable of str
        Genre names; surrounding whitespace is ignored and case does not matter.

    Returns
    -------
    pandas.DataFrame
        The matching rows of `f_list`, in their original order.
    """
    # One look-ahead per genre makes the pattern order-independent: each
    # genre only has to appear somewhere in the string. re.escape keeps
    # user-typed characters such as '+' or '(' from being read as regex syntax.
    lookaheads = ''.join(f'(?=.*{re.escape(i.strip())})' for i in items)
    # Matching is case-insensitive so that 'sci-fi' finds 'Sci-Fi'; rows with a
    # missing genres value count as non-matching (na=False).
    mask = f_list['genres'].str.contains(
        fr'^\b{lookaheads}', case=False, regex=True, na=False
    )
    return f_list[mask]

def set_up_ml(user_id,genre_list):
    """Return `user_id`'s recommendations restricted to the requested genres.

    `genre_list` is a comma-separated string of genre names. The result is the
    frame produced by `get_r`, filtered by `reg_frame`, with the ``date``
    column removed.
    """
    candidates = get_r(user_id)
    wanted_genres = genre_list.split(',')
    return reg_frame(candidates, wanted_genres).drop(columns='date')

### Get a Recommendation Based on Genres
The final code, which will be implemented in a cleaner fashion through the browser interface.

In [107]:
# Comma-separated genre names; set_up_ml() splits this string on ',' before filtering.
genre_list = 'comedy, romance'

In [108]:
# Three sample users, picked for how many ratings each one has.
user_1, user_2, user_3 = (
    123711,  # user with a medium amount of ratings
    15078,   # user with the lowest number of ratings
    72315,   # user with a lot of ratings
)
# generate a list of recommendations for each user
recommended_list_1, recommended_list_2, recommended_list_3 = (
    set_up_ml(sample_user, genre_list)
    for sample_user in (user_1, user_2, user_3)
)

In [109]:
def _tmdb_link(tmdb_id):
    """Build the themoviedb.org URL for `tmdb_id`, or None when the id is missing."""
    if tmdb_id is None or pd.isna(tmdb_id):
        return None
    return 'https://www.themoviedb.org/movie/' + str(int(tmdb_id))


def _genre_image(genres, image_by_genre):
    """Pick the image file name for a pipe-delimited `genres` string."""
    parts = genres.split('|')
    # The lookup key is the first two genres, lower-cased and concatenated;
    # a film with a single genre simply uses that one genre.
    key = ''.join(parts[:2]).lower()
    # No image for the pair: fall back to the first genre's image.
    return image_by_genre.get(key, parts[0].lower() + '.jpg')


def get_final_recommendation(list_1,list_2,list_3): # combine all recommendations
    """Merge three per-user recommendation frames into one de-duplicated table.

    Reads the module-level `links` and `genre_image_list` frames.

    Parameters
    ----------
    list_1, list_2, list_3 : pandas.DataFrame
        Frames with ``movieId``, ``pred_rating``, ``title`` and ``genres``
        columns, as returned by `set_up_ml`.

    Returns
    -------
    pandas.DataFrame
        One row per film, ordered by predicted rating (highest first), with
        ``movieId`` and ``pred_rating`` replaced by two columns:

        * ``link``: themoviedb.org URL built from the third column of `links`
          (presumably the TMDB id; confirm against the CSV). None when the
          film has no usable entry there.
        * ``image``: the file name from `genre_image_list` for the film's
          first two genres, or ``'<first genre>.jpg'`` when there is none.
    """
    film_recommendation = (
        pd.concat([list_1, list_2, list_3]) # concat lists
        # A stable sort first, so that de-duplication keeps the highest-rated copy.
        .sort_values('pred_rating', ascending=False, kind='mergesort')
        # De-duplicate on movieId: the same film can carry different predicted
        # ratings for different users and would otherwise be listed twice.
        .drop_duplicates(subset='movieId')
        .drop(columns='pred_rating')
        .reset_index(drop=True)
    )

    # movieId -> value of the third `links` column, first occurrence wins.
    first_links = links.drop_duplicates(subset='movieId')
    tmdb_by_movie = dict(zip(first_links['movieId'], first_links.iloc[:, 2]))

    # genre key -> image file name, first occurrence wins.
    first_images = genre_image_list.drop_duplicates(subset='genre')
    image_by_genre = dict(zip(first_images['genre'], first_images['image']))

    film_recommendation['link'] = [
        _tmdb_link(tmdb_by_movie.get(movie_id))
        for movie_id in film_recommendation['movieId']
    ]
    film_recommendation['image'] = [
        _genre_image(genres, image_by_genre)
        for genres in film_recommendation['genres']
    ]
    return film_recommendation.drop(columns='movieId')

In [110]:
# Merge the three per-user recommendation frames and display the combined table.
final_recommendation = get_final_recommendation(recommended_list_1,recommended_list_2,recommended_list_3)
final_recommendation

Unnamed: 0,title,genres,link,image
0,Seeking a Friend for the End of the World (2012),Comedy|Drama|Romance,https://www.themoviedb.org/movie/88005,comedydrama.jpg
1,Ruby Sparks (2012),Comedy|Fantasy|Romance,https://www.themoviedb.org/movie/103332,comedy.jpg
2,Swiss Army Man (2016),Comedy|Drama|Romance,https://www.themoviedb.org/movie/347031,comedydrama.jpg
3,Mr. Right (2016),Action|Comedy|Romance,https://www.themoviedb.org/movie/333385,actioncomedy.jpg
4,The Big Sick (2017),Comedy|Romance,https://www.themoviedb.org/movie/416477,comedyromance.jpg
...,...,...,...,...
78,Larry Crowne (2011),Comedy|Drama|Romance,https://www.themoviedb.org/movie/59861,comedydrama.jpg
79,Bridget Jones's Baby (2016),Comedy|Romance,https://www.themoviedb.org/movie/95610,comedyromance.jpg
80,Mamma Mia: Here We Go Again! (2018),Comedy|Romance,https://www.themoviedb.org/movie/458423,comedyromance.jpg
81,New Year's Eve (2011),Comedy|Romance,https://www.themoviedb.org/movie/62838,comedyromance.jpg
