In [9]:
import pandas as pd
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
import numpy as np
import time

In [10]:
# Read the ratings export and discard the 'Unnamed: 0' column
# (presumably an index saved by an earlier to_csv call), then show the frame size.
df = pd.read_csv('anime_data.csv').drop(columns='Unnamed: 0')
df.shape

(478058, 6)

In [11]:
# Widen column display so long genre lists and titles are not truncated in outputs.
pd.set_option('display.max_colwidth', 1000)

In [12]:
df.head()

Unnamed: 0,title,genres,ratings,type,User_ID,Title_ID
0,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",8,TV,0,1244
1,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,3,1244
2,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,5,1244
3,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,6,1244
4,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",6,TV,14,1244


### Prep The DataFrame For the Model W/ User, Item, And Rating For Surprise

In [13]:
# Surprise's Dataset.load_from_df reads exactly three columns in (user, item, rating)
# order, so select them directly. The previous version passed a whole DataFrame as
# `columns=` to drop() and then re-selected these same columns anyway.
# .copy() keeps rec_df independent of df so later edits cannot alias each other.
rec_df = df[['User_ID', 'Title_ID', 'ratings']].copy()

In [15]:
rec_df.head()

Unnamed: 0,User_ID,Title_ID,ratings
0,0,1244,8
1,3,1244,10
2,5,1244,10
3,6,1244,10
4,14,1244,6


In [75]:
# Lookup table with one row per Title_ID (the first occurrence in df is kept),
# used to map a Title_ID back to its title and genre list.
rec_match = (
    df.loc[:, ['title', 'Title_ID', 'genres']]
    .drop_duplicates(subset='Title_ID', keep='first')
)

In [76]:
rec_match.head()

Unnamed: 0,title,Title_ID,genres
0,Fullmetal Alchemist: Brotherhood,1244,"['Action', 'Adventure', 'Drama', 'Fantasy']"
130,your name.,4661,"['Drama', 'Romance', 'Body Swapping', 'Gender Bender']"
235,Attack on Titan 3rd Season: Part II,279,"['Action', 'Fantasy', 'Horror', 'Shounen']"
373,A Silent Voice,49,"['Drama', 'Shounen', 'Melancholy', 'School Life']"
480,Haikyuu!! Karasuno High School vs Shiratorizawa Academy,1531,"['Shounen', 'Sports', 'Animeism', 'School Club']"


### Fitting the Models with our Data

In [18]:
from surprise import Reader, Dataset
from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy

# Declare the 1-10 rating scale and wrap the (user, item, rating) frame as a Surprise dataset.
reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df=rec_df, reader=reader)

In [19]:
from surprise.model_selection import train_test_split
from surprise import accuracy
# Hold out 25% of the ratings for evaluation; the fixed random_state keeps the split repeatable.
trainset, testset = train_test_split(
    data,
    test_size=0.25,
    random_state=100,
)

### SVD Recommender Model

In [20]:
# Baseline model: SVD with the library's default hyperparameters.
svd = SVD()
svd.fit(trainset)

# Score the held-out ratings; accuracy.rmse prints the formatted RMSE and returns the raw value.
preds = svd.test(testset)
accuracy.rmse(preds, verbose=True)
# Recorded run: RMSE 1.4013 (SVD is created without a random_state here, so a re-run may differ slightly).

RMSE: 1.4013


1.4012932686927881

## Model Tuning

### Grid Search

In [21]:
# Cross-validated grid search over SVD's latent-factor count, regularisation strength
# and number of training epochs. n_jobs=-1 runs the folds on all available cores.
# NOTE(review): 35 is absent from the n_factors candidates - confirm whether that is intentional.
params = dict(
    n_factors=[30, 31, 32, 33, 34, 36, 37, 38, 39],
    reg_all=[0.02, 0.05, 0.1],
    n_epochs=(20, 40, 60),
)
gs_svd_e = GridSearchCV(SVD, param_grid=params, n_jobs=-1)
gs_svd_e.fit(data)

In [22]:
# Best score per metric, followed by the parameter combination that produced it.
print(gs_svd_e.best_score, gs_svd_e.best_params, sep='\n')
# Recorded run:
# {'rmse': 1.3695922019976408, 'mae': 1.0154223123105877}
# {'rmse': {'n_factors': 39, 'reg_all': 0.1, 'n_epochs': 40}, 'mae': {'n_factors': 39, 'reg_all': 0.1, 'n_epochs': 60}}

{'rmse': 1.3695922019976408, 'mae': 1.0154223123105877}
{'rmse': {'n_factors': 39, 'reg_all': 0.1, 'n_epochs': 40}, 'mae': {'n_factors': 39, 'reg_all': 0.1, 'n_epochs': 60}}


In [23]:
# Same SVD grid search as the previous one but without n_epochs in the grid,
# so the epoch count is left at the library default. Note that this rebinds `params`.
params = dict(
    n_factors=[30, 31, 32, 33, 34, 36, 37, 38, 39],
    reg_all=[0.02, 0.05, 0.1],
)
gs_svd = GridSearchCV(SVD, param_grid=params, n_jobs=-1)
gs_svd.fit(data)

In [24]:
# Best score per metric, followed by the parameter combination that produced it.
print(gs_svd.best_score, gs_svd.best_params, sep='\n')
# Recorded run:
# {'rmse': 1.3882633369322612, 'mae': 1.0364054473968252}
# {'rmse': {'n_factors': 39, 'reg_all': 0.05}, 'mae': {'n_factors': 39, 'reg_all': 0.05}}

{'rmse': 1.3882633369322612, 'mae': 1.0364054473968252}
{'rmse': {'n_factors': 39, 'reg_all': 0.05}, 'mae': {'n_factors': 39, 'reg_all': 0.05}}


### KNN_BASIC Recommender Model

In [25]:
# Item-based (user_based=False) KNNBasic using Pearson similarity, fitted on the training split.
sim_pearson = dict(name='pearson', user_based=False)
basic_pearson = knns.KNNBasic(sim_options=sim_pearson)
basic_pearson.fit(trainset)

# accuracy.rmse prints the formatted score itself and returns the raw value,
# so wrapping it in print() yields two output lines.
predictions = basic_pearson.test(testset)
print(accuracy.rmse(predictions))
# Recorded run:
# RMSE: 1.4883
# 1.4882986594504284

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4883
1.4882986594504284


In [26]:
# Cross-validate a user-based KNNBasic with Pearson similarity.
# k and min_k are KNNBasic constructor arguments, not similarity options: when placed
# inside sim_options (as tuples of candidates) they were silently ignored, and
# cross_validate only scores one configuration - it never searches over candidates.
# They are therefore passed explicitly as scalars; 40 and 1 are the library defaults,
# i.e. the values the earlier version effectively ran with. To actually tune k/min_k,
# use GridSearchCV with a param_grid instead.
knn_basic = KNNBasic(
    k=40,
    min_k=1,
    sim_options={'name': 'pearson', 'user_based': True},
)
cv_knn_basic = cross_validate(knn_basic, data, n_jobs=-1)

### KNN_MEAN Recommender Model

In [27]:
# Item-based (user_based=False) KNNWithMeans using Pearson similarity, fitted on the training split.
sim_pearson = dict(name='pearson', user_based=False)
knn_means = knns.KNNWithMeans(sim_options=sim_pearson)
knn_means.fit(trainset)

# accuracy.rmse prints the formatted score and returns the raw value, hence two output lines.
predictions = knn_means.test(testset)
print(accuracy.rmse(predictions))
# Recorded run:
# RMSE: 1.4258
# 1.4258176210692983

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4258
1.4258176210692983


In [31]:
# Print each (metric name, per-fold values) pair returned by cross_validate for
# KNNBaseline, then display the mean RMSE across folds.
# cv_knn_baseline is created by the KNNBaseline cross-validation cell (In [30]),
# which appears further down in this file, so that cell must be run first.
print(*cv_knn_baseline.items(), sep='\n')

cv_knn_baseline['test_rmse'].mean()
# Recorded run: 1.4301173926328858

('test_rmse', array([1.42967765, 1.42393694, 1.43217969, 1.43295429, 1.43183839]))
('test_mae', array([1.06792647, 1.06638962, 1.06822984, 1.07216357, 1.07001707]))
('fit_time', (72.90968418121338, 64.91526317596436, 58.292004108428955, 66.4349091053009, 63.06723690032959))
('test_time', (17.386303901672363, 18.513285875320435, 19.74295973777771, 17.58661413192749, 17.80790114402771))


1.4301173926328858

### KNN_BASELINE Recommender Model

In [29]:
# Item-based (user_based=False) KNNBaseline using Pearson similarity, fitted on the training split.
sim_pearson = dict(name='pearson', user_based=False)
knn_baseline = knns.KNNBaseline(sim_options=sim_pearson)
knn_baseline.fit(trainset)

# accuracy.rmse prints the formatted score and returns the raw value, hence two output lines.
predictions = knn_baseline.test(testset)
print(accuracy.rmse(predictions))
# Recorded run:
# RMSE: 1.4141
# 1.4141288433441572

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4141
1.4141288433441572


In [30]:
# Cross-validate a user-based KNNBaseline with Pearson similarity.
# k and min_k are KNNBaseline constructor arguments, not similarity options: when placed
# inside sim_options (as tuples of candidates) they were silently ignored, and
# cross_validate only scores one configuration - it never searches over candidates.
# They are therefore passed explicitly as scalars; 40 and 1 are the library defaults,
# i.e. the values the earlier version effectively ran with. To actually tune k/min_k,
# use GridSearchCV with a param_grid instead.
# Note: this rebinds knn_baseline, replacing the item-based model fitted earlier.
knn_baseline = KNNBaseline(
    k=40,
    min_k=1,
    sim_options={'name': 'pearson', 'user_based': True},
)
cv_knn_baseline = cross_validate(knn_baseline, data)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.


### SVD With Tuned Hyperparameters

In [32]:
# Refit SVD with hyperparameters chosen from the grid search and score it on the held-out split.
# NOTE(review): the grid search reported n_epochs=40 as best for RMSE and 60 as best for MAE;
# 60 is used here even though RMSE is the metric being printed - confirm this is intended.
svd = SVD(
    n_factors=39,
    n_epochs=60,
    lr_all=0.005,
    reg_all=0.1,
)
svd.fit(trainset)

# accuracy.rmse prints the formatted score and returns the raw value, hence two output lines.
predictions = svd.test(testset)
print(accuracy.rmse(predictions))

RMSE: 1.3837
1.3837085368570397


## Prediction and Recommendation Functions

In [85]:
def recommend_anime(uid, iid, actual):
    """Print the SVD estimate for one user/title pair and return the title's lookup row.

    Relies on the notebook-level `svd` model and `rec_match` lookup table.

    Args:
        uid: Raw user id to predict for.
        iid: Raw Title_ID to predict.
        actual: The known rating, echoed as r_ui in the printed prediction.

    Returns:
        The rows of rec_match whose Title_ID equals iid (empty if the id is unknown).
    """
    # verbose=True makes Surprise print the prediction line; the returned object is not needed.
    svd.predict(uid, iid, r_ui=actual, verbose=True)
    is_requested_title = rec_match['Title_ID'] == iid
    return rec_match.loc[is_requested_title]

In [96]:
recommend_anime(45, 3, 1)

user: 45         item: 3          r_ui = 1.00   est = 6.41   {'was_impossible': False}


Unnamed: 0,title,Title_ID,genres
264309,.hack//G.U. Trilogy,3,"['MMORPG', 'RPG', 'Virtual Reality', 'CG Animation']"


In [83]:
def get_recommendations(user_ratings, num_recs, genre):
    """Retrain SVD with a new user's ratings and print their top titles in a genre.

    Relies on the notebook-level `rec_df` (User_ID, Title_ID, ratings), `rec_match`
    (title, Title_ID, genres) and `reader` objects.

    The earlier version appended the user's ratings to `rec_match`, which is the title
    lookup table and holds no ratings at all; the genre filter then discarded the user's
    own rows and the remaining columns reached Surprise in the wrong order. This version
    trains on the real ratings for the requested genre plus the new user's ratings.

    Args:
        user_ratings: List of dicts (or a DataFrame) with 'User_ID', 'Title_ID' and a
            rating stored under either 'ratings' or 'rating'. Ratings may be numeric
            strings. All entries are expected to share one User_ID.
        num_recs: Maximum number of recommendations to print.
        genre: Genre text matched literally against each title's genre list.

    Returns:
        None. Recommendations are printed.
    """
    rating_columns = ['User_ID', 'Title_ID', 'ratings']

    # Titles in the requested genre. regex=False treats the user's text literally so
    # characters such as '(' or '+' cannot raise or be read as a pattern.
    in_genre = rec_match['genres'].str.contains(genre, na=False, regex=False)
    genre_title_ids = rec_match.loc[in_genre, 'Title_ID']
    if genre_title_ids.empty:
        print('No anime found for genre: ' + str(genre))
        return

    # Existing ratings restricted to the genre, already in (user, item, rating) order.
    frames = [rec_df.loc[rec_df['Title_ID'].isin(genre_title_ids), rating_columns]]

    user_df = pd.DataFrame(user_ratings)
    if user_df.empty:
        # No ratings supplied: predict for a brand-new user id (cold start).
        new_user_id = rec_df['User_ID'].max() + 1
        already_rated = set()
    else:
        # Accept the singular 'rating' key as well as the 'ratings' column name.
        if 'ratings' not in user_df.columns and 'rating' in user_df.columns:
            user_df = user_df.rename(columns={'rating': 'ratings'})
        user_df = user_df[rating_columns].copy()
        # input() yields strings; Surprise needs numeric ratings.
        user_df['ratings'] = pd.to_numeric(user_df['ratings'])
        new_user_id = user_df['User_ID'].iloc[0]
        already_rated = set(user_df['Title_ID'])
        frames.append(user_df)

    # DataFrame.append was removed in pandas 2.0; concat is the supported equivalent.
    new_ratings_df = pd.concat(frames, ignore_index=True)

    # Load the combined ratings and refit a fresh model on all of them.
    new_data = Dataset.load_from_df(new_ratings_df, reader)
    svd_new = SVD(n_factors=39, n_epochs=60, lr_all=0.005, reg_all=0.1)
    svd_new.fit(new_data.build_full_trainset())

    # Estimate a rating for every genre title the user has not just rated.
    scored_animes = [
        (title_id, svd_new.predict(new_user_id, title_id).est)
        for title_id in genre_title_ids
        if title_id not in already_rated
    ]

    # Order the predictions from highest to lowest estimated rating.
    ranked_animes = sorted(scored_animes, key=lambda pair: pair[1], reverse=True)

    for rec_num, (title_id, _) in enumerate(ranked_animes[:num_recs], start=1):
        recommended = rec_match.loc[rec_match['Title_ID'] == title_id].iloc[0]
        print('Recommendation number:', rec_num)
        print('Anime: ' + recommended['title'])
        print('Genres: ' + recommended['genres'])
        print('\n')
    print("Thank You For Using John And Paul's Anim-endation")

In [82]:
def _read_int(text):
    """Return text parsed as an int, or None when it is not a whole number."""
    try:
        return int(text.strip())
    except ValueError:
        return None


def anime_rater(df, num):
    """Interactively collect ratings from a new user, then print recommendations.

    Relies on the notebook-level `rec_df` (to pick an unused User_ID) and on
    `get_recommendations`.

    Args:
        df: DataFrame with at least 'title', 'genres' and 'Title_ID' columns to
            sample titles from.
        num: Number of ratings to collect before recommending.

    Returns:
        The list of collected rating dicts with keys 'User_ID', 'Title_ID' and
        'ratings' (the earlier version returned None).
    """
    # One past the largest existing id, so the new user cannot collide with anyone.
    userID = rec_df.User_ID.max() + 1

    # Non-numeric answers are treated like out-of-range ones instead of crashing int().
    num_recs = _read_int(input('How many recommendations would you like? Please enter a number from 1 to 10:\n'))
    while num_recs is None or num_recs > 10 or num_recs < 1:
        num_recs = _read_int(input('You entered a number outside of 1 to 10. Please enter a number from 1 to 10 to continue. \n'))

    genre = input('Please enter your favorite genre. ')

    # Filter once, outside the loop. regex=False matches the text literally and
    # na=False skips rows without genre data instead of producing an invalid mask.
    candidates = df[df['genres'].str.contains(genre, na=False, regex=False)]
    rating_list = []
    if candidates.empty:
        # sample(1) would raise on an empty frame; stop cleanly instead.
        print('No anime found for genre: ' + genre)
        return rating_list

    # Titles already rated or marked as not watched; these are never offered again.
    shown_ids = set()

    while num > 0:
        remaining = candidates[~candidates['Title_ID'].isin(shown_ids)]
        if remaining.empty:
            # Fewer distinct titles than requested ratings: avoid looping forever.
            print('No more titles left to rate in this genre.')
            break

        anime = remaining.sample(1).iloc[0]
        title_id = anime['Title_ID']
        print('\nPlease rate the following {} Animes. \n'.format(num))
        print('Anime: ' + str(anime['title']))
        print('Genre: ' + str(anime['genres']))
        rating = input('How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :\n')

        if rating == 'n':
            shown_ids.add(title_id)
            continue

        score = _read_int(rating)
        if score is None:
            print('Rating must be a whole number from 1 to 10!')
            continue

        if score > 10:
            # 10 itself is a valid rating, so the message says "10 or below".
            print('Rating must be 10 or below!')
            continue

        if score < 1:
            print('Rating must be above 0!')
            continue

        # Store the numeric rating under 'ratings', matching rec_df's column name.
        rating_list.append({'User_ID': userID, 'Title_ID': title_id, 'ratings': score})
        shown_ids.add(title_id)
        num -= 1
        time.sleep(.5)

    print('\n'+'-----Making Recommendations-----'+'\n')
    time.sleep(1)
    get_recommendations(rating_list, num_recs, genre)
    return rating_list
    

In [84]:
user_ratings = anime_rater(df,5)

How many recommendations would you like? Please enter a number from 1 to 10:
10
Please enter your favorite genre. Action

Please rate the following 5 Animes. 

Anime: Godzilla Movie 2: City on the Edge of Battle
Genre: ['Action', 'Mecha', 'Sci Fi', 'Military']
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: AKB0048: Next Stage
Genre: ['Action', 'Drama', 'Mecha', 'Sci Fi']
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: Bubuki Buranki: The Gentle Giants of the Galaxy
Genre: ['Action', 'Drama', 'Mecha', 'Sci Fi']
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: Pokemon XY & Z Specials
Genre: ['Action', 'Adventure', 'Fantasy', 'Shounen']
How do you rate this Anime on a scale of 1-10, press n if you are never watc