In [22]:
import pandas as pd
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
import numpy as np
import time
import pickle

In [2]:
df = pd.read_csv('csv_files/anime_data.csv')
df = df.drop('Unnamed: 0', axis = 1)
df.shape

(478058, 6)

In [3]:
pd.options.display.max_colwidth = 1000

In [4]:
df.head()

Unnamed: 0,title,genres,ratings,type,User_ID,Title_ID
0,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",8,TV,0,1244
1,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,3,1244
2,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,5,1244
3,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,TV,6,1244
4,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",6,TV,14,1244


### Prep The DataFrame For the Model W/ User, Item, And Rating For Surprise

In [5]:
rec_df = df.drop(columns = df[['title', 'genres', 'type']])
rec_df = rec_df[['User_ID', 'Title_ID', 'ratings']]

In [6]:
rec_df.head()

Unnamed: 0,User_ID,Title_ID,ratings
0,0,1244,8
1,3,1244,10
2,5,1244,10
3,6,1244,10
4,14,1244,6


In [7]:
rec_match = df[['title', 'Title_ID', 'genres']].drop_duplicates('Title_ID')

In [8]:
rec_match.head()

Unnamed: 0,title,Title_ID,genres
0,Fullmetal Alchemist: Brotherhood,1244,"['Action', 'Adventure', 'Drama', 'Fantasy']"
130,your name.,4661,"['Drama', 'Romance', 'Body Swapping', 'Gender Bender']"
235,Attack on Titan 3rd Season: Part II,279,"['Action', 'Fantasy', 'Horror', 'Shounen']"
373,A Silent Voice,49,"['Drama', 'Shounen', 'Melancholy', 'School Life']"
480,Haikyuu!! Karasuno High School vs Shiratorizawa Academy,1531,"['Shounen', 'Sports', 'Animeism', 'School Club']"


### Fitting the Models with our Data

In [11]:
from surprise import Reader, Dataset
from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy

reader = Reader(rating_scale = (1,10))
data = Dataset.load_from_df(rec_df, reader)

In [12]:
from surprise.model_selection import train_test_split
from surprise import accuracy
#split data for accurate prediction
trainset, testset = train_test_split(data, test_size=0.25, random_state = 100)

### SVD Recommender Model

In [13]:
svd = SVD()
svd.fit(trainset)
preds = svd.test(testset)
# Rating the Model
accuracy.rmse(preds)
# RMSE: 1.4051
# 1.4050804519853317

RMSE: 1.4060


1.4059564424377973

### SVD W/ Tuned Hyperparameter

In [21]:
svd = SVD(n_factors=100,n_epochs=10,lr_all=0.005,reg_all=0.4)
svd.fit(trainset)
predictions = svd.test(testset)
print(accuracy.rmse(predictions))
# RMSE: 1.4772
# 1.4772472149202394

RMSE: 1.4772
1.4771977025281353


NameError: name 'pickle' is not defined

In [23]:
filename_svd = 'svd.pickle'
pickle.dump(svd, open(filename_svd, 'wb'))

### KNN_BASIC Recommender Model

In [35]:
sim_pearson = {'name':'pearson','user_based':False}
basic_pearson = knns.KNNBasic(sim_options=sim_pearson)
basic_pearson.fit(trainset)
predictions = basic_pearson.test(testset)
print(accuracy.rmse(predictions))
# RMSE: 1.4883
# 1.4882986594504284

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4883
1.4882986594504284


### KNN_MEAN Recommender Model

In [36]:
sim_pearson = {'name':'pearson','user_based':False}
knn_means = knns.KNNWithMeans(sim_options=sim_pearson)
knn_means.fit(trainset)
predictions = knn_means.test(testset)
print(accuracy.rmse(predictions))
#RMSE: 1.4258
# 1.4258176210692983

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4258
1.4258176210692983


### KNN_BASELINE Recommender Model

In [37]:
sim_pearson = {'name':'pearson','user_based':False}
knn_baseline = knns.KNNBaseline(sim_options=sim_pearson)
knn_baseline.fit(trainset)
predictions = knn_baseline.test(testset)
print(accuracy.rmse(predictions))
# RMSE: 1.4141
# 1.4141288433441572

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 1.4141
1.4141288433441572


## Function Model Predicting 

In [14]:
def recommend_anime(uid, iid, actual):
    pred = svd.predict(uid, iid, actual, verbose =True)
    match = rec_match.loc[rec_match['Title_ID'] == iid]
    return match

In [15]:
recommend_anime(45, 1, 22)

user: 45         item: 1          r_ui = 22.00   est = 6.03   {'was_impossible': False}


Unnamed: 0,title,Title_ID,genres
270213,.hack // The Movie,1,"['Action', 'Sci Fi', 'MMORPG', 'CG Animation']"


In [16]:
def get_recommendations(user_ratings, num_recs, genre):
    new_ratings_df = rec_match.append(user_ratings,ignore_index=True)
    new_ratings_df = new_ratings_df[new_ratings_df.genres.str.contains(genre, na=False)]
    new_ratings_df = new_ratings_df.drop(columns=['title', 'genres'])

    #load in new df
    new_data = Dataset.load_from_df(new_ratings_df,reader)
    #create new svd object
    svd_new = SVD()
    #re fit the model
    svd_new.fit(new_data.build_full_trainset())

    # make predictions for the user
    list_of_animes = []
    for a_id in new_ratings_df['Title_ID'].unique():
        list_of_animes.append((a_id, svd_new.predict(new_ratings_df['User_ID'].max(),a_id)[3]))

    # order the predictions from highest to lowest rated
    ranked_animes = sorted(list_of_animes, key=lambda x:x[1], reverse=True)

    rec_num = 1
    for i in ranked_animes[:num_recs]:
        recommended = rec_match[rec_match['Title_ID'] == i[0]]
        print('Recommendation number:', rec_num)
        print('Anime: ' + recommended.values[0][0])
        print('Genres: ' + (recommended.values[0][2]))
#         print('Type' + recommended.values[0][-1])
        print('\n')
        rec_num +=1
    print("Thank You For Using John And Paul's Anim-endation")

In [19]:
def anime_rater(df, num):
    userID = rec_df.User_ID.max()+1
    num_recs = input('How many recommendations would you like? Please enter a number from 1 to 10:\n')
    
    while int(num_recs) > 10 or int(num_recs) < 1:
        num_recs = input('You entered a number outside of 1 to 10. Please enter a number from 1 to 10 to continue. \n')
    genre = input('Please enter your favorite genre. ').title()
    rating_list = []
    
    while num > 0:
        anime = df[df['genres'].str.contains(genre)].sample(1)
        print('\nPlease rate the following {} Animes. \n'.format(num))
        print('Anime: ' + str(anime.values[0][0]))
        print('Genre: ' + str(anime.values[0][1]))
        print('Type: ' + str(anime.values[0][3]))
        
        rating = input('How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :\n')
            
        if rating == 'n':
            continue
        
        if int(rating) > 10:
            print('Rating must be below 10!')
            continue
            
        if int(rating) < 1:
            print('Rating must be above 0!')
            continue
        
        else:
            rating_one_anime = {'User_ID':userID,'Title_ID': anime['Title_ID'].values[0],'rating': rating}
            rating_list.append(rating_one_anime) 
            num -= 1
        time.sleep(.5)
    print('\n'+'-----Making Recommendations-----'+'\n')
    time.sleep(1)
    get_recommendations(rating_list, int(num_recs), genre)
    

In [24]:
user_ratings = anime_rater(df,5)

How many recommendations would you like? Please enter a number from 1 to 10:
1
Please enter your favorite genre. action

Please rate the following 5 Animes. 

Anime: Tatakae!! Ramenman Movie
Genre: ['Action', 'Shounen', 'Hand to Hand Combat', 'Martial Arts']
Type: Movie
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: Mobile Suit Victory Gundam
Genre: ['Action', 'Drama', 'Mecha', 'Sci Fi']
Type: TV
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: Ginga Densetsu Weed
Genre: ['Action', 'Adventure', 'Shounen', 'Animal Protagonists']
Type: TV
How do you rate this Anime on a scale of 1-10, press n if you are never watched this anime. :
n

Please rate the following 5 Animes. 

Anime: Beyond the Boundary Movie: I'll Be Here - Future
Genre: ['Action', 'Drama', 'Fantasy', 'Amnesia']
Type: Movie
How do you rate t

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Recommendation number: 1
Anime: Fullmetal Alchemist: Brotherhood
Genres: ['Action', 'Adventure', 'Drama', 'Fantasy']


Thank You For Using John And Paul's Anim-endation
