In [1]:
import pandas as pd
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise.model_selection import GridSearchCV
import numpy as np
import time

In [228]:
# Load the ratings table and discard the 'Unnamed: 0' column in one step
# (presumably a leftover index column from an earlier to_csv -- TODO confirm).
df = pd.read_csv('anime_data.csv').drop(columns='Unnamed: 0')
df.shape

(18986, 5)

In [229]:
# Widen column display so long title / genre strings are not truncated.
pd.set_option('display.max_colwidth', 1000)

In [230]:
df.head()

Unnamed: 0,title,genres,ratings,User_ID,Title_ID
0,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",8,0,45
1,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",0,1,45
2,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,2,45
3,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,3,45
4,Fullmetal Alchemist: Brotherhood,"['Action', 'Adventure', 'Drama', 'Fantasy']",10,4,45


### Prepare the DataFrame for Surprise: User, Item, and Rating Columns

In [231]:
# Keep only the three columns the recommender needs, in user / item / rating
# order; selecting them directly also leaves out 'title' and 'genres'.
rec_df = df[['User_ID', 'Title_ID', 'ratings']]

In [254]:
rec_df.head()

Unnamed: 0,User_ID,Title_ID,ratings
0,0,45,8
1,1,45,0
2,2,45,10
3,3,45,10
4,4,45,10


In [233]:
# Lookup table with one row per title (first occurrence kept), used to turn
# a Title_ID back into a title and its genres.
rec_match = df.drop_duplicates(subset='Title_ID')[['title', 'Title_ID', 'genres']]

In [234]:
rec_match.head()

Unnamed: 0,title,Title_ID,genres
0,Fullmetal Alchemist: Brotherhood,45,"['Action', 'Adventure', 'Drama', 'Fantasy']"
154,your name.,170,"['Drama', 'Romance', 'Body Swapping', 'Gender Bender']"
273,Attack on Titan 3rd Season: Part II,6,"['Action', 'Fantasy', 'Horror', 'Shounen']"
419,A Silent Voice,0,"['Drama', 'Shounen', 'Melancholy', 'School Life']"
541,Haikyuu!! Karasuno High School vs Shiratorizawa Academy,59,"['Shounen', 'Sports', 'Animeism', 'School Club']"


### Fitting the Models with our Data

In [236]:
from surprise import Reader, Dataset
from surprise.prediction_algorithms import knns
from surprise.similarities import cosine, msd, pearson
from surprise import accuracy

# Declare the rating range Surprise should assume for this dataset.
# NOTE(review): the df.head() output earlier in the notebook shows a rating of 0,
# which lies outside (1, 10) -- confirm whether 0 means "not rated" (and should be
# filtered out before training) or whether the scale should start at 0.
reader = Reader(rating_scale = (1,10))
# rec_df was reordered to User_ID, Title_ID, ratings before being handed to Surprise.
data = Dataset.load_from_df(rec_df, reader)

In [237]:
from surprise.model_selection import train_test_split
from surprise import accuracy
# Hold out 25% of the ratings for evaluation; the fixed random_state keeps
# the split identical between runs.
trainset, testset = train_test_split(data, random_state=100, test_size=0.25)

### SVD Recommender Model

In [238]:
# Baseline: SVD with the library's default hyperparameters, fitted on the training split.
svd = SVD()
svd.fit(trainset)
# Predict every held-out rating in the test split.
preds = svd.test(testset)
# Score the model: accuracy.rmse prints "RMSE: ..." and, as the last expression
# of the cell, its return value is displayed as well.
accuracy.rmse(preds)

RMSE: 2.3912


2.391215612394428

### SVD W/ Tuned Hyperparameters

In [239]:
# SVD with hand-set hyperparameters (no search is run in this cell).
# NOTE(review): the recorded RMSE for this cell (2.7018) is higher than the
# default SVD's (2.3912), so these settings do not improve on the baseline.
# This rebinds `svd`, so recommend_anime() further down uses THIS model.
svd = SVD(n_factors=100,n_epochs=10,lr_all=0.005,reg_all=0.4)
svd.fit(trainset)
predictions = svd.test(testset)
# accuracy.rmse already prints "RMSE: ..."; print() then shows the returned value again.
print(accuracy.rmse(predictions))

RMSE: 2.7018
2.7018277358975653


### KNN_BASIC Recommender Model

In [240]:
# Item-based neighbours (user_based=False) compared with Pearson similarity.
sim_pearson = dict(name='pearson', user_based=False)
basic_pearson = KNNBasic(sim_options=sim_pearson)
basic_pearson.fit(trainset)
# Evaluate on the held-out split and report the RMSE.
predictions = basic_pearson.test(testset)
print(accuracy.rmse(predictions))

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.3107
2.3107489192622923


### KNN_MEAN Recommender Model

In [241]:
# Same item-based Pearson setup, using the KNNWithMeans algorithm.
sim_pearson = dict(name='pearson', user_based=False)
knn_means = KNNWithMeans(sim_options=sim_pearson)
knn_means.fit(trainset)
# Evaluate on the held-out split and report the RMSE.
predictions = knn_means.test(testset)
print(accuracy.rmse(predictions))

Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2914
2.2913520687309057


### KNN_BASELINE Recommender Model

In [242]:
# Same item-based Pearson setup, using the KNNBaseline algorithm.
sim_pearson = dict(name='pearson', user_based=False)
knn_baseline = KNNBaseline(sim_options=sim_pearson)
knn_baseline.fit(trainset)
# Evaluate on the held-out split and report the RMSE.
predictions = knn_baseline.test(testset)
print(accuracy.rmse(predictions))

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
RMSE: 2.2777
2.2777293145036723


## Prediction and Recommendation Functions

In [243]:
def recommend_anime(uid, iid, actual):
    """Print the fitted `svd` model's estimate for one user/title pair and look the title up.

    Parameters
    ----------
    uid : raw user ID passed to ``svd.predict``.
    iid : raw title ID passed to ``svd.predict`` and matched against ``Title_ID``.
    actual : the true rating, passed as ``r_ui`` and echoed in the printed prediction.

    Returns
    -------
    pandas.DataFrame
        The rows of ``rec_match`` whose ``Title_ID`` equals ``iid``.
    """
    # verbose=True makes predict() print the prediction; the returned object is not needed.
    svd.predict(uid, iid, actual, verbose=True)
    is_requested_title = rec_match['Title_ID'] == iid
    return rec_match.loc[is_requested_title]

In [244]:
recommend_anime(45, 1, 22)

user: 45         item: 1          r_ui = 22.00   est = 4.87   {'was_impossible': False}


Unnamed: 0,title,Title_ID,genres
18103,Air Gear: Kuro no Hane to Nemuri no Mori -Break on the Sky-,1,"['Ecchi', 'Shounen', 'Based on a Manga', 'Ecchi', 'Shounen']"


In [250]:
def get_recommendations(user_ratings, num_recs, genre):
    """Refit SVD on one genre's ratings plus a new user's ratings and print the top picks.

    Parameters
    ----------
    user_ratings : list of dict
        One record per rated title with the keys 'User_ID', 'Title_ID' and the
        score under 'ratings' (the older key 'rating' is accepted as well).
    num_recs : int
        Maximum number of recommendations to print.
    genre : str
        Genre text; only titles whose 'genres' value contains it are considered.

    Returns
    -------
    None
        The recommendations are printed.

    Notes
    -----
    Relies on the notebook-level objects ``rec_df`` (User_ID / Title_ID / ratings),
    ``rec_match`` (title lookup) and ``reader``.
    """
    rating_cols = ['User_ID', 'Title_ID', 'ratings']

    new_user_df = pd.DataFrame(user_ratings)
    if new_user_df.empty:
        print('No ratings were provided, so no recommendations can be made.')
        return
    # Align the older 'rating' key with the 'ratings' column used by rec_df.
    if 'ratings' not in new_user_df.columns and 'rating' in new_user_df.columns:
        new_user_df = new_user_df.rename(columns={'rating': 'ratings'})
    # input() yields strings; the model needs numeric ratings.
    new_user_df['ratings'] = pd.to_numeric(new_user_df['ratings'])

    # Titles that belong to the requested genre. regex=False treats the user's
    # text literally, so characters such as '+' or '(' cannot break the match.
    in_genre = rec_match['genres'].str.contains(genre, na=False, regex=False)
    genre_title_ids = rec_match.loc[in_genre, 'Title_ID']
    if genre_title_ids.empty:
        print('No anime found for genre {!r}.'.format(genre))
        return

    # Training data = existing ratings for those titles + the new user's ratings.
    # (The ratings live in rec_df; rec_match only holds title metadata.)
    genre_ratings = rec_df.loc[rec_df['Title_ID'].isin(genre_title_ids), rating_cols]
    new_ratings_df = pd.concat(
        [genre_ratings, new_user_df[rating_cols]], ignore_index=True
    )

    # Column order matters here: user, item, rating.
    new_data = Dataset.load_from_df(new_ratings_df[rating_cols], reader)
    svd_new = SVD()
    svd_new.fit(new_data.build_full_trainset())

    # Estimate the new user's rating for every genre title not rated yet.
    new_user_id = new_user_df['User_ID'].max()
    already_rated = set(new_user_df['Title_ID'])
    list_of_animes = [
        (a_id, svd_new.predict(new_user_id, a_id).est)
        for a_id in genre_title_ids
        if a_id not in already_rated
    ]

    # Highest estimated rating first.
    ranked_animes = sorted(list_of_animes, key=lambda pair: pair[1], reverse=True)

    for rec_num, (a_id, _est) in enumerate(ranked_animes[:num_recs], start=1):
        recommended = rec_match.loc[rec_match['Title_ID'] == a_id].iloc[0]
        print('Recommendation number:', rec_num)
        print('Anime: ' + str(recommended['title']))
        print('Genres: ' + str(recommended['genres']))
        print('\n')
    print("Thank You For Using John And Paul's Anime Recommender")

In [251]:
def _ask_for_int(prompt, retry_prompt, low, high):
    """Ask via input() until the reply is a whole number in [low, high]; return it as int."""
    reply = input(prompt)
    while True:
        try:
            value = int(reply)
        except ValueError:
            value = None
        if value is not None and low <= value <= high:
            return value
        reply = input(retry_prompt)


def anime_rater(df, num):
    """Interactively collect ratings from a new user, then print recommendations.

    Parameters
    ----------
    df : pandas.DataFrame
        Ratings table with at least 'title', 'genres' and 'Title_ID' columns;
        titles to rate are sampled from its rows.
    num : int
        Number of ratings to collect before recommending.

    Returns
    -------
    list of dict
        The collected ratings as {'User_ID', 'Title_ID', 'ratings'} records
        (empty if the genre matches nothing).

    Notes
    -----
    Uses the notebook-level ``rec_df`` to pick an unused user ID and calls
    ``get_recommendations`` with the collected ratings.
    """
    # The user answers on a 1-5 scale, while the notebook's Reader is declared
    # with rating_scale=(1, 10); doubling maps 5/5 onto 10/10.
    scale_factor = 2

    userID = int(rec_df['User_ID'].max()) + 1
    num_recs = _ask_for_int(
        'How many recommendations would you like? Please enter a number from 1 to 10:\n',
        'You entered an invalid number. Please enter a number from 1 to 10 to continue. \n',
        1,
        10,
    )
    genre = input('Please enter your favorite genre. ').strip()

    # na=False keeps rows with missing genres out of the mask; regex=False
    # treats the user's text literally.
    in_genre = df[df['genres'].str.contains(genre, na=False, regex=False)]
    if in_genre.empty:
        print('No anime found for genre {!r}.'.format(genre))
        return []

    rating_list = []
    rated_ids = set()

    while num > 0:
        # Never ask about a title the user has already rated.
        pool = in_genre[~in_genre['Title_ID'].isin(rated_ids)]
        if pool.empty:
            print('\nThere are no more titles left to rate in this genre.')
            break
        anime = pool.sample(1).iloc[0]
        print('\nPlease rate the following {} Animes. \n'.format(num))
        print('Anime: ' + str(anime['title']))
        print('Genre: ' + str(anime['genres']))
        rating = input('How do you rate this Anime on a scale of 1-5, press n if you are never watched this anime. :\n').strip()

        # 'n' skips this title and draws another one.
        if rating.lower() == 'n':
            continue

        try:
            score = int(rating)
        except ValueError:
            print('Please enter a whole number from 1 to 5, or n.')
            continue

        if score > 5:
            print('Rating must be 5 or below!')
            continue

        if score < 1:
            print('Rating must be above 0!')
            continue

        title_id = int(anime['Title_ID'])
        rating_list.append(
            {'User_ID': userID, 'Title_ID': title_id, 'ratings': score * scale_factor}
        )
        rated_ids.add(title_id)
        num -= 1
        time.sleep(.5)

    print('\n'+'-----Making Recommendations-----'+'\n')
    time.sleep(1)
    get_recommendations(rating_list, num_recs, genre)
    return rating_list
    
    

In [253]:
user_ratings = anime_rater(df,5)

How many recommendations would you like? Please enter a number from 1 to 10:
3
Please enter your favorite genre. Action

Please rate the following 5 Animes. 

Anime: Parasyte -the maxim-
Genre: ['Action', 'Drama', 'Horror', 'Sci Fi']
How do you rate this Anime on a scale of 1-5, press n if you are never watched this anime. :
3

Please rate the following 4 Animes. 

Anime: City Hunter 3
Genre: ['Action', 'Shounen', 'Based on a Manga', 'Action', 'Shounen']
How do you rate this Anime on a scale of 1-5, press n if you are never watched this anime. :
4

Please rate the following 3 Animes. 

Anime: No Game No Life: Zero
Genre: ['Action', 'Adventure', 'Drama', 'Fantasy']
How do you rate this Anime on a scale of 1-5, press n if you are never watched this anime. :
2

Please rate the following 2 Animes. 

Anime: Gintama: Porori-hen
Genre: ['Action', 'Comedy', 'Drama', 'Sci Fi']
How do you rate this Anime on a scale of 1-5, press n if you are never watched this anime. :
3

Please rate the followi