In [5]:
import pandas as pd
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [6]:
# Load the raw ratings table as-is; it is filtered and subsampled in a later cell.
reviews_df = pd.read_csv('rating.csv')

# Build the anime lookup table in one non-mutating chain: keep only the id and
# title columns, rename `uid` to `anime_id` so it matches the key used in the
# ratings table, and drop exact duplicate (anime_id, title) rows.
# Chaining avoids `inplace=True` on a frame derived by column selection, which
# can raise SettingWithCopyWarning and makes the cell harder to re-run safely.
animes_df = (
    pd.read_csv('animes.csv')[['uid', 'title']]
    .rename(columns={'uid': 'anime_id'})
    .drop_duplicates()
)

In [7]:
# Sanity check on the anime lookup table: report its (rows, columns) shape,
# then display five randomly chosen rows as the cell's output.
print(f"Anime Dataframe Length {animes_df.shape}")
animes_df.sample(n=5)

Anime Dataframe Length (16216, 2)


Unnamed: 0,anime_id,title
7984,25291,Yomiuri Shimbun
9959,33195,Shi Wan Ge Leng Xiaohua Movie 1
8313,25969,Clock
11384,1198,Tsuyokiss
10029,28087,Ehon Yose


In [8]:
# Sanity check on the raw ratings table: report its (rows, columns) shape,
# then display five randomly chosen rows as the cell's output.
print(f"Reviews Dataframe Length {reviews_df.shape}")
reviews_df.sample(n=5)

Reviews Dataframe Length (7813737, 3)


Unnamed: 0,user_id,anime_id,rating
1299518,12314,14227,9
7185791,67011,5040,8
2869866,26804,7593,8
84313,871,28907,5
1874483,18180,1361,4


In [9]:
# Size of the random subsample used for training, and the seed that makes the
# subsample (and therefore every downstream result) reproducible on re-run.
SAMPLE_SIZE = 1_000_000
SAMPLE_SEED = 42

# Drop rows whose rating is -1: they fall outside the 1-10 scale the model is
# trained on (presumably a "no rating given" placeholder -- confirm against the
# dataset description).
reviews_rated = reviews_df[reviews_df['rating'] != -1]

# Draw the subsample directly instead of shuffling the whole frame and slicing.
# The size is capped at the number of available rows so a smaller input file
# still works (sample(n=...) raises if n exceeds the frame length).
reviews_df_new = (
    reviews_rated
    .sample(n=min(SAMPLE_SIZE, len(reviews_rated)), random_state=SAMPLE_SEED)
    .sort_values('user_id')
)
print(f"Reviews New Dataframe Length {reviews_df_new.shape}")

Reviews New Dataframe Length (1000000, 3)


In [10]:
# Wrap the sampled ratings in a Surprise dataset. Ratings are declared to lie
# on a 1-10 scale (rows with rating -1 were removed beforehand).
rating_reader = Reader(rating_scale=(1, 10))
review_data = Dataset.load_from_df(reviews_df_new, reader=rating_reader)

# Training set built from every row of the dataset (no hold-out split).
trainset = review_data.build_full_trainset()

In [11]:
# Estimate generalisation error with 5-fold cross-validation on a throwaway
# SVD instance. cross_validate fits the algorithm it receives once per fold,
# so the instance used here must not be the one kept for predictions.
cv_results = cross_validate(SVD(), review_data, measures=["RMSE", "MAE"], cv=5, verbose=True)

# Fit the model used for recommendations AFTER cross-validation, so it is
# guaranteed to be trained on the full trainset rather than on whichever fold
# split was fitted last.
model = SVD()
model.fit(trainset)

# Display the per-fold metrics and timings as the cell's output.
cv_results

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.2797  1.2820  1.2780  1.2823  1.2825  1.2809  0.0017  
MAE (testset)     0.9775  0.9771  0.9777  0.9783  0.9789  0.9779  0.0006  
Fit time          8.47    8.72    8.65    8.58    8.55    8.59    0.09    
Test time         1.55    1.54    1.50    1.79    1.80    1.64    0.13    


{'test_rmse': array([1.27968551, 1.28200548, 1.27804806, 1.28227001, 1.28246053]),
 'test_mae': array([0.9774937 , 0.9770601 , 0.97768915, 0.97828545, 0.97887608]),
 'fit_time': (8.468531131744385,
  8.719434261322021,
  8.64927864074707,
  8.576908826828003,
  8.548457622528076),
 'test_time': (1.55448317527771,
  1.544205904006958,
  1.5040476322174072,
  1.7871649265289307,
  1.802079439163208)}

In [12]:
# Spot check: estimated rating of user 4797 for anime 3484. The value 4 is
# passed as r_ui and is simply echoed back in the returned Prediction.
model.predict(uid=4797, iid=3484, r_ui=4)

Prediction(uid=4797, iid=3484, r_ui=4, est=7.978566620636486, details={'was_impossible': False})

In [13]:
# All rows of the sampled ratings that belong to user 4797.
reviews_df_new.loc[reviews_df_new.user_id.eq(4797)]

Unnamed: 0,user_id,anime_id,rating
465935,4797,807,8
465980,4797,3483,8
465933,4797,645,8
465918,4797,225,7
465962,4797,1210,9
465959,4797,1011,9
465927,4797,372,8
465963,4797,1453,9
465920,4797,238,8
465982,4797,4214,6


In [16]:
def anime_predict(userid, top_n=10):
    """Recommend the anime with the highest predicted rating for one user.

    Every anime that appears in ``reviews_df_new`` and that the user has no
    rating for in ``reviews_df_new`` is scored with ``model.predict``; the
    ``top_n`` best-scoring ones are returned together with their titles.

    Relies on the notebook globals ``reviews_df_new``, ``model`` and
    ``animes_df``.

    Parameters
    ----------
    userid
        Raw user id, as found in the ``user_id`` column of ``reviews_df_new``.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``anime_id``, ``pred_score`` and ``title``, sorted by
        ``pred_score`` in descending order. ``title`` is missing for ids that
        have no match in ``animes_df`` (left join).
    """
    candidate_ids = reviews_df_new['anime_id'].unique()

    # Collect the user's anime ids into a set of VALUES. Testing membership
    # with `in` directly on a pandas Series checks the index labels instead,
    # which would fail to exclude anime the user has already rated.
    watched_ids = set(
        reviews_df_new.loc[reviews_df_new['user_id'] == userid, 'anime_id']
    )

    predictions = [
        model.predict(userid, anime_id)
        for anime_id in candidate_ids
        if anime_id not in watched_ids
    ]
    prediction_df = pd.DataFrame({
        'anime_id': [prediction.iid for prediction in predictions],
        'pred_score': [prediction.est for prediction in predictions],
    })

    top_predictions = (
        prediction_df
        .sort_values('pred_score', ascending=False)
        .head(top_n)
    )
    # Left join keeps every recommendation even when its title is unknown.
    return top_predictions.merge(animes_df, how='left', on='anime_id')
    

In [17]:
# Ask for a user id interactively, convert it to an integer, and display that
# user's recommendations as the cell's output.
raw_user_id = input("Enter User ID: ")
user_input = int(raw_user_id)
anime_predict(userid=user_input)

Unnamed: 0,anime_id,pred_score,title
0,11757,7.934198,Sword Art Online
1,2251,7.835651,Baccano!
2,164,7.433971,Mononoke Hime
3,5114,7.428781,Fullmetal Alchemist: Brotherhood
4,11061,7.358139,Hunter x Hunter (2011)
5,339,7.344123,Serial Experiments Lain
6,11981,7.268309,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...
7,820,7.239986,Ginga Eiyuu Densetsu
8,777,7.224165,Hellsing Ultimate
9,7785,7.174965,Yojouhan Shinwa Taikei
