# Spirited Away (Sen to Chihiro no Kamikakushi : 千と千尋の神隠し)

In [None]:
# Embed the YouTube video with id 'ByXuk9QqQkk' as this cell's output.
import IPython.display
IPython.display.YouTubeVideo('ByXuk9QqQkk')

# Pre-Processing Data
* [create tfidf of anime side](https://www.kaggle.com/wordroid/create-tfidf-of-anime-side)
* [create tfidf of user side](https://www.kaggle.com/wordroid/create-tfidf-of-user-side-another-way)

# Related Notebooks
* [Recommendations using topic model](https://www.kaggle.com/wordroid/recommendations-using-topic-model)
* [Load and Confirm tfidf of anime side](https://www.kaggle.com/wordroid/load-and-confirm-tfidf-of-anime-side)
  * [similar anime to Mobile Suit Gundam](https://www.kaggle.com/wordroid/load-and-confirm-tfidf-of-anime-side#Mobile-Suit-Gundam)
  * [similar anime to 'Kimi no Na wa.'](https://www.kaggle.com/wordroid/load-and-confirm-tfidf-of-anime-side#Kimi-no-Na-wa.)
* [Load and Confirm tfidf of user side](https://www.kaggle.com/wordroid/load-and-confirm-tfidf-of-user-side)

In [None]:
# Install the package hosted at github.com/darecophoenixx/wordroid.sblo.jp
# (presumably the provider of the `feature_eng` module imported further down — TODO confirm).
!pip install git+https://github.com/darecophoenixx/wordroid.sblo.jp

In [None]:
%matplotlib inline
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

import os.path
import sys
import re
import itertools
import csv
import datetime
import pickle
import random
from collections import defaultdict, Counter
import gc

import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import seaborn as sns
import pandas as pd
import numpy as np
import scipy
import gensim
from sklearn.metrics import f1_score, classification_report, confusion_matrix, log_loss
from sklearn.model_selection import train_test_split
import gensim
from keras.preprocessing.sequence import skipgrams
import tensorflow as tf

In [None]:
def hexbin(x, y, color, **kwargs):
    """Draw a hexagonal-bin plot of ``x`` against ``y`` with pyplot.

    The colormap is a seaborn light palette derived from ``color``; every
    extra keyword argument is forwarded unchanged to ``plt.hexbin``.
    """
    palette = sns.light_palette(color, as_cmap=True)
    plt.hexbin(x, y, cmap=palette, **kwargs)
def scatter(x, y, color, **kwargs):
    """Scatter-plot ``x`` against ``y`` with pyplot using '.' markers.

    ``color`` and ``kwargs`` are accepted but ignored: neither is passed on
    to ``plt.scatter``.
    """
    dot_marker = '.'
    plt.scatter(x, y, marker=dot_marker)

# Prepare Data
---

In [None]:
ls -la ../input

In [None]:
# Directory holding the source CSV files (anime.csv and rating.csv are read from it below).
dir_data_src = '../input/anime-recommendations-database'
# Show which files are available in that directory.
os.listdir(dir_data_src)

## load anime.csv

In [None]:
# Read the anime table and print its (rows, columns) shape.
anime_csv = pd.read_csv(os.path.join(dir_data_src, 'anime.csv'))
print(anime_csv.shape)
# Use the anime_id values as the row index so `anime_csv.loc[...]` selects by
# anime_id; the anime_id column itself is kept.
anime_csv.index = anime_csv.anime_id.values
anime_csv.head()

In [None]:
def Anime_title2id(titlename, csv=anime_csv):
    """Return the ``anime_id`` of the anime whose ``name`` equals ``titlename``.

    Parameters
    ----------
    titlename : str
        Exact title to look up; it is compared with the ``name`` column.
    csv : pandas.DataFrame
        Table with ``name`` and ``anime_id`` columns (defaults to
        ``anime_csv``). NOTE: inside this function the parameter shadows the
        ``csv`` module imported by the notebook.

    Returns
    -------
    The ``anime_id`` of the first matching row, or the sentinel ``9999999``
    when no row matches.
    """
    matches = csv.loc[csv['name'] == titlename, 'anime_id'].values
    # Only "title not found" maps to the sentinel. Other failures (for example
    # a missing column) propagate instead of being hidden by a bare `except:`.
    if len(matches) == 0:
        return 9999999
    return matches[0]

Anime_title2id('Kimi no Na wa.'), Anime_title2id('Fullmetal Alchemist: Brotherhood'), Anime_title2id('Gintama°'), Anime_title2id('PL_ANIME')

In [None]:
def Anime_id2title(anime_id, csv=anime_csv):
    """Return the ``name`` of the anime whose ``anime_id`` equals ``anime_id``.

    Parameters
    ----------
    anime_id : int or str
        Id to look up. A string such as ``'199'`` (as obtained by stripping
        the ``'anime_id-'`` prefix from a dictionary token) is converted to
        ``int`` first, because a string never compares equal to the numeric
        values of the ``anime_id`` column.
    csv : pandas.DataFrame
        Table with ``anime_id`` and ``name`` columns (defaults to
        ``anime_csv``). NOTE: inside this function the parameter shadows the
        ``csv`` module imported by the notebook.

    Returns
    -------
    The ``name`` of the first matching row, or the placeholder ``'*****'``
    when the id is unknown or is a string that is not a valid integer.
    """
    if isinstance(anime_id, str):
        try:
            anime_id = int(anime_id)
        except ValueError:
            # A non-numeric string cannot identify any row.
            return '*****'
    matches = csv.loc[csv['anime_id'] == anime_id, 'name'].values
    # Only "id not found" maps to the placeholder. Other failures (for example
    # a missing column) propagate instead of being hidden by a bare `except:`.
    if len(matches) == 0:
        return '*****'
    return matches[0]

Anime_id2title(32281), Anime_id2title(5114), Anime_id2title(28977), Anime_id2title(9999999)

## load rating.csv

In [None]:
'''
load rating.csv
'''
# Read the rating table from the same source directory and print its
# (rows, columns) shape before previewing the first rows.
rating_csv = pd.read_csv(os.path.join(dir_data_src, 'rating.csv'))
print(rating_csv.shape)
rating_csv.head()

In [None]:
'''
`Yuri!!! on Ice` doesn't exist in rating_csv
'''
# Print the anime_id that anime_csv holds for this title (9999999 would mean
# the title was not found).
print(Anime_title2id('Yuri!!! on Ice'))
# Select the rating rows for anime_id 32995 — presumably the id printed by the
# line above; confirm against the cell output.
rating_csv.query('anime_id==32995')

## create rating_csv2
Drop the rows where `rating == -1`.

In [None]:
# Keep only the rows whose rating is not -1.
rating_csv2 = rating_csv.loc[rating_csv.rating.values != -1]
# Print the shape explicitly: a bare expression that is not the last line of a
# cell is evaluated but never displayed.
print(rating_csv2.shape)
rating_csv2.head(10)

## Anime with no ratings

In [None]:
# Anime without any rating: ids present in anime_csv that never occur in
# rating_csv2. Only the shape of the matching anime_csv rows is printed.
rated_ids = rating_csv2.groupby('anime_id').size().keys().values
is_rated = np.isin(anime_csv.anime_id.values, rated_ids)
anime_id_norated = anime_csv.anime_id.values[~is_rated]
print(anime_csv.loc[anime_id_norated].shape)

# Load Pre-Processing Data
---

In [None]:
# Directory with the output of the "create tfidf of anime side" notebook.
src_tfidf_anime = '../input/create-tfidf-of-anime-side'
# Show which files are available in that directory.
os.listdir(src_tfidf_anime)

In [None]:
# Load the anime-side artifacts: a SciPy sparse matrix, a gensim TfidfModel and
# the two gensim dictionaries (users and anime).
corpus_csr_anime = scipy.sparse.load_npz(os.path.join(src_tfidf_anime, 'corpus_csr.npz'))
tfidf_anime = gensim.models.TfidfModel.load(os.path.join(src_tfidf_anime, 'tfidf'))
dic_user = gensim.corpora.Dictionary.load(os.path.join(src_tfidf_anime, 'dic_user'))
dic_anime = gensim.corpora.Dictionary.load(os.path.join(src_tfidf_anime, 'dic_anime'))

# Display the loaded objects as a tuple.
corpus_csr_anime, tfidf_anime, dic_user, dic_anime

In [None]:
# Directory with the output of the "create tfidf of user side (another way)" notebook.
src_tfidf_user = '../input/create-tfidf-of-user-side-another-way'
# Show which files are available in that directory.
os.listdir(src_tfidf_user)

In [None]:
# Load the user-side artifacts: a SciPy sparse matrix and a gensim TfidfModel.
corpus_csr_user = scipy.sparse.load_npz(os.path.join(src_tfidf_user, 'corpus_csr.npz'))
tfidf_user = gensim.models.TfidfModel.load(os.path.join(src_tfidf_user, 'tfidf'))
# The dictionaries are not reloaded here; dic_user and dic_anime loaded from
# the anime-side directory stay in use.
#dic_user = gensim.corpora.Dictionary.load(os.path.join(src_tfidf_user, 'dic_user'))
#dic_anime = gensim.corpora.Dictionary.load(os.path.join(src_tfidf_user, 'dic_anime'))

# Display the loaded objects as a tuple.
corpus_csr_user, tfidf_user

In [None]:
def Anime2id(title):
    """Return the id that ``dic_anime.token2id`` holds for an anime title.

    The title is first resolved to an anime_id with ``Anime_title2id``; the
    lookup key is the token ``'anime_id-<anime_id>'``.
    """
    token = 'anime_id-{}'.format(Anime_title2id(title))
    return dic_anime.token2id[token]

Anime2id('Kimi no Na wa.')

In [None]:
def get_animeid(name):
    """Return the token ``'anime_id-<anime_id>'`` for the anime called ``name``.

    The id is taken from the first ``anime_csv`` row whose ``name`` column
    equals ``name``; indexing the empty result raises ``IndexError`` when no
    row matches.
    """
    matching_ids = anime_csv.query("name==@name").anime_id.values
    return 'anime_id-{}'.format(matching_ids[0])

get_animeid('Mobile Suit Gundam')

# Create MatrixSimilarity
---

In [None]:
from feature_eng import neg_smpl

In [None]:
# Build the similarity index over the anime-side corpus. num_features is one
# more than the largest id in dic_user (ids are presumably 0-based — TODO confirm).
sim_anime = neg_smpl.MySparseMatrixSimilarity(corpus_csr_anime, num_features=max(dic_user.keys())+1, tfidf=tfidf_anime)
sim_anime

In [None]:
# Build the similarity index over the user-side corpus. num_features is one
# more than the largest id in dic_anime (ids are presumably 0-based — TODO confirm).
sim_user = neg_smpl.MySparseMatrixSimilarity(corpus_csr_user, num_features=max(dic_anime.keys())+1, tfidf=tfidf_user)
sim_user

# Sen to Chihiro no Kamikakushi
---

In [None]:
# List every anime whose name contains "Sen to". The query is evaluated with
# engine='python' so that the `.str` accessor is available in the expression.
anime_csv.query('name.str.contains("Sen to")', engine='python')

In [None]:
# Title of the anime this section analyses; show its 'anime_id-<id>' token.
anime_title = 'Sen to Chihiro no Kamikakushi'
get_animeid(anime_title)

In [None]:
# Convert the single token for this anime into a bag-of-words query via dic_anime.
query_anime = dic_anime.doc2bow([get_animeid(anime_title)])
query_anime

In [None]:
'''
Users who like this anime
'''
# Ask the user-side index for at most 30 best matches for the anime query.
sim_user.num_best = 30
# The result is iterated below as (index, weight) pairs; the index is looked
# up in dic_user.
user_rating = sim_user[query_anime]
user_rating

In [None]:
# Print the dic_user token of every returned user; the weight is not used here.
for idx, wgt in user_rating:
    print(dic_user[idx])

# Similar anime to Spirited Away (Sen to Chihiro no Kamikakushi)

In [None]:
'''
get anime list
'''
# Ask the anime-side index for at most 30 best matches, using the
# (index, weight) pairs of the users found above as the query.
sim_anime.num_best = 30
anime_rating = sim_anime[user_rating]
anime_rating

In [None]:
# Collect the anime_id of every result and print its score with its title.
anime_list = []
for idx, rating in anime_rating:
    # Dictionary tokens look like 'anime_id-<id>'. Strip the prefix once and
    # convert to int: a string id never matches the numeric anime_id column,
    # so the title lookup would always return its '*****' placeholder.
    anime_id = int(re.sub('anime_id-', '', dic_anime[idx]))
    print(rating, Anime_id2title(anime_id))
    anime_list.append(anime_id)

In [None]:
# NOTE: this bare expression is evaluated but not displayed, because only the
# last expression of a cell is shown.
anime_list
# Show the anime_csv rows for the collected ids (the index holds anime_id values).
anime_csv.loc[anime_list]

# Show favorite anime of users

In [None]:
# Print index, weight and dic_user token for the first five entries of user_rating.
for idx, wgt in user_rating[:5]:
    print(idx, wgt, dic_user[idx])

In [None]:
# First entry of user_rating, kept as a one-element list by the slice.
user_rating[:1]

In [None]:
'''
Favorite anime of user_id-35437
'''
sim_anime.num_best = 30
# Query the anime-side index with only the first entry of user_rating.
anime_rating = sim_anime[user_rating[:1]]
for idx, rating in anime_rating:
    # Tokens look like 'anime_id-<id>'; the id is converted to int because a
    # string id never matches the numeric anime_id column in the title lookup.
    print(rating, Anime_id2title(int(re.sub('anime_id-', '', dic_anime[idx]))))

In [None]:
'''
Favorite anime of user_id-52104
'''
sim_anime.num_best = 30
# Query the anime-side index with only the second entry of user_rating.
anime_rating = sim_anime[user_rating[1:2]]
for idx, rating in anime_rating:
    # Tokens look like 'anime_id-<id>'; the id is converted to int because a
    # string id never matches the numeric anime_id column in the title lookup.
    print(rating, Anime_id2title(int(re.sub('anime_id-', '', dic_anime[idx]))))

In [None]:
'''
Favorite anime of user_id-47893
'''
sim_anime.num_best = 30
# Query the anime-side index with only the third entry of user_rating.
anime_rating = sim_anime[user_rating[2:3]]
for idx, rating in anime_rating:
    # Tokens look like 'anime_id-<id>'; the id is converted to int because a
    # string id never matches the numeric anime_id column in the title lookup.
    print(rating, Anime_id2title(int(re.sub('anime_id-', '', dic_anime[idx]))))

In [None]:
'''
Favorite anime of user_id-1534
'''
sim_anime.num_best = 30
# Query the anime-side index with only the fourth entry of user_rating.
anime_rating = sim_anime[user_rating[3:4]]
for idx, rating in anime_rating:
    # Tokens look like 'anime_id-<id>'; the id is converted to int because a
    # string id never matches the numeric anime_id column in the title lookup.
    print(rating, Anime_id2title(int(re.sub('anime_id-', '', dic_anime[idx]))))

In [None]:
'''
Favorite anime of user_id-8278
'''
sim_anime.num_best = 30
# Query the anime-side index with only the fifth entry of user_rating.
anime_rating = sim_anime[user_rating[4:5]]
for idx, rating in anime_rating:
    # Tokens look like 'anime_id-<id>'; the id is converted to int because a
    # string id never matches the numeric anime_id column in the title lookup.
    print(rating, Anime_id2title(int(re.sub('anime_id-', '', dic_anime[idx]))))