# Anime character description 

- 애니메이션 캐릭터 dataset에서 description 추출
- Description 문자열 벡터화
- 최애캐와 다른 모든 캐릭터 description 사이 코사인 유사도 추출
- 유사도 기반 top 20 characters 추천

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Load the character table. The path is hard-coded (presumably a Google Drive
# mount inside Colab -- confirm before running elsewhere). Later cells read the
# 'CharName' and 'Description' columns from this frame.
characters_data = pd.read_csv('/content/drive/MyDrive/CUAI_summer/character_full.csv')

In [3]:
characters_data.head(3)

Unnamed: 0,ID,CharName,Gender,HairColor,Tags,Description,url,MAL_ID
0,0,L,Male,Black,"Analytical,Barefoot,Detectives,EyeBags,SweetTooth","Secretive, meticulous and cunning, L's desire ...",https://www.anime-planet.com/characters/l-deat...,1535.0
1,98,Kakashi HATAKE,Male,Grey,"Adults,Anti-GravityHair,Beastmasters,ExoticEye...",Kakashi is a jounin ninja from Konoha who is k...,https://www.anime-planet.com/characters/kakash...,34566.0
2,348,Light YAGAMI,Male,Brown,"Analytical,BigEgo,Charismatic,GodComplex,Manip...","Armed with the power of Ryuk's Death Note, Lig...",https://www.anime-planet.com/characters/light-...,1535.0


In [4]:
characters_data['Description'].head(3)

0    Secretive, meticulous and cunning, L's desire ...
1    Kakashi is a jounin ninja from Konoha who is k...
2    Armed with the power of Ryuk's Death Note, Lig...
Name: Description, dtype: object

In [5]:
# Build the TF-IDF vectorizer: drop English stop words ('the', 'a', ...) and
# use sublinear term-frequency scaling, i.e. tf is replaced by 1 + log(tf).
tfidf = TfidfVectorizer(stop_words='english', sublinear_tf=True)

# A missing description (NaN) makes fit_transform raise, so substitute an
# empty string for it, as the synopsis pipeline does. fillna returns a new
# Series, so characters_data itself is not modified.
tfidf_matrix = tfidf.fit_transform(characters_data['Description'].fillna(''))

# (number of characters, vocabulary size)
tfidf_matrix.shape

(13888, 9396)

In [6]:
from sklearn.metrics.pairwise import linear_kernel

# Pairwise similarity between every pair of character descriptions.
# TfidfVectorizer L2-normalises each row by default (norm='l2' is not
# overridden above), so the plain dot product computed by linear_kernel is the
# cosine similarity. NOTE: the result is a dense n x n array; with the 13888
# rows reported above that is roughly 1.5 GB assuming float64.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [7]:
# Lookup table: character name -> row label in characters_data.
indices = pd.Series(characters_data.index, index=characters_data['CharName'])

# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed anything. Deduplicate on the index
# (the names) instead, keeping the first occurrence, so that indices[name]
# always yields a single integer rather than a Series.
indices = indices[~indices.index.duplicated(keep='first')]

In [8]:
def get_recommendations(CharName, cosine_sim=cosine_sim, top_n=20):
    """Return the characters most similar to ``CharName`` by description.

    Args:
        CharName: Character name exactly as stored in
            ``characters_data['CharName']``.
        cosine_sim: Square pairwise-similarity matrix whose row/column order
            matches the rows of ``characters_data``.
        top_n: Maximum number of recommendations to return (default 20).

    Returns:
        A 3-tuple ``(names, sim_scores, char_indices)`` where ``names`` is a
        Series of the recommended character names, ``sim_scores`` is a list
        of ``(row_position, similarity)`` pairs sorted by descending
        similarity, and ``char_indices`` is the list of row positions.

    Raises:
        KeyError: If ``CharName`` is not present in ``indices``.
    """
    # Row position of the query character.
    idx = indices[CharName]
    # When several rows share the same name the lookup returns a Series;
    # fall back to the first matching row instead of failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Pair every other character with its similarity to the query. The query
    # is excluded explicitly: relying on it being sorted first breaks on ties
    # (identical descriptions) and on all-zero rows (empty descriptions).
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # Highest similarity first; keep only the top_n best matches.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    sim_scores = sim_scores[:top_n]

    # Row positions of the recommended characters.
    char_indices = [i for i, _ in sim_scores]

    return characters_data['CharName'].iloc[char_indices], sim_scores, char_indices

In [9]:
get_recommendations('Light YAGAMI')

(3050         Sachiko YAGAMI
 1433          Heiji HATTORI
 599                  Kizaru
 10962              Cybersix
 5387         Kazunobu CHIBA
 7632              The Raven
 233           Ritsu TAINAKA
 243        Tsumugi KOTOBUKI
 2558                  Dorry
 226           Medusa GORGON
 3195           Yuusaku KUDO
 156           Kouichi SHIDO
 445        Silvers RAYLEIGH
 1534                 Sosuke
 2233            Johnny GILL
 8446          Heizo HATTORI
 695         Emporio IVANKOV
 1840     Sonic The Hedgehog
 7328       Riichi JINNOUCHI
 125          Kenshin HIMURA
 Name: CharName, dtype: object,
 [(3050, 0.16059488520684762),
  (1433, 0.10929357400015212),
  (599, 0.0907920217440325),
  (10962, 0.08915917807873473),
  (5387, 0.08576495230727071),
  (7632, 0.08472282514648062),
  (233, 0.08424687364844216),
  (243, 0.07695025071714368),
  (2558, 0.07447096275861159),
  (226, 0.07402580743155779),
  (3195, 0.07054748066655989),
  (156, 0.07024112882800342),
  (445, 0.06960127177

# Anime Synopsis 

- Anime dataset에서 synopsis 추출
- Synopsis 문자열 벡터화
- 최애 Anime와 다른 모든 Anime의 synopsis 사이 코사인 유사도 추출
- 유사도 기반 top 20 Anime 추천

In [10]:
# Load the anime table. The path is hard-coded (presumably a Google Drive
# mount inside Colab -- confirm before running elsewhere). Later cells read the
# 'Name' and 'synopsis' columns from this frame.
anime_data = pd.read_csv('/content/drive/MyDrive/CUAI_summer/anime_full.csv')

In [11]:
anime_data.head(3)

Unnamed: 0,MAL_ID,Name,Genres,synopsis,Type,Source,Year
0,1,Cowboy Bebop,"Action,Adventure,Comedy,Drama,Sci-Fi,Space","In the year 2071, humanity has colonized sever...",TV,Original,1990.0
1,5,Cowboy Bebop: Tengoku no Tobira,"Action,Drama,Mystery,Sci-Fi,Space","other day, another bounty—such is the life of ...",Movie,Original,2000.0
2,6,Trigun,"Action,Sci-Fi,Adventure,Comedy,Drama,Shounen","Vash the Stampede is the man with a $$60,000,0...",TV,Manga,1990.0


In [12]:
anime_data['synopsis'].head(3)

0    In the year 2071, humanity has colonized sever...
1    other day, another bounty—such is the life of ...
2    Vash the Stampede is the man with a $$60,000,0...
Name: synopsis, dtype: object

In [13]:
# Create a fresh vectorizer for the synopses. NOTE: this rebinds `tfidf`, so
# the vectorizer fitted on the character descriptions is no longer reachable
# under that name. Settings: drop English stop words ('the', 'a', ...) and use
# sublinear tf scaling (tf -> 1 + log(tf)).
tfidf = TfidfVectorizer(stop_words='english', sublinear_tf = True)

# Replace missing synopses (NaN) with an empty string; the column in
# anime_data is overwritten in place.
anime_data['synopsis'] = anime_data['synopsis'].fillna('')

# Learn the vocabulary and build the TF-IDF matrix, one row per anime.
tfidf_matrix2 = tfidf.fit_transform(anime_data['synopsis'])

# (number of anime, vocabulary size)
tfidf_matrix2.shape

(12181, 40927)

In [14]:
# Pairwise similarity between every pair of synopses. The TF-IDF rows are
# L2-normalised by default (norm='l2' is not overridden above), so the dot
# product computed by linear_kernel is the cosine similarity. NOTE: the result
# is a dense n x n array; with the 12181 rows reported above that is roughly
# 1.2 GB assuming float64.
cosine_sim2 = linear_kernel(tfidf_matrix2, tfidf_matrix2)

In [15]:
# Lookup table: anime title -> row label in anime_data.
indices2 = pd.Series(anime_data.index, index=anime_data['Name'])

# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed anything. Deduplicate on the index
# (the titles) instead, keeping the first occurrence, so that indices2[name]
# always yields a single integer rather than a Series.
indices2 = indices2[~indices2.index.duplicated(keep='first')]

In [16]:
def get_recommendations_anime(Name, cosine_sim=cosine_sim2, top_n=20):
    """Return the anime most similar to ``Name`` by synopsis.

    Args:
        Name: Anime title exactly as stored in ``anime_data['Name']``.
        cosine_sim: Square pairwise-similarity matrix whose row/column order
            matches the rows of ``anime_data``.
        top_n: Maximum number of recommendations to return (default 20).

    Returns:
        A 3-tuple ``(names, sim_scores, anime_indices)`` where ``names`` is a
        Series of the recommended titles, ``sim_scores`` is a list of
        ``(row_position, similarity)`` pairs sorted by descending similarity,
        and ``anime_indices`` is the list of row positions.

    Raises:
        KeyError: If ``Name`` is not present in ``indices2``.
    """
    # Row position of the query anime.
    idx = indices2[Name]
    # When several rows share the same title the lookup returns a Series;
    # fall back to the first matching row instead of failing further down.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Use the matrix passed by the caller; the previous version ignored the
    # `cosine_sim` argument and always read the global cosine_sim2.
    # The query is excluded explicitly: relying on it being sorted first
    # breaks on ties (identical synopses) and on all-zero rows (empty
    # synopses, which occur because NaN is replaced by '').
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # Highest similarity first; keep only the top_n best matches.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)
    sim_scores = sim_scores[:top_n]

    # Row positions of the recommended anime.
    anime_indices = [i for i, _ in sim_scores]

    return anime_data['Name'].iloc[anime_indices], sim_scores, anime_indices

In [17]:
get_recommendations_anime('Naruto')

(8010                       Boruto: Naruto Next Generations
 1366                                    Naruto: Shippuuden
 6069                              Boruto: Naruto the Movie
 4611            Naruto: Shippuuden Movie 6 - Road to Ninja
 4525      Naruto SD: Rock Lee no Seishun Full-Power Ninden
 533      Naruto: Takigakure no Shitou - Ore ga Eiyuu Da...
 4932                            The Last: Naruto the Movie
 3574           Naruto: Shippuuden Movie 4 - The Lost Tower
 2609                   Naruto: Shippuuden Movie 2 - Kizuna
 650              Naruto: Akaki Yotsuba no Clover wo Sagase
 1831                            Naruto: Shippuuden Movie 1
 3392                               Naruto: The Cross Roads
 6980     Boruto: Naruto the Movie - Naruto ga Hokage ni...
 4162             Naruto: Shippuuden Movie 5 - Blood Prison
 3114     Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...
 787      Naruto Movie 2: Dai Gekitotsu! Maboroshi no Ch...
 4181     Naruto: Honoo no Chuunin Shike

# Character Recommender
# Anime Recommender

In [18]:
get_recommendations('Taiga AISAKA')

(506     Yuusaku KITAMURA
 3868              Miyako
 1338          Mira NYGUS
 6650          Naomi MIWA
 1136       Yamato ISHIDA
 237        Chihiro OGINO
 1433       Heiji HATTORI
 5256     Majiru ITOSHIKI
 4427           Wadatsumi
 2201       Hiromu SHINBO
 2764       Hiroshi AGASA
 948           Dr. Kureha
 3496       Kakeru DAICHI
 9945       Kaya BERGMANN
 4928         Rika SASAKI
 166      Minori KUSHIEDA
 5797    Reiichirou SHIBA
 2501              Marulk
 4451      Najimi TENKUJI
 75         Ryuuji TAKASU
 Name: CharName, dtype: object,
 [(506, 0.16127228489276607),
  (3868, 0.12177815441061064),
  (1338, 0.11130802138265963),
  (6650, 0.10883364067510277),
  (1136, 0.09695775476396162),
  (237, 0.09579430577439475),
  (1433, 0.08646664154171202),
  (5256, 0.08526239607224648),
  (4427, 0.08339308844430773),
  (2201, 0.08293396541666226),
  (2764, 0.08254502623083763),
  (948, 0.08228626028562822),
  (3496, 0.08164426281922424),
  (9945, 0.07920319411783477),
  (4928, 0.078363

In [19]:
get_recommendations_anime('Toradora!')

(4317                Toradora!: Bentou no Gokui
 9326                           Puzzle & Dragon
 4355                          Kuroko no Basket
 3064              Gokujou!! Mecha Mote Iinchou
 3067      Toradora!: SOS! Kuishinbou Banbanzai
 9986                           Musunde Hiraite
 11415          Koi to Yobu ni wa Kimochi Warui
 4014            Angel Beats!: Another Epilogue
 47                              Azumanga Daioh
 2322                       Tenshi Nanka ja Nai
 2989                       Seitokai no Ichizon
 6664                        Wagamama High Spec
 14                               School Rumble
 3356                    Ladies versus Butlers!
 3691                       Seitokai Yakuindomo
 4576       Kono Naka ni Hitori, Imouto ga Iru!
 3337                     Kaichou wa Maid-sama!
 636                             Umi ga Kikoeru
 1384     Ring ni Kakero 1: Nichibei Kessen-hen
 997                                  Tsuyokiss
 Name: Name, dtype: object,
 [(4317, 0.1