<a href="https://colab.research.google.com/github/siripagadala/anime_recommender/blob/main/prototype_anime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the Dependencies

In [103]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [104]:
anime_data = pd.read_csv('/content/anime.csv')

In [105]:
#printing the first 5 rows of the pandas dataframe
anime_data.head()

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [106]:
#no of rows and columns in the data frame
anime_data.shape

(17562, 35)

In [107]:
#selecting the relevant features-> feature selection for recommendation
selected_features = ['Score','Genres','English name','Japanese name']
print(selected_features)

['Score', 'Genres', 'English name', 'Japanese name']


In [108]:
#replacing null values with null string
for feature in selected_features:
  anime_data[feature] = anime_data[feature].fillna('')

In [109]:
#combine all 3 selected features
combined_features = anime_data['Score']+' '+anime_data['Genres']+' '+anime_data['English name']+' '+anime_data['Japanese name']

In [110]:
print(combined_features)

0        8.78 Action, Adventure, Comedy, Drama, Sci-Fi,...
1        8.39 Action, Drama, Mystery, Sci-Fi, Space Cow...
2        8.24 Action, Sci-Fi, Adventure, Comedy, Drama,...
3        7.27 Action, Mystery, Police, Supernatural, Dr...
4        6.98 Adventure, Fantasy, Shounen, Supernatural...
                               ...                        
17557    Unknown Adventure, Mystery, Supernatural Unkno...
17558    Unknown Comedy, Horror, Supernatural Unknown 見...
17559    Unknown Mystery, Dementia, Horror, Psychologic...
17560    Unknown Adventure, Slice of Life, Comedy Unkno...
17561        Unknown Action, Fantasy Unknown SCARLET NEXUS
Length: 17562, dtype: object


In [111]:
vectorizer = TfidfVectorizer()

In [112]:
feature_vectors= vectorizer.fit_transform(combined_features)

In [113]:
print(feature_vectors)

  (0, 12199)	0.49217954147001386
  (0, 983)	0.49217954147001386
  (0, 1777)	0.49217954147001386
  (0, 6808)	0.23788712932875197
  (0, 2657)	0.15289127728589702
  (0, 6374)	0.1529115638452233
  (0, 2239)	0.15198641205173397
  (0, 1662)	0.10848142158190618
  (0, 458)	0.14538300406313512
  (0, 429)	0.13145026687584352
  (0, 347)	0.3108801099108452
  (1, 19123)	0.4474387620506464
  (1, 4919)	0.2345680377306245
  (1, 7233)	0.13710102570373892
  (1, 4975)	0.18513645492304276
  (1, 228)	0.26047508126354524
  (1, 12199)	0.4166720425737814
  (1, 983)	0.4166720425737814
  (1, 1777)	0.4166720425737814
  (1, 6808)	0.20139178435449726
  (1, 2657)	0.1294355320177614
  (1, 6374)	0.12945270632388153
  (1, 2239)	0.12866948626899533
  (1, 429)	0.111283884398034
  (2, 14470)	0.5834490894933229
  :	:
  (17559, 3416)	0.3842211524015803
  (17559, 7245)	0.34059192134320226
  (17559, 7898)	0.3230454006669122
  (17559, 1829)	0.32030548417177224
  (17559, 2023)	0.1800300745800711
  (17559, 7266)	0.2339394874230

Cosine Similarity

In [114]:
#getting the similarity scores using cosine similarity
similarity = cosine_similarity(feature_vectors)

In [115]:
print(similarity)

[[1.         0.73690957 0.13807024 ... 0.         0.03720511 0.02356417]
 [0.73690957 1.         0.08484807 ... 0.03066629 0.         0.01994908]
 [0.13807024 0.08484807 1.         ... 0.         0.04279298 0.02710329]
 ...
 [0.         0.03066629 0.         ... 1.         0.00929173 0.01120669]
 [0.03720511 0.         0.04279298 ... 0.00929173 1.         0.03345858]
 [0.02356417 0.01994908 0.02710329 ... 0.01120669 0.03345858 1.        ]]


In [116]:
print(similarity.shape)

(17562, 17562)


In [117]:
#getting the anime name from the user
anime_name = input(' Enter your favourite anime: ')

 Enter your favourite anime: evangelion


In [118]:
#creating a list with all the movie names given in the dataset
list_of_all_titles=anime_data['English name'].tolist()
print(list_of_all_titles)



In [119]:
#finding the close-match for the anime name given by the user
find_close_match = difflib.get_close_matches(anime_name,list_of_all_titles)
print(find_close_match)

['Revelation', 'Neon Genesis Evangelion']


In [120]:
close_match = find_close_match[1]
print(close_match)

Neon Genesis Evangelion


In [121]:
#finding the index of the anime witht he title
index_of_anime = anime_data[anime_data['English name'] == close_match]['MAL_ID'].values[0]
print(index_of_anime)

30


In [122]:
#getting a list of similar movies
similarity_score = list(enumerate(similarity[index_of_anime]))
print(similarity_score)

[(0, 0.012505893710366998), (1, 0.0), (2, 0.014384163444730312), (3, 0.04642761801980551), (4, 0.031966114730256616), (5, 0.018189542366711896), (6, 0.04164722807273174), (7, 0.0126490356216751), (8, 0.04594821014360429), (9, 0.05114767787439792), (10, 0.01448911902383728), (11, 0.010545652651297165), (12, 0.015116217579180571), (13, 0.0), (14, 0.04884036239586804), (15, 0.012546882215610466), (16, 0.0), (17, 0.13324872102162355), (18, 0.011646468749358447), (19, 0.06180362951745874), (20, 0.09219750423670366), (21, 0.0), (22, 0.0), (23, 0.11701948023983225), (24, 0.03356649710150687), (25, 0.021904459116343013), (26, 0.038589561667152605), (27, 0.0), (28, 0.06462843088364856), (29, 0.029886929018972167), (30, 1.0), (31, 0.6963772451266921), (32, 0.07080445317443086), (33, 0.01056896136142633), (34, 0.04126645755414024), (35, 0.0), (36, 0.0), (37, 0.0), (38, 0.00993959205878808), (39, 0.021903663447899343), (40, 0.10248637826204765), (41, 0.06428956388298895), (42, 0.10869192665865807)

In [123]:
len(similarity_score)

17562

In [124]:
#sorting the movies based on their similarity score
sorted_similar_anime=sorted(similarity_score,key=lambda x:x[1], reverse=True)
print(sorted_similar_anime)

[(30, 1.0), (31, 0.6963772451266921), (279, 0.6657851100136276), (280, 0.6345827571089964), (908, 0.5586852148651782), (5844, 0.5586852148651782), (795, 0.5135674982053415), (2011, 0.4960052706972846), (2945, 0.4847792245590625), (2810, 0.4715641542032205), (13343, 0.2929608233739612), (13659, 0.27418665700731776), (2937, 0.2443332857298156), (9292, 0.23642893450016805), (7113, 0.23613545571781366), (2339, 0.22733916424474387), (277, 0.22559159807717913), (5715, 0.2240475142121831), (4416, 0.22391797179554618), (13226, 0.22357070691812464), (96, 0.21960770426911963), (9764, 0.21746768595983085), (3042, 0.21592600483636148), (2815, 0.21463115999952517), (14350, 0.21452042181755665), (13757, 0.21436959912136092), (14455, 0.21222485991277615), (11849, 0.20863260086078955), (10208, 0.20810649815196897), (14862, 0.20018130553398694), (14165, 0.1986844587617658), (740, 0.1971253282275395), (554, 0.18720846180395959), (1545, 0.18439849288081037), (78, 0.18179083705672258), (9900, 0.1807168450

In [125]:
#print the name of similar anime based on their index
print('Anime suggested for you: \n')

i=1
for anime in sorted_similar_anime:
  index = anime[0]
  matched_anime_data = anime_data[anime_data['MAL_ID'] == index]
  if not matched_anime_data.empty:
    english_name = matched_anime_data['English name'].values[0]
    if(english_name == 'Unknown'):
      japanese_name = matched_anime_data['Japanese name'].values[0]
      print(i, ', ', japanese_name)
    else:
      print(i,', ', english_name)
      i += 1
      if(i>10):
        break

Anime suggested for you: 

1 ,  Neon Genesis Evangelion
2 ,  Neon Genesis Evangelion:Death & Rebirth
3 ,  Requiem from the Darkness
4 ,  Animal Lane
5 ,  Fullmetal Alchemist:Premium OVA Collection
6 ,  Pokemon:Pikachu's Ghost Festival!
7 ,  Brother, Dear Brother
8 ,  You're Under Arrest Mini Specials
9 ,  Desperate Carnal Housewives
10 ,  The Secret Garden
