<a href="https://colab.research.google.com/github/siripagadala/anime_recommender/blob/main/prototype_anime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the Dependencies

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [None]:
# Load the anime catalogue CSV from the Colab working directory into a DataFrame
anime_data = pd.read_csv(filepath_or_buffer='/content/anime.csv')

In [None]:
# Display the first five rows of the DataFrame as a quick sanity check
anime_data.head(n=5)

Unnamed: 0,MAL_ID,Name,Score,Genres,English name,Japanese name,Type,Episodes,Aired,Premiered,...,Score-10,Score-9,Score-8,Score-7,Score-6,Score-5,Score-4,Score-3,Score-2,Score-1
0,1,Cowboy Bebop,8.78,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Cowboy Bebop,カウボーイビバップ,TV,26,"Apr 3, 1998 to Apr 24, 1999",Spring 1998,...,229170.0,182126.0,131625.0,62330.0,20688.0,8904.0,3184.0,1357.0,741.0,1580.0
1,5,Cowboy Bebop: Tengoku no Tobira,8.39,"Action, Drama, Mystery, Sci-Fi, Space",Cowboy Bebop:The Movie,カウボーイビバップ 天国の扉,Movie,1,"Sep 1, 2001",Unknown,...,30043.0,49201.0,49505.0,22632.0,5805.0,1877.0,577.0,221.0,109.0,379.0
2,6,Trigun,8.24,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Trigun,トライガン,TV,26,"Apr 1, 1998 to Sep 30, 1998",Spring 1998,...,50229.0,75651.0,86142.0,49432.0,15376.0,5838.0,1965.0,664.0,316.0,533.0
3,7,Witch Hunter Robin,7.27,"Action, Mystery, Police, Supernatural, Drama, ...",Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),TV,26,"Jul 2, 2002 to Dec 24, 2002",Summer 2002,...,2182.0,4806.0,10128.0,11618.0,5709.0,2920.0,1083.0,353.0,164.0,131.0
4,8,Bouken Ou Beet,6.98,"Adventure, Fantasy, Shounen, Supernatural",Beet the Vandel Buster,冒険王ビィト,TV,52,"Sep 30, 2004 to Sep 29, 2005",Fall 2004,...,312.0,529.0,1242.0,1713.0,1068.0,634.0,265.0,83.0,50.0,27.0


In [None]:
# Shape of the DataFrame as a (number of rows, number of columns) tuple
anime_data.shape

(17562, 35)

In [None]:
# Feature selection: the columns whose text is combined to describe each anime
selected_features = [
    'Score',
    'Genres',
    'English name',
]
print(selected_features)

['Score', 'Genres', 'English name']


In [None]:
# Replace missing values in the selected columns with an empty string so that
# the string concatenation performed later does not produce NaN entries
anime_data[selected_features] = anime_data[selected_features].fillna('')

In [None]:
# Build one space-separated text per row: "<Score> <Genres> <English name>"
combined_features = (
    anime_data['Score']
    .add(' ')
    .add(anime_data['Genres'])
    .add(' ')
    .add(anime_data['English name'])
)

In [None]:
# Preview the combined text column (pandas abbreviates long Series when printing)
print(combined_features)

0        8.78 Action, Adventure, Comedy, Drama, Sci-Fi,...
1        8.39 Action, Drama, Mystery, Sci-Fi, Space Cow...
2        8.24 Action, Sci-Fi, Adventure, Comedy, Drama,...
3        7.27 Action, Mystery, Police, Supernatural, Dr...
4        6.98 Adventure, Fantasy, Shounen, Supernatural...
                               ...                        
17557     Unknown Adventure, Mystery, Supernatural Unknown
17558         Unknown Comedy, Horror, Supernatural Unknown
17559    Unknown Mystery, Dementia, Horror, Psychologic...
17560     Unknown Adventure, Slice of Life, Comedy Unknown
17561                      Unknown Action, Fantasy Unknown
Length: 17562, dtype: object


In [None]:
# TF-IDF vectorizer with default settings; it is fitted on the combined text in the next cell
vectorizer = TfidfVectorizer()

In [None]:
# Learn the vocabulary from the combined text and encode every row as a TF-IDF vector
feature_vectors = vectorizer.fit_transform(raw_documents=combined_features)

In [None]:
# Inspect the TF-IDF matrix; the printout lists (row, column) positions with their weights
print(feature_vectors)

  (0, 689)	0.5653662466053059
  (0, 1397)	0.5653662466053059
  (0, 5777)	0.2734969402882887
  (0, 2152)	0.1756260882352929
  (0, 5399)	0.17564939138980556
  (0, 1794)	0.1745866702627489
  (0, 1293)	0.12461252242014773
  (0, 229)	0.16700134077471884
  (0, 206)	0.15099681668379392
  (0, 156)	0.3571077342215123
  (1, 4138)	0.30445481346072956
  (1, 6148)	0.1844463778235757
  (1, 4186)	0.23277165452021983
  (1, 107)	0.3274947424709371
  (1, 689)	0.523880835800464
  (1, 1397)	0.523880835800464
  (1, 5777)	0.25342829807652967
  (1, 2152)	0.16273900758228924
  (1, 5399)	0.16276060079931656
  (1, 1794)	0.16177585995989055
  (1, 206)	0.13991698125328625
  (2, 6331)	0.7184009038514474
  (2, 5621)	0.23181696877427038
  (2, 80)	0.43519373321395505
  (2, 2152)	0.21652920855924168
  :	:
  (17558, 2875)	0.6854923275577652
  (17558, 6454)	0.41551346978620496
  (17558, 5979)	0.5136586865732226
  (17558, 1293)	0.30594701991282436
  (17559, 5763)	0.43590341829739143
  (17559, 2810)	0.41837002849831423
  

Cosine Similarity

In [None]:
# Pairwise cosine similarity between all rows of the TF-IDF matrix.
# Y=None compares the matrix with itself and dense_output=True returns a dense
# array; both are the library defaults, spelled out here for clarity.
similarity = cosine_similarity(X=feature_vectors, Y=None, dense_output=True)

In [None]:
# Preview the similarity matrix (NumPy abbreviates large arrays when printing)
print(similarity)

[[1.         0.76822189 0.19528564 ... 0.         0.10350193 0.08196473]
 [0.76822189 1.         0.13135421 ... 0.04198351 0.         0.07595032]
 [0.19528564 0.13135421 1.         ... 0.         0.12760741 0.10105422]
 ...
 [0.         0.04198351 0.         ... 1.         0.02451171 0.0369644 ]
 [0.10350193 0.         0.12760741 ... 0.02451171 1.         0.24545613]
 [0.08196473 0.07595032 0.10105422 ... 0.0369644  0.24545613 1.        ]]


In [None]:
# The similarity matrix is square: one row and one column per row of the dataset
print(similarity.shape)

(17562, 17562)


In [None]:
# Ask the user for a favourite title. The text is fuzzy-matched against the
# 'English name' column in the following cells, so an approximate spelling is fine.
anime_name = input(' Enter your favourite anime: ')

 Enter your favourite anime: cowboy bepop


In [None]:
# creating a list with all the anime titles ('English name') given in the dataset
list_of_all_titles=anime_data['English name'].tolist()
# Show only the count and a short preview: the list has one entry per dataset
# row, so printing it in full floods the notebook output.
print(len(list_of_all_titles), 'titles, for example:', list_of_all_titles[:10])



In [None]:
# Fuzzy-match the user's text against every known title; returns up to `n`
# candidates whose similarity ratio is at least `cutoff`, best match first
# (n=3 and cutoff=0.6 are difflib's defaults, written out explicitly)
find_close_match = difflib.get_close_matches(
    word=anime_name,
    possibilities=list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_match)

['Cowboy Bebop']


In [None]:
# Use the best (first) fuzzy match. When nothing in the dataset resembles the
# entered name the candidate list is empty; fail with a readable message
# (same IndexError type the bare [0] lookup would raise) instead of an opaque one.
if not find_close_match:
  raise IndexError('No title in the dataset is close to the entered name; try a different spelling.')
close_match = find_close_match[0]
print(close_match)

Cowboy Bebop


In [None]:
# finding the row position of the anime with the matched title.
# The similarity matrix is indexed by row position (0 .. number of rows - 1),
# while MAL_ID is a catalogue id with gaps (1, 5, 6, ... for the first rows).
# Using MAL_ID as the index would select another anime's row, or run past the
# end of the matrix for large ids, so the position of the first matching row
# is used instead.
title_matches = (anime_data['English name'] == close_match).to_numpy()
index_of_anime = int(np.flatnonzero(title_matches)[0])
print(index_of_anime)

1


In [None]:
# getting a list of (row position, similarity score) pairs for the chosen anime,
# one pair per row of the similarity matrix
similarity_score = list(enumerate(similarity[index_of_anime]))
# Preview only the first pairs; printing the whole list floods the notebook output
print(similarity_score[:10])

[(0, 0.7682218909824695), (1, 1.0), (2, 0.13135421027921754), (3, 0.10028966593250163), (4, 0.032288407043638556), (5, 0.03962985387707427), (6, 0.026952428293499378), (7, 0.0), (8, 0.07465169935932796), (9, 0.09365350018249254), (10, 0.024286864369390544), (11, 0.05475990606883786), (12, 0.06977874792896574), (13, 0.04215434526837078), (14, 0.0), (15, 0.07981980508471552), (16, 0.12750988634344843), (17, 0.023243594495726636), (18, 0.0), (19, 0.16337201037206517), (20, 0.09307901584695656), (21, 0.06489099586778213), (22, 0.07940929412080214), (23, 0.05284153004960564), (24, 0.11447124006828042), (25, 0.03833218293657063), (26, 0.020184577932219675), (27, 0.060542670711719426), (28, 0.08600706094184647), (29, 0.11394473513816866), (30, 0.0), (31, 0.0), (32, 0.055362768872582194), (33, 0.06976801860629304), (34, 0.026726782783796702), (35, 0.1729666253084667), (36, 0.13247733432732578), (37, 0.10611626650911926), (38, 0.02152785460979415), (39, 0.15045200043013124), (40, 0.100968062954

In [None]:
# Number of (index, score) pairs: one for every entry in the selected similarity row
len(similarity_score)

17562

In [None]:
# sorting the anime by similarity score, most similar first
# (each element is an (index, score) pair, so the key picks the score)
sorted_similar_anime=sorted(similarity_score,key=lambda x:x[1], reverse=True)
# Preview only the top entries; printing the whole list floods the notebook output
print(sorted_similar_anime[:10])

[(1, 1.0), (0, 0.7682218909824695), (3454, 0.6058358252804368), (4959, 0.472182686464366), (991, 0.43760975108553574), (8382, 0.3668565574792043), (5228, 0.3658261176632185), (7579, 0.3658261176632185), (11778, 0.36179269948928605), (2765, 0.3591991949237179), (1750, 0.3550500833319398), (1263, 0.3536284286776701), (2868, 0.3447550752067996), (15266, 0.33848322591847957), (2247, 0.33633416244940534), (14811, 0.32914758703533886), (10115, 0.32727633194923006), (10568, 0.32727633194923006), (16224, 0.32706323184132907), (17155, 0.32706323184132907), (3571, 0.32016602842169545), (4212, 0.32016602842169545), (2217, 0.3200871440418924), (14399, 0.3191419191449736), (133, 0.31776816651788276), (6832, 0.31772946454186546), (16885, 0.31772946454186546), (8961, 0.3170115776557626), (2384, 0.31499106492121925), (318, 0.31271460064004436), (7512, 0.3123737590991874), (7853, 0.3123737590991874), (6308, 0.31124516643261047), (7945, 0.31124516643261047), (10602, 0.31124516643261047), (11757, 0.31124

In [None]:
# print the names of the most similar anime based on their row position
print('Anime suggested for you: \n')

# The first element of each pair in sorted_similar_anime comes from enumerating
# a row of the similarity matrix, so it is a row position in anime_data and not
# a MAL_ID. The title is therefore looked up positionally with .iloc; matching
# it against the MAL_ID column would print unrelated titles.
# Only the top 7 entries are visited instead of scanning the whole ranking.
for i, anime in enumerate(sorted_similar_anime[:7], start=1):
  index = anime[0]
  title_from_index = anime_data['English name'].iloc[index]
  print(i,', ',title_from_index)

Anime suggested for you: 

1 ,  Cowboy Bebop
2 ,  Slave Doll - Maid to Order
3 ,  Unknown
4 ,  Initial D Extra Stage 2
5 ,  Unknown
6 ,  Momotaro:Sacred Sailors
7 ,  Ah My Buddha Katsu Special
