In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Loading the dataset
anime_data = pd.read_csv('Anime_data.csv')

In [4]:
# Showing first 5 rows from the dataset
anime_data.head()

Unnamed: 0,Anime_id,Title,Genre,Synopsis,Type,Producer,Studio,Rating,ScoredBy,Popularity,Members,Episodes,Source,Aired,Link
0,1,Cowboy Bebop,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...","In the year 2071, humanity has colonized sever...",TV,['Bandai Visual'],['Sunrise'],8.81,363889.0,39.0,704490.0,26.0,Original,"Apr 3, 1998 to Apr 24, 1999",https://myanimelist.net/anime/1/Cowboy_Bebop
1,5,Cowboy Bebop: Tengoku no Tobira,"['Action', 'Space', 'Drama', 'Mystery', 'Sci-Fi']","Another day, another bounty—such is the life o...",Movie,"['Sunrise', 'Bandai Visual']",['Bones'],8.41,111187.0,475.0,179899.0,1.0,Original,"Sep 1, 2001",https://myanimelist.net/anime/5/Cowboy_Bebop__...
2,6,Trigun,"['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...","Vash the Stampede is the man with a $$60,000,0...",TV,['Victor Entertainment'],['Madhouse'],8.31,197451.0,158.0,372709.0,26.0,Manga,"Apr 1, 1998 to Sep 30, 1998",https://myanimelist.net/anime/6/Trigun
3,7,Witch Hunter Robin,"['Action', 'Magic', 'Police', 'Supernatural', ...",Witches are individuals with special powers li...,TV,['Bandai Visual'],['Sunrise'],7.34,31875.0,1278.0,74889.0,26.0,Original,"Jul 2, 2002 to Dec 24, 2002",https://myanimelist.net/anime/7/Witch_Hunter_R...
4,8,Bouken Ou Beet,"['Adventure', 'Fantasy', 'Shounen', 'Supernatu...",It is the dark century and the people are suff...,TV,,['Toei Animation'],7.04,4757.0,3968.0,11247.0,52.0,Manga,"Sep 30, 2004 to Sep 29, 2005",https://myanimelist.net/anime/8/Bouken_Ou_Beet


In [6]:
# Number of rows and cols in our dataset
anime_data.shape

(17002, 15)

In [9]:
# Selecting the relevant cols
selected_features = ['Genre','Synopsis','Type','Producer','Studio']
print(selected_features)

['Genre', 'Synopsis', 'Type', 'Producer', 'Studio']


In [10]:
anime_data.describe()

Unnamed: 0,Anime_id,Rating,ScoredBy,Popularity,Members,Episodes
count,17002.0,14425.0,13227.0,16368.0,17002.0,14085.0
mean,20446.579638,6.287867,11390.84,8131.919599,20381.3,11.482712
std,14342.513259,1.141401,43284.34,4714.683351,71214.04,44.08904
min,1.0,1.0,1.0,1.0,0.0,1.0
25%,5581.5,5.62,43.0,4042.5,145.0,1.0
50%,21334.0,6.41,478.0,8115.0,1113.0,1.0
75%,34789.25,7.09,3831.0,12208.25,7855.75,12.0
max,40960.0,10.0,1006242.0,16338.0,1451708.0,1818.0


In [11]:
# Replacing missing values with null string
for feature in selected_features:
    anime_data[feature] = anime_data[feature].fillna("")

In [13]:
# Combining all the selected features
combined_features = anime_data['Genre']+' '+anime_data['Synopsis']+' '+anime_data['Type']+' '+anime_data['Producer']+' '+anime_data['Studio']

In [14]:
print(combined_features)

0        ['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...
1        ['Action', 'Space', 'Drama', 'Mystery', 'Sci-F...
2        ['Action', 'Sci-Fi', 'Adventure', 'Comedy', 'D...
3        ['Action', 'Magic', 'Police', 'Supernatural', ...
4        ['Adventure', 'Fantasy', 'Shounen', 'Supernatu...
                               ...                        
16997                                            Special  
16998                                                 TV  
16999                                            Special  
17000                                                     
17001                                                OVA  
Length: 17002, dtype: object


In [15]:
# Converting text data to feature Vector
vectorizer = TfidfVectorizer()

In [17]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [18]:
print(feature_vectors)

  (0, 37892)	0.056281414496982575
  (0, 42275)	0.05554992930743152
  (0, 3704)	0.05420814957716939
  (0, 40850)	0.02581805289353793
  (0, 32643)	0.03964119929977549
  (0, 23452)	0.03827711853623936
  (0, 43286)	0.03914667049079709
  (0, 1300)	0.03771451676720628
  (0, 26301)	0.046101009288666096
  (0, 18995)	0.03140577285155169
  (0, 26001)	0.03610622426157933
  (0, 18000)	0.10773905794579014
  (0, 36305)	0.09939446005768138
  (0, 16983)	0.10044196479121079
  (0, 6909)	0.07272181062616453
  (0, 42832)	0.08128730847329146
  (0, 8113)	0.10285778109764956
  (0, 16273)	0.0810664085142733
  (0, 22556)	0.061911752196353975
  (0, 9322)	0.12096493268203946
  (0, 16617)	0.04459279591555815
  (0, 42802)	0.05198192317311087
  (0, 22722)	0.10051053115862048
  (0, 29070)	0.055735347648041854
  (0, 26116)	0.045676901283701776
  :	:
  (16976, 25809)	1.0
  (16977, 28570)	1.0
  (16978, 25809)	1.0
  (16979, 40850)	1.0
  (16980, 25809)	1.0
  (16981, 40850)	1.0
  (16982, 28570)	1.0
  (16983, 36785)	1.0
  

In [19]:
# Getting Sinmilarity Score using Cosine Similarity
similarity = cosine_similarity(feature_vectors)

In [20]:
print(similarity)

[[1.         0.29075527 0.11623803 ... 0.         0.         0.        ]
 [0.29075527 1.         0.11064093 ... 0.         0.         0.        ]
 [0.11623803 0.11064093 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         1.        ]]


In [21]:
similarity.shape

(17002, 17002)

In [65]:
# Taking user input for anime name
anime_name = input("Please Enter Anime Name: ")

Please Enter Anime Name: Dragon Ball


In [66]:
# Create a list with all anime names
list_of_all_titles = anime_data['Title'].tolist()
print(list_of_all_titles)

['Cowboy Bebop', 'Cowboy Bebop: Tengoku no Tobira', 'Trigun', 'Witch Hunter Robin', 'Bouken Ou Beet', 'Eyeshield 21', 'Hachimitsu to Clover', 'Hungry Heart: Wild Striker', 'Initial D Fourth Stage', 'Monster', 'Naruto', 'One Piece', 'Tennis no Ouji-sama', 'Ring ni Kakero 1', 'School Rumble', 'Sunabouzu', 'Texhnolyze', 'Texhnolyze', 'Trinity Blood', 'Yakitate!! Japan', 'Zipang', 'Neon Genesis Evangelion', 'Neon Genesis Evangelion: Death & Rebirth', 'Neon Genesis Evangelion: The End of Evangelion', 'Kenpuu Denki Berserk', 'Koukaku Kidoutai', 'Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen', 'Rurouni Kenshin: Meiji Kenkaku Romantan', 'Rurouni Kenshin: Meiji Kenkaku Romantan - Ishinshishi e no Chinkonka', 'Akira', '.hack//Sign', 'Aa! Megami-sama!', 'Aa! Megami-sama! (TV)', 'Tenshi Kinryouku', 'Kidou Tenshi Angelic Layer', 'Ai Yori Aoshi', 'Appleseed (Movie)', 'Arc the Lad', 'Avenger', 'Beck', 'Blue Gender', 'Chobits', 'Chrno Crusade', 'Chrno Crusade', 'D.N.Angel', 'D.C.: Da Capo', 'D

In [67]:
# Now finding the closest match by the user input from the list
find_close_match = difflib.get_close_matches(anime_name, list_of_all_titles)
print(find_close_match)

['Dragon Ball', 'Dragon Ball Z', 'Dragon Ball GT']


In [68]:
closest_match = find_close_match[0]
print(closest_match)

Dragon Ball


In [69]:
# Now we will find the index of the closest_match
anime_id = anime_data[anime_data.Title == closest_match]["Anime_id"].values[0]
print(anime_id)

223


In [70]:
# Getting similar anime
similarity_score = list(enumerate(similarity[anime_id]))

In [71]:
print(similarity_score)

[(0, 0.06386608582937064), (1, 0.06308094633874649), (2, 0.06847698341372929), (3, 0.0445093260447085), (4, 0.061694427862283394), (5, 0.07033489358889325), (6, 0.06868980271826032), (7, 0.05912454081987553), (8, 0.02960509675121857), (9, 0.08506907511192559), (10, 0.0993099572786946), (11, 0.09013985883121198), (12, 0.10978148937685159), (13, 0.058527538529318655), (14, 0.060297328760309264), (15, 0.07537441065641741), (16, 0.0774414059290778), (17, 0.0774414059290778), (18, 0.07873842842087495), (19, 0.04371996732247336), (20, 0.06564984522366872), (21, 0.08646155948722577), (22, 0.08069979666885692), (23, 0.057129091839483975), (24, 0.09065080993521367), (25, 0.1059481003272714), (26, 0.0897610556774352), (27, 0.0630355011736999), (28, 0.06101635672537968), (29, 0.0784589634365033), (30, 0.08862583589131819), (31, 0.045703367215489815), (32, 0.05408666934921666), (33, 0.06959502195943758), (34, 0.02165878612523096), (35, 0.10273272306323551), (36, 0.06167265304918398), (37, 0.052601

In [72]:
print(len(similarity_score))

17002


In [73]:
# Sort the similarity score in descending order
sorted_similar_anime = sorted(similarity_score, key = lambda x:x[1], reverse = True)

In [74]:
print(sorted_similar_anime)

[(223, 1.0000000000000002), (3839, 0.19887530743713475), (2403, 0.19184824242950835), (13925, 0.1794750780450216), (10548, 0.1775931899338451), (10549, 0.1775931899338451), (10551, 0.1775931899338451), (10552, 0.1775931899338451), (10553, 0.1775931899338451), (5338, 0.1737838966107416), (14147, 0.17315053804658923), (6166, 0.16566523720724746), (2746, 0.1653923631108648), (16147, 0.16464664487062763), (13429, 0.1627857271983419), (13430, 0.1627857271983419), (11316, 0.16149879990205268), (5691, 0.15929989750845217), (7357, 0.15768905427927063), (594, 0.15658694017667224), (1506, 0.15524819943367343), (2516, 0.1535200583894571), (773, 0.1530896419297683), (6607, 0.15148493151469056), (2170, 0.15115249769121047), (10139, 0.15090722418208952), (12186, 0.15090722418208952), (10547, 0.15066344971313494), (8568, 0.15006905983335045), (12035, 0.1494959658900493), (3668, 0.14947626742242118), (12396, 0.14903405035674672), (129, 0.148580994763434), (15115, 0.14824641183489137), (6128, 0.1481590

In [75]:
list_of_all_anime_id = anime_data['Anime_id'].tolist()
print(list_of_all_anime_id)

[1, 5, 6, 7, 8, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 33, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 71, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 83, 84, 85, 86, 87, 87, 88, 89, 90, 91, 92, 93, 94, 95, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 214, 215, 216, 217, 218, 219, 221, 223, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237

In [76]:
# name of the similar movies:
print("Suggested Animes:")
print("--------------------------------------------------------------")
count = 1
for anime in sorted_similar_anime[1:]:
    anime_id = anime[0]
    if anime_id in list_of_all_anime_id:
        name = anime_data.loc[anime_data['Anime_id'] == anime_id]
        titles = name['Title']
        if count <= 5:
            print(titles.values[0])
            count += 1

Suggested Animes:
--------------------------------------------------------------
Himitsu no Akko-chan 3
Kodomo no Jikan (TV)
Precure All Stars GoGo Dream Live!
Akebi no Hana: Maho
Betsu ni Anta no Tame ni Ookiku Natta n ja Nai n Dakara ne!!
