# The objective of this assignment is to implement a recommendation system using cosine similarity on an anime dataset

# Data Preprocessing:

In [1]:
import pandas as pd  #import pandas lib. using it's short form pd
data=pd.read_csv(r'anime.csv',header=0)  #read dataset & save it in var. data
data.head()   #display top 5 rows

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [2]:
data.info()  #gives information about null values and data type of each column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [3]:
data.shape  #no. of rows and columns

(12294, 7)

In [4]:
data.isnull().sum()  #display no. of null values in each column

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64


Let's check is there any duplicated row(s) in our dataset. If yes, then remove such rows.

In [5]:
data[data.duplicated()]  #print duplicated rows 

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members


It shows that there is no duplicated rows in our dataset

# Handle missing values

we can treat null values using Imputation technique. Imputation technique is used for replacing the missing data with some substitute values to retain most of the data/information of the dataset. These substitute values can be either mean, median or mode.

Null values of columns genre and type can be treated using mode because they are categorical columns . Use mode specifying it's index position

In [6]:
mode=data['genre'].mode() #calculate mode of col genre and save in var. mode

In [7]:
mode

0    Hentai
Name: genre, dtype: object

In [8]:
data['genre'].fillna(mode[0],inplace=True)  #fill missing values of column genre with mode 

In [9]:
data.isnull().sum() # we can see no. of null values in col genre have become zero now because they are filled with mode value

anime_id      0
name          0
genre         0
type         25
episodes      0
rating      230
members       0
dtype: int64

In [10]:
mode=data['type'].mode() #calculate mode of col type and save in var. mode

In [11]:
mode

0    TV
Name: type, dtype: object

In [12]:
data['type'].fillna(mode[0],inplace=True)  #fill missing values of column type with mode 

In [13]:
data.isnull().sum() # we can see no. of null values in col type have become zero now because they are filled with mode value

anime_id      0
name          0
genre         0
type          0
episodes      0
rating      230
members       0
dtype: int64

# we can treat null values of column rating using mean

In [14]:
mean=data['rating'].mean() #calculate mean of col rating and save in var. mean

In [15]:
mean

6.473901690981445

In [16]:
data['rating'].fillna(mean,inplace=True)  #fill missing values of column rating with mean

In [17]:
data.isnull().sum() # we can see no. of null values in col rating have become zero now because they are filled with mean value

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [18]:
data.dtypes  #display  data dtypes of each column

anime_id      int64
name         object
genre        object
type         object
episodes     object
rating      float64
members       int64
dtype: object

Note: episodes column should be int type but it's given as object. So, let's check the unique values in this column

In [19]:
data['episodes'].unique()  # unique values in episodes column

array(['1', '64', '51', '24', '10', '148', '110', '13', '201', '25', '22',
       '75', '4', '26', '12', '27', '43', '74', '37', '2', '11', '99',
       'Unknown', '39', '101', '47', '50', '62', '33', '112', '23', '3',
       '94', '6', '8', '14', '7', '40', '15', '203', '77', '291', '120',
       '102', '96', '38', '79', '175', '103', '70', '153', '45', '5',
       '21', '63', '52', '28', '145', '36', '69', '60', '178', '114',
       '35', '61', '34', '109', '20', '9', '49', '366', '97', '48', '78',
       '358', '155', '104', '113', '54', '167', '161', '42', '142', '31',
       '373', '220', '46', '195', '17', '1787', '73', '147', '127', '16',
       '19', '98', '150', '76', '53', '124', '29', '115', '224', '44',
       '58', '93', '154', '92', '67', '172', '86', '30', '276', '59',
       '72', '330', '41', '105', '128', '137', '56', '55', '65', '243',
       '193', '18', '191', '180', '91', '192', '66', '182', '32', '164',
       '100', '296', '694', '95', '68', '117', '151', '130',

We can see '1' , '64',.....,'Unknown'....all are string type. So, let's convert string into number

In [20]:
data['episodes'] = pd.to_numeric(data['episodes'], errors= 'coerce') # convert string into number

In [21]:
data['episodes'].dtypes

dtype('float64')

In [22]:
data['episodes'].unique()  # unique values in episodes column

array([1.000e+00, 6.400e+01, 5.100e+01, 2.400e+01, 1.000e+01, 1.480e+02,
       1.100e+02, 1.300e+01, 2.010e+02, 2.500e+01, 2.200e+01, 7.500e+01,
       4.000e+00, 2.600e+01, 1.200e+01, 2.700e+01, 4.300e+01, 7.400e+01,
       3.700e+01, 2.000e+00, 1.100e+01, 9.900e+01,       nan, 3.900e+01,
       1.010e+02, 4.700e+01, 5.000e+01, 6.200e+01, 3.300e+01, 1.120e+02,
       2.300e+01, 3.000e+00, 9.400e+01, 6.000e+00, 8.000e+00, 1.400e+01,
       7.000e+00, 4.000e+01, 1.500e+01, 2.030e+02, 7.700e+01, 2.910e+02,
       1.200e+02, 1.020e+02, 9.600e+01, 3.800e+01, 7.900e+01, 1.750e+02,
       1.030e+02, 7.000e+01, 1.530e+02, 4.500e+01, 5.000e+00, 2.100e+01,
       6.300e+01, 5.200e+01, 2.800e+01, 1.450e+02, 3.600e+01, 6.900e+01,
       6.000e+01, 1.780e+02, 1.140e+02, 3.500e+01, 6.100e+01, 3.400e+01,
       1.090e+02, 2.000e+01, 9.000e+00, 4.900e+01, 3.660e+02, 9.700e+01,
       4.800e+01, 7.800e+01, 3.580e+02, 1.550e+02, 1.040e+02, 1.130e+02,
       5.400e+01, 1.670e+02, 1.610e+02, 4.200e+01, 

In [23]:
#data['episodes'] = data['episodes'].astype('int64')  # conversion of float64 into int64
# we get error because nan can't be converted into integer. So, first handle null values

In [24]:
data.isnull().sum()  # 340 null values in episodes column

anime_id      0
name          0
genre         0
type          0
episodes    340
rating        0
members       0
dtype: int64

we can treat null values of column episodes using mean

In [25]:
mean=data['episodes'].mean() #calculate mean of col episodes and save in var. mean

In [26]:
mean

12.382549774134182

In [27]:
data['episodes'].fillna(mean,inplace=True)  #fill missing values of column episodes with mean

In [28]:
data.isnull().sum() # no. of null values in col episodes have become zero now because they are filled with mean value

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [29]:
data['episodes'].dtypes

dtype('float64')

In [30]:
data['episodes'] = data['episodes'].astype('int64')  # conversion of float64 into int64

In [31]:
data['episodes'].dtypes

dtype('int64')

In [32]:
data['episodes'].unique()

array([   1,   64,   51,   24,   10,  148,  110,   13,  201,   25,   22,
         75,    4,   26,   12,   27,   43,   74,   37,    2,   11,   99,
         39,  101,   47,   50,   62,   33,  112,   23,    3,   94,    6,
          8,   14,    7,   40,   15,  203,   77,  291,  120,  102,   96,
         38,   79,  175,  103,   70,  153,   45,    5,   21,   63,   52,
         28,  145,   36,   69,   60,  178,  114,   35,   61,   34,  109,
         20,    9,   49,  366,   97,   48,   78,  358,  155,  104,  113,
         54,  167,  161,   42,  142,   31,  373,  220,   46,  195,   17,
       1787,   73,  147,  127,   16,   19,   98,  150,   76,   53,  124,
         29,  115,  224,   44,   58,   93,  154,   92,   67,  172,   86,
         30,  276,   59,   72,  330,   41,  105,  128,  137,   56,   55,
         65,  243,  193,   18,  191,  180,   91,  192,   66,  182,   32,
        164,  100,  296,  694,   95,   68,  117,  151,  130,   87,  170,
        119,   84,  108,  156,  140,  331,  305,  3

In [33]:
data.dtypes

anime_id      int64
name         object
genre        object
type         object
episodes      int64
rating      float64
members       int64
dtype: object

In [34]:
data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


# Feature Extraction:

In [35]:
data.shape

(12294, 7)

since, too many entries (12294) in our dataframe, so, filter down to the members who gave rating more than one time

In [36]:
# groupby func groups members and rating to count no. of times each member has given rating

data.groupby('members').count()['rating']  # count no. of rating for each member

members
5          1
11         1
12         1
13         2
15         1
          ..
717796     1
793665     1
893100     1
896229     1
1013917    1
Name: rating, Length: 6706, dtype: int64

In [37]:
x = data.groupby('members').count()['rating'] > 1 # if no. of rating for each member > 1, then True else False

In [38]:
x    

members
5          False
11         False
12         False
13          True
15         False
           ...  
717796     False
793665     False
893100     False
896229     False
1013917    False
Name: rating, Length: 6706, dtype: bool

In [39]:
x[x]  # display only True values of rating in x 

members
13        True
20        True
21        True
22        True
24        True
          ... 
61410     True
70559     True
94820     True
97674     True
184525    True
Name: rating, Length: 1636, dtype: bool

In [40]:
good_members = x[x].index  # display only True values of members in x i.e. these are the members who gave rating more than one time

In [41]:
good_members

Int64Index([    13,     20,     21,     22,     24,     25,     26,     29,
                30,     31,
            ...
             34177,  37792,  44030,  47587,  59815,  61410,  70559,  94820,
             97674, 184525],
           dtype='int64', name='members', length=1636)

Let's represent these members in a dataframe using isin()

In [42]:
data['members'].isin(good_members)

0        False
1        False
2        False
3        False
4        False
         ...  
12289     True
12290     True
12291     True
12292     True
12293     True
Name: members, Length: 12294, dtype: bool

In [43]:
df_good_members = data[data['members'].isin(good_members)] #False replaced by NaN and True replaced by original values in this dataframe

In [44]:
df_good_members

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
58,24415,Kuroko no Basket 3rd Season,"Comedy, School, Shounen, Sports",TV,25,8.62,184525
89,30346,Doukyuusei (Movie),"Romance, School, Shounen Ai, Slice of Life",Movie,1,8.53,28864
108,7655,Major S6,"Comedy, Drama, Sports",TV,25,8.49,24788
137,1365,Detective Conan Movie 06: The Phantom of Baker...,"Adventure, Mystery, Police, Shounen",Movie,1,8.42,28462
140,10937,Mobile Suit Gundam: The Origin,"Action, Mecha, Military, Sci-Fi, Shounen, Space",OVA,6,8.42,15420
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


entries in this dataframe have reduced from 12,294 to 7,224 . Let's further filter down to the genre that received rating atleast 10 times

In [45]:
df_good_members.groupby('genre').count()['rating'] # count no. of rating for each genre

genre
Action                                                     43
Action, Adventure                                          13
Action, Adventure, Cars, Comedy, Sci-Fi, Shounen            1
Action, Adventure, Cars, Mecha, Sci-Fi, Shounen, Sports     1
Action, Adventure, Comedy                                   8
                                                           ..
Super Power, Supernatural, Vampire                          1
Supernatural                                               12
Thriller                                                    1
Vampire                                                     1
Yaoi                                                        2
Name: rating, Length: 1752, dtype: int64

In [46]:
y = df_good_members.groupby('genre').count()['rating'] >=10 # if no. of rating for each genre >=10, True else False

In [47]:
y

genre
Action                                                      True
Action, Adventure                                           True
Action, Adventure, Cars, Comedy, Sci-Fi, Shounen           False
Action, Adventure, Cars, Mecha, Sci-Fi, Shounen, Sports    False
Action, Adventure, Comedy                                  False
                                                           ...  
Super Power, Supernatural, Vampire                         False
Supernatural                                                True
Thriller                                                   False
Vampire                                                    False
Yaoi                                                       False
Name: rating, Length: 1752, dtype: bool

In [48]:
y[y]  # display only True values of rating in y 

genre
Action                                True
Action, Adventure                     True
Action, Adventure, Comedy, Shounen    True
Action, Adventure, Fantasy            True
Action, Adventure, Mecha, Sci-Fi      True
                                      ... 
Sci-Fi, Space                         True
Shounen, Sports                       True
Slice of Life                         True
Sports                                True
Supernatural                          True
Name: rating, Length: 90, dtype: bool

In [49]:
famous_genre = y[y].index # display only True values of genre in y i.e. these are the genre that received rating atleast 10 times

In [50]:
famous_genre

Index(['Action', 'Action, Adventure', 'Action, Adventure, Comedy, Shounen',
       'Action, Adventure, Fantasy', 'Action, Adventure, Mecha, Sci-Fi',
       'Action, Adventure, Sci-Fi', 'Action, Comedy',
       'Action, Comedy, Shounen, Sports',
       'Action, Drama, Mecha, Military, Sci-Fi', 'Action, Fantasy',
       'Action, Mecha', 'Action, Mecha, Sci-Fi',
       'Action, Mecha, Sci-Fi, Shounen', 'Action, Sci-Fi',
       'Action, Shounen, Sports', 'Adventure', 'Adventure, Comedy',
       'Adventure, Comedy, Fantasy', 'Adventure, Comedy, Fantasy, Kids',
       'Adventure, Comedy, Fantasy, Kids, Shounen', 'Adventure, Comedy, Kids',
       'Adventure, Drama', 'Adventure, Fantasy', 'Adventure, Fantasy, Kids',
       'Adventure, Kids', 'Adventure, Mecha, Sci-Fi, Shounen',
       'Adventure, Sci-Fi', 'Comedy', 'Comedy, Ecchi',
       'Comedy, Ecchi, Kids, School, Shounen, Slice of Life',
       'Comedy, Fantasy', 'Comedy, Fantasy, Kids', 'Comedy, Historical',
       'Comedy, Kids', 'Comed

Let's represent these genre in a dataframe using isin()

In [51]:
df_good_members['genre'].isin(famous_genre)

58        True
89       False
108      False
137      False
140      False
         ...  
12289     True
12290     True
12291     True
12292     True
12293     True
Name: genre, Length: 7224, dtype: bool

In [52]:
final_df = df_good_members[df_good_members['genre'].isin(famous_genre)]

In [53]:
final_df #False replaced by NaN and True replaced by original values in this dataframe

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
58,24415,Kuroko no Basket 3rd Season,"Comedy, School, Shounen, Sports",TV,25,8.62,184525
421,32613,Elsword: El Lady,"Action, Fantasy",ONA,12,8.11,3846
678,11237,Hidamari Sketch x SP,"Comedy, School, Slice of Life",Special,2,7.90,7999
725,33446,Huyao Xiao Hongniang: Yue Hong,"Comedy, Romance",ONA,13,7.87,1043
749,22125,Kuroko no Basket: Mou Ikkai Yarimasen ka,"Comedy, School, Shounen, Sports",Special,1,7.86,20397
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


entries in this dataframe further reduced to 4,196. Now, we can create a pivot table for this dataframe

In [54]:
# create a pivot table
# make name as index column, anime_id as header, and rating as values
pivot_table = final_df.pivot_table(index='name', columns= 'anime_id', values = 'rating')

In [55]:
pivot_table

anime_id,115,141,216,286,307,351,363,434,498,545,...,34474,34491,34492,34497,34498,34501,34503,34506,34519,34522
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"&quot;Aesop&quot; no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
100%,,,,,,,,,,,...,,,,,,,,,,
100-man-nen Chikyuu no Tabi: Bander Book,,,,,,,,,,,...,,,,,,,,,,
11eyes Picture Drama,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
lilac (bombs Jun Togawa),,,,,,,,,,,...,,,,,,,,,,
makemagic,,,,,,,,,,,...,,,,,,,,,,
"on-chan, Yume Power Daibouken!",,,,,,,,,,,...,,,,,,,,,,
Üks Uks,,,,,,,,,,,...,,,,,,,,,,


In [56]:
# fill null values with '0'
pivot_table.fillna(0, inplace= True)

In [57]:
pivot_table.head()  # top 5 rows

anime_id,115,141,216,286,307,351,363,434,498,545,...,34474,34491,34492,34497,34498,34501,34503,34506,34519,34522
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"&quot;Aesop&quot; no Ohanashi yori: Ushi to Kaeru, Yokubatta Inu",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100-man-nen Chikyuu no Tabi: Bander Book,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11eyes Picture Drama,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [58]:
# standardise the pivot table
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler(with_mean=True, with_std=True)
pivot_table_normalized = scaler.fit_transform(pivot_table)

In [59]:
pivot_table_normalized

array([[-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137],
       [-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137],
       [-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137],
       ...,
       [-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137],
       [-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137],
       [-0.01544137, -0.01544137, -0.01544137, ..., -0.01544137,
        -0.01544137, -0.01544137]])

In [60]:
#use cosine similarity on the standardised pivot table 

from sklearn.metrics.pairwise import cosine_similarity
similarity_score = cosine_similarity(pivot_table_normalized) # calculate the similarity score for all the genre

In [61]:
similarity_score

array([[ 1.00000000e+00, -2.38379009e-04, -2.38379009e-04, ...,
        -2.38379009e-04, -2.38379009e-04, -2.38379009e-04],
       [-2.38379009e-04,  1.00000000e+00, -2.38379009e-04, ...,
        -2.38379009e-04, -2.38379009e-04, -2.38379009e-04],
       [-2.38379009e-04, -2.38379009e-04,  1.00000000e+00, ...,
        -2.38379009e-04, -2.38379009e-04, -2.38379009e-04],
       ...,
       [-2.38379009e-04, -2.38379009e-04, -2.38379009e-04, ...,
         1.00000000e+00, -2.38379009e-04, -2.38379009e-04],
       [-2.38379009e-04, -2.38379009e-04, -2.38379009e-04, ...,
        -2.38379009e-04,  1.00000000e+00, -2.38379009e-04],
       [-2.38379009e-04, -2.38379009e-04, -2.38379009e-04, ...,
        -2.38379009e-04, -2.38379009e-04,  1.00000000e+00]])

Let's us define a function recommend() using similarity_score. This function recommends top 5 similar Anime name

In [62]:
import numpy as np

In [63]:
def recommend(anime_name): 
    
    # Returns the numerical index for the anime_name
    index = np.where(pivot_table.index==anime_name)[0][0]
    
    # Sorts the similarities for the anime_name in descending order ([1:6] means 5 names)
    similar_anime = sorted(list(enumerate(similarity_score[index])),key=lambda x:x[1], reverse=True)[1:6]
    
    # To return result in list format
    data_1 = []
    
    for index,similarity in similar_anime:
        item = []
        # Get the anime details by index (it selects the top 5 similar anime names excluding the given anime name)
        temp_df = data[data['name'] == pivot_table.index[index]]
        
# Only add the title, genre, and type to the result (retrieves these details of the top 5 similar anime names from the data)
        item.extend(temp_df['name'].values)
        item.extend(temp_df['genre'].values)
        item.extend(temp_df['type'].values)
        
        data_1.append(item)
    return data_1    # return info as a list

when we pass any name from the pivot table as a parameter to the recommend() func, it will return the top 5 similar names of anime

In [64]:
# Model Validating
# Call the recommend method
recommend('11eyes Picture Drama') # retrieves the top 5 similar names of anime  excluding the given name 

[['Kenka Banchou Otome: Girl Beats Boys', 'Hentai', 'TV'],
 ['Idol Kyousei Sousa', 'Hentai', 'OVA'],
 ['Wake Up, Girls! Shin Shou', 'Drama, Music', 'TV'],
 ['Kochinpa! Dainiki', 'Comedy', 'TV'],
 ['Nuki Doki! Tenshi to Akuma no Sakusei Battle - Revolution',
  'Hentai',
  'OVA']]