In [5]:
import numpy as np
import pandas as pd
import itertools
import collections
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Load the anime catalogue from the relative path 'anime.csv'.
animeData = pd.read_csv('anime.csv')
# Preview the first rows to check which columns were read.
animeData.head()

Unnamed: 0,Anime_ID,Name,Genre,Type,Episodes,Rating,Members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [7]:
# Titles without a genre get the placeholder label 'None'; every entry is then
# split on ', ' into a list of genre names.
animeData['Genre'] = (
    animeData['Genre']
    .fillna('None')
    .apply(lambda label: label.split(', '))
)

# Flatten all per-title genre lists into one stream and count each genre.
genreData = itertools.chain.from_iterable(animeData['Genre'].values.tolist())
genreCounter = collections.Counter(genreData)

# Tabulate the counts as (Genre, count) rows, most frequent genre first.
genres = pd.DataFrame.from_dict(genreCounter, orient='index')
genres = genres.reset_index()
genres = genres.rename(columns={'index': 'Genre', 0: 'count'})
genres = genres.sort_values('count', ascending=False)
animeData.head()

Unnamed: 0,Anime_ID,Name,Genre,Type,Episodes,Rating,Members
0,32281,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",TV,64,9.26,793665
2,28977,Gintama°,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.25,114262
3,9253,Steins;Gate,"[Sci-Fi, Thriller]",TV,24,9.17,673572
4,9969,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.16,151266


In [3]:
# Give every distinct genre a position, numbered in the iteration order of
# genreCounter's keys; count ends up as the number of genres.
genreMap = {name: position for position, name in enumerate(genreCounter.keys())}
count = len(genreMap)


In [4]:
def featureSetExtraction(genre, genreLookup=None):
    """Encode a list of genre names as a multi-hot integer vector.

    Parameters
    ----------
    genre : iterable of str
        Genre names of one title.
    genreLookup : dict, optional
        Mapping from genre name to position in the vector. When omitted, the
        notebook-level ``genreMap`` is used.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one slot per entry of the mapping; the slots of
        the listed genres hold 1, every other slot holds 0. A genre listed
        more than once still yields 1.

    Raises
    ------
    KeyError
        If a genre name is not present in the mapping.
    """
    if genreLookup is None:
        genreLookup = genreMap
    feature = np.zeros(len(genreLookup), dtype=int)
    # Build an explicit integer index array so that an empty genre list is a
    # valid (empty) selection instead of an untyped empty list.
    positions = np.array([genreLookup[name] for name in genre], dtype=np.intp)
    feature[positions] = 1
    return feature

In [5]:
# Swap each title's list of genre names for its encoded feature vector.
animeData['Genre'] = animeData['Genre'].apply(featureSetExtraction)
animeData.head()

Unnamed: 0,Anime_ID,Name,Genre,Type,Episodes,Rating,Members
0,32281,Kimi no Na wa.,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",TV,64,9.26,793665
2,28977,Gintama°,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,51,9.25,114262
3,9253,Steins;Gate,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",TV,24,9.17,673572
4,9969,Gintama&#039;,"[0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,51,9.16,151266


In [6]:
#test_data = animeData.take([0, 19, 1, 2, 16, 23, 6, 49, 220, 66])
#for row in test_data.iterrows():

 #   print('Similar anime like {}:'.format(row[1]['Name']))
#  search = animeData.drop([row[0]]) # drop current anime
#    search['result'] = 0
    
#   Y = row[1]['Genre'] #Converting in a 2D array 
#    Y = np.array(Y).reshape((1, -1))
#    for i,rowData in search.iterrows():
#        x = rowData['Genre']
#        x = np.array(x).reshape((1,-1))
#        search.set_value(i,'Genre',x)
        
#    search['result'] = search['Genre'].apply(lambda x: cosine_similarity(Y, x))
#    search_result = search.sort_values('result', ascending=False)['Name'].head(25)
#    for res in search_result.values:
#        print('\t{}'.format(res))
#    print()

In [7]:
# Read the per-user ratings and attach the catalogue columns to every rating
# row by joining on Anime_ID (the clashing Rating columns come out as
# Rating_x for the user's rating and Rating_y for the catalogue rating).
userData = pd.read_csv('newRating.csv').merge(animeData, on='Anime_ID')
userData.head()

Unnamed: 0,User_ID,Anime_ID,Rating_x,Name,Genre,Type,Episodes,Rating_y,Members
0,1,20,6,Naruto,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,220,7.81,683297
1,3,20,8,Naruto,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,220,7.81,683297
2,5,20,6,Naruto,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,220,7.81,683297
3,6,20,8,Naruto,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,220,7.81,683297
4,10,20,5,Naruto,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",TV,220,7.81,683297


In [8]:
# Order the rating rows by user id (ascending is the default direction).
userData = userData.sort_values('User_ID')

In [9]:
# Discard the catalogue columns that the recommendation steps do not read.
unusedColumns = ['Type', 'Episodes', 'Rating_y', 'Members']
userData = userData.drop(unusedColumns, axis=1)
#userData.to_csv('UserMap.csv')

In [10]:
# Only the final expression of a cell is rendered, so the head() call whose
# result was thrown away has been removed; the shape reports the number of
# rating rows and remaining columns.
userData.shape

(7813727, 5)

In [11]:
#For recommending keep only those data where the rating > 6
# Flag the rows rated below 7 once, then split the table with the mask:
# dropData holds the flagged rows, recommendData everything else.
lowRated = userData['Rating_x'] < 7
dropData = userData.loc[lowRated]
recommendData = userData.loc[~lowRated]
recommendData.shape

(6019221, 5)

In [12]:
#Create a dictionary of user and seen movies
# Maps each User_ID to the list of Anime_IDs found in userData, in row order.
# The two id columns are walked directly instead of using DataFrame.iterrows(),
# which builds a Series for every one of the millions of rating rows.
userSeenMovies = {}
for userId, animeId in zip(userData['User_ID'].tolist(), userData['Anime_ID'].tolist()):
    userSeenMovies.setdefault(userId, []).append(animeId)

In [13]:
#Similarly create a dictionary of user and favored movies
# Maps each User_ID to the list of Anime_IDs found in recommendData (the rows
# that survived the rating filter), in row order. Users with no such rows get
# no entry. The id columns are walked directly instead of using
# DataFrame.iterrows(), which builds a Series for every row.
userFavMovies = {}
for userId, animeId in zip(recommendData['User_ID'].tolist(), recommendData['Anime_ID'].tolist()):
    userFavMovies.setdefault(userId, []).append(animeId)

In [14]:
def giveGenreArray(strGenre = []):
    """Turn the textual form of a genre vector into a list of ints.

    The input is walked element by element. Brackets, commas and whitespace
    characters are skipped; every other element is converted with ``int``.
    Because a string is read one character at a time, each value is assumed to
    be a single digit (which holds for 0/1 genre flags).

    Parameters
    ----------
    strGenre : str or iterable
        Text such as ``"[0 1 0]"`` or ``"[0, 1, 0]"`` (line breaks allowed), or
        an iterable whose elements ``int`` accepts. The default is never
        mutated.

    Returns
    -------
    list of int

    Raises
    ------
    ValueError
        If an element is neither skipped nor convertible by ``int``.
    """
    # Commas and tabs are skipped as well, so list-style text parses too.
    skipped = ('[', ']', ',', ' ', '\t', '\r', '\n')
    return [int(token) for token in strGenre if token not in skipped]

In [17]:
# Genre vector of the first favourite title of user 100, as a list of ints.
Y = giveGenreArray(
    animeData.loc[animeData['Anime_ID'] == userFavMovies[100][0], 'Genre'].tolist()[0]
)
print(Y)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]


In [24]:
def recommendMovies(userId, topN=25):
    """Recommend unseen titles whose genres resemble the user's first favourite.

    The genre vector of the first entry of ``userFavMovies[userId]`` is compared,
    by cosine similarity, with the genre vector of every title in ``animeData``
    that is not listed in ``userSeenMovies[userId]``.

    Parameters
    ----------
    userId : hashable
        Key into ``userSeenMovies`` / ``userFavMovies``.
    topN : int, optional
        Maximum number of names to return (default 25).

    Returns
    -------
    pandas.Series
        Names of at most ``topN`` unseen titles, most similar first, keeping
        their row labels from ``animeData``. Empty when the user has no
        favourites or when no unseen title is left.

    Raises
    ------
    KeyError
        If ``userId`` is not a key of ``userSeenMovies``.
    """
    seenIds = userSeenMovies[userId]
    # A user whose ratings were all filtered out has no entry in userFavMovies.
    favouriteIds = userFavMovies.get(userId)

    # Boolean mask + copy gives an independent frame that can take a new column.
    notSeenMovies = animeData.loc[~animeData['Anime_ID'].isin(seenIds)].copy()
    if not favouriteIds or notSeenMovies.empty:
        return notSeenMovies['Name'].head(0)

    # Only the first favourite title defines the taste profile.
    favouriteGenre = animeData.loc[animeData['Anime_ID'] == favouriteIds[0], 'Genre'].tolist()[0]
    Y = np.array(giveGenreArray(favouriteGenre)).reshape((1, -1))

    # Stack all candidate genre vectors and score them in a single call; this
    # yields one plain float per row, so sorting works on numbers rather than
    # on 1x1 arrays, and no per-cell DataFrame writes are needed.
    candidateGenres = np.array([giveGenreArray(genre) for genre in notSeenMovies['Genre']])
    notSeenMovies['result'] = cosine_similarity(Y, candidateGenres)[0]

    search_result = notSeenMovies.sort_values('result', ascending=False)['Name'].head(topN)
    return search_result

In [25]:
# Recommendations keyed by user id; only user 100 is computed here.
recommendedMovies = {100: recommendMovies(100)}

In [26]:
# Dump the whole recommendations dictionary as plain text.
print(recommendedMovies)

{100: 5307                                                11eyes
2663                            Supernatural The Animation
3192                                           Zombie-Loan
2177                                Blood-C: The Last Dark
5095                                              Devilman
11396                                      Mahou Shoujo Ai
10935                      Akiba&#039;s Trip The Animation
5395                                         Ookamikakushi
7951                               Idol Fight Suchie-Pai 2
8556                              Fujiko Fujio A no Mumako
2229                                               Gantz:O
4892                                           Blue Seed 2
4727                 Kishin Douji Zenki Gaiden: Anki Kitan
1765                                  Shikabane Hime: Kuro
7743                           Aru Zombie Shoujo no Sainan
3372                                        Hakaba Kitarou
3985                                             N

In [None]:
# Leftover, never-executed scratch cell: a bare Python 2 print statement,
# which would only emit an empty line.
print 