In [1]:
import pandas as pd
import regex as re 
import numpy as np
import nltk
nltk.download('stopwords')  # download stopwords if necessary

from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dheer\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
from difflib import SequenceMatcher
import ast
from textblob import TextBlob

# Datasets

In [3]:
# Load the three raw tables in one pass: anime catalogue (df1), user reviews (df2), user profiles (df3).
df1, df2, df3 = (
    pd.read_csv(csv_name)
    for csv_name in ('animes.csv', 'reviews.csv', 'profiles.csv')
)

In [4]:
# Keep a single row per key: anime and reviews are keyed by 'uid', profiles by 'profile'.
df1=df1.drop_duplicates(subset=['uid'])
df2=df2.drop_duplicates(subset=['uid'])
df3=df3.drop_duplicates(subset=['profile'])

In [5]:
# (rows, columns) of the anime table after de-duplication
df1.shape

(16216, 12)

# List Of Animes

In [6]:
# Preview the first two anime rows
df1.head(2)

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"['Comedy', 'Sports', 'Drama', 'School', 'Shoun...","Oct 4, 2015 to Mar 27, 2016",25.0,489888,141,25.0,8.82,https://cdn.myanimelist.net/images/anime/9/766...,https://myanimelist.net/anime/28891/Haikyuu_Se...
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"['Drama', 'Music', 'Romance', 'School', 'Shoun...","Oct 10, 2014 to Mar 20, 2015",22.0,995473,28,24.0,8.83,https://cdn.myanimelist.net/images/anime/3/671...,https://myanimelist.net/anime/23273/Shigatsu_w...


In [7]:
# (rows, columns) of the reviews table after de-duplication
df2.shape

(130519, 7)

# User Reviews and Scores

In [8]:
# Preview the first two review rows
df2.head(2)

Unnamed: 0,uid,profile,anime_uid,text,score,scores,link
0,255938,DesolatePsyche,34096,\n \n \n \n ...,8,"{'Overall': '8', 'Story': '8', 'Animation': '8...",https://myanimelist.net/reviews.php?id=255938
1,259117,baekbeans,34599,\n \n \n \n ...,10,"{'Overall': '10', 'Story': '10', 'Animation': ...",https://myanimelist.net/reviews.php?id=259117


In [9]:
# (rows, columns) of the profiles table after de-duplication
df3.shape

(47885, 5)

# User profiles, Favourite Anime List

In [10]:
# Turn the stringified favourites lists into real Python lists.
# Only string values are parsed; anything else (e.g. a list produced by an earlier
# run of this cell) is passed through unchanged, so the cell can be re-executed
# without ast.literal_eval raising on an already-parsed value.
df3['favorites_anime']=df3['favorites_anime'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

In [11]:
# Preview the first two profiles, now with parsed favourites lists
df3.head(2)

Unnamed: 0,profile,gender,birthday,favorites_anime,link
0,DesolatePsyche,Male,"Oct 2, 1994","[33352, 25013, 5530, 33674, 1482, 269, 18245, ...",https://myanimelist.net/profile/DesolatePsyche
1,baekbeans,Female,"Nov 10, 2000","[11061, 31964, 853, 20583, 918, 9253, 34599, 3...",https://myanimelist.net/profile/baekbeans


# Name Search

In [12]:
#String matching for anime name  using SequenceMatcher
stopwords_list = stopwords.words('english')
def normalize_string(s):
    """Lower-case ``s``, remove every character other than a-z, 0-9 and space,
    then drop the words found in ``stopwords_list``.

    Returns the remaining words joined by single spaces.
    """
    cleaned = re.sub(r'[^a-z0-9 ]+', '', s.lower())
    kept_words = [word for word in cleaned.split() if word not in stopwords_list]
    return ' '.join(kept_words)

def score_strings(s1, s2):
    """Return the SequenceMatcher similarity ratio of ``s1`` and ``s2``.

    Both strings are passed through ``normalize_string`` before comparison.
    """
    first, second = normalize_string(s1), normalize_string(s2)
    matcher = SequenceMatcher(None, first, second)
    return matcher.ratio()
    


def search_name(s):
    """Score every anime title in ``df1`` against the query string ``s``.

    Returns a dict mapping each title to the similarity score produced by
    ``score_strings(s, title)``. A title that occurs more than once in
    ``df1`` ends up as a single dictionary entry.
    """
    # Only the title column is read, so iterate it directly instead of
    # building a full row Series per anime with iterrows().
    return {title: score_strings(s, title) for title in df1['title']}


    
  
            

In [13]:
# Run the title search for the query 'naruto'
name_result=search_name('naruto')

sorted_dict = sorted(name_result.items(), key=lambda x: x[1], reverse=True)# (title, score) pairs ordered by score, highest first
    
top_5_pairs = sorted_dict[:5]# The five most similar titles

In [14]:
# Show the five best-matching titles with their similarity scores
top_5_pairs

[('Naruto', 1.0),
 ('Naruto x UT', 0.7058823529411765),
 ('Nayuta', 0.6666666666666666),
 ('Haruwo', 0.6666666666666666),
 ('Tenuto', 0.6666666666666666)]

In [15]:
# Look up the catalogue row(s) whose title equals the best match (first pair, first element)
df1[df1['title']==top_5_pairs[0][0]]

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link
144,20,Naruto,"Moments prior to Naruto Uzumaki's birth, a hug...","['Action', 'Adventure', 'Comedy', 'Super Power...","Oct 3, 2002 to Feb 8, 2007",220.0,1280914,9,670.0,7.93,https://cdn.myanimelist.net/images/anime/13/17...,https://myanimelist.net/anime/20/Naruto


# genre


In [16]:
# Converting the genre lists from their text form to real Python lists.
# Only string values are parsed; anything else (e.g. a list produced by an earlier
# run of this cell) is passed through unchanged, so the cell can be re-executed
# without ast.literal_eval raising on an already-parsed value.
df1['genre']=df1['genre'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) else x
)

In [17]:
# Collect every distinct genre name that appears in any anime's genre list
s = {genre_name for genre_list in df1['genre'] for genre_name in genre_list}

In [18]:
# Show the set of distinct genre names
s

{'Action',
 'Adventure',
 'Cars',
 'Comedy',
 'Dementia',
 'Demons',
 'Drama',
 'Ecchi',
 'Fantasy',
 'Game',
 'Harem',
 'Hentai',
 'Historical',
 'Horror',
 'Josei',
 'Kids',
 'Magic',
 'Martial Arts',
 'Mecha',
 'Military',
 'Music',
 'Mystery',
 'Parody',
 'Police',
 'Psychological',
 'Romance',
 'Samurai',
 'School',
 'Sci-Fi',
 'Seinen',
 'Shoujo',
 'Shoujo Ai',
 'Shounen',
 'Shounen Ai',
 'Slice of Life',
 'Space',
 'Sports',
 'Super Power',
 'Supernatural',
 'Thriller',
 'Vampire',
 'Yaoi',
 'Yuri'}

In [19]:
len(s)# count of all the unique genres

43

# creating genre_index

In [20]:
# Give every genre a position by enumerating the set `s`.
# Positions follow the set's iteration order, so they are only meaningful
# together with other code that enumerates the same `s`.
g_dict = {genre_name: position for position, genre_name in enumerate(s)}
g_dict

{'Shoujo': 0,
 'Magic': 1,
 'Yuri': 2,
 'Music': 3,
 'Samurai': 4,
 'Psychological': 5,
 'Slice of Life': 6,
 'Demons': 7,
 'Action': 8,
 'Parody': 9,
 'Space': 10,
 'Shounen Ai': 11,
 'Romance': 12,
 'Police': 13,
 'Ecchi': 14,
 'Horror': 15,
 'Thriller': 16,
 'Sports': 17,
 'Yaoi': 18,
 'Mystery': 19,
 'Harem': 20,
 'Super Power': 21,
 'Fantasy': 22,
 'Vampire': 23,
 'Hentai': 24,
 'Supernatural': 25,
 'Sci-Fi': 26,
 'Josei': 27,
 'Seinen': 28,
 'Comedy': 29,
 'Kids': 30,
 'Game': 31,
 'Dementia': 32,
 'Shoujo Ai': 33,
 'Shounen': 34,
 'Martial Arts': 35,
 'Military': 36,
 'School': 37,
 'Adventure': 38,
 'Drama': 39,
 'Historical': 40,
 'Cars': 41,
 'Mecha': 42}

In [21]:
# Creating the genre index string for each row in the dataset.
# The genre -> position mapping is identical for every row, so it is built once
# up front by enumerating `s` (the same order genre_input walks `s` in, which keeps
# the row strings comparable with the encoded query).
g_dict = {genre_name: position for position, genre_name in enumerate(s)}

s_l = []  # one '0'/'1' string per anime, in df1 row order
for genre_list in df1['genre']:
    genre = ['0'] * len(g_dict)  # one flag per known genre
    for genre_name in genre_list:
        genre[g_dict[genre_name]] = '1'  # switch on the flag of each genre the anime has
    s_l.append(''.join(genre))
    

In [22]:
# Assigning the genre_index strings to the dataframe (s_l holds one string per row, in row order)
df1['genre_index']=s_l

In [23]:
# Preview the frame with the new genre_index column
df1.head()

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link,genre_index
0,28891,Haikyuu!! Second Season,Following their participation at the Inter-Hig...,"[Comedy, Sports, Drama, School, Shounen]","Oct 4, 2015 to Mar 27, 2016",25.0,489888,141,25.0,8.82,https://cdn.myanimelist.net/images/anime/9/766...,https://myanimelist.net/anime/28891/Haikyuu_Se...,0000000000000000010000000000010000100101000
1,23273,Shigatsu wa Kimi no Uso,Music accompanies the path of the human metron...,"[Drama, Music, Romance, School, Shounen]","Oct 10, 2014 to Mar 20, 2015",22.0,995473,28,24.0,8.83,https://cdn.myanimelist.net/images/anime/3/671...,https://myanimelist.net/anime/23273/Shigatsu_w...,0001000000001000000000000000000000100101000
2,34599,Made in Abyss,The Abyss—a gaping chasm stretching down into ...,"[Sci-Fi, Adventure, Mystery, Drama, Fantasy]","Jul 7, 2017 to Sep 29, 2017",13.0,581663,98,23.0,8.83,https://cdn.myanimelist.net/images/anime/6/867...,https://myanimelist.net/anime/34599/Made_in_Abyss,0000000000000000000100100010000000000011000
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","[Action, Military, Adventure, Comedy, Drama, M...","Apr 5, 2009 to Jul 4, 2010",64.0,1615084,4,1.0,9.23,https://cdn.myanimelist.net/images/anime/1223/...,https://myanimelist.net/anime/5114/Fullmetal_A...,0100000010000000000000100000010000101011000
4,31758,Kizumonogatari III: Reiketsu-hen,After helping revive the legendary vampire Kis...,"[Action, Mystery, Supernatural, Vampire]","Jan 6, 2017",1.0,214621,502,22.0,8.83,https://cdn.myanimelist.net/images/anime/3/815...,https://myanimelist.net/anime/31758/Kizumonoga...,0000000010000000000100010100000000000000000


In [24]:
# First row only: the columns at positions 3 and 12 (genre and genre_index in the output below)
df1.iloc[0,[3,12]]

genre             [Comedy, Sports, Drama, School, Shounen]
genre_index    0000000000000000010000000000010000100101000
Name: 0, dtype: object

In [25]:
# Taking the input and converting it into same genre_index
def genre_input(l1):
    """Encode a list of genre names as a '0'/'1' flag string.

    The string has one character per genre in the global set ``s``; the
    character at a genre's position (its rank when enumerating ``s``) is '1'
    when that genre is in ``l1`` and '0' otherwise. The string is printed
    and returned.

    Raises
    ------
    KeyError
        If ``l1`` contains a name that is not in ``s``.
    """
    g_dict = {genre_name: position for position, genre_name in enumerate(s)}
    genre = ['0'] * len(g_dict)  # sized from the genre set rather than a fixed 43

    for genre_name in l1:
        if genre_name not in g_dict:
            # Same exception type as a plain dict lookup, but say what went wrong.
            raise KeyError(
                f"unknown genre {genre_name!r}; expected one of {sorted(g_dict)}"
            )
        genre[g_dict[genre_name]] = '1'

    s1 = ''.join(genre)
    print(s1)# Printing the generated string.
    return s1


In [26]:
# comparing the genre index of the input genres with the genre index of every anime in the dataframe
def genre_search(s1,df1):
    """Find the anime that carry every genre flagged in ``s1``.

    Parameters
    ----------
    s1 : str
        Flag string of '0'/'1' characters; each '1' marks a required genre.
    df1 : pandas.DataFrame
        Frame with 'title', 'genre_index' and 'score' columns.

    Returns
    -------
    dict
        title -> score for every row whose ``genre_index`` has a '1' at each
        position where ``s1`` has a '1'. Rows sharing a title collapse to one
        entry (the last such row wins).
    """
    # The required positions depend only on the query, so compute them once
    # instead of re-scanning every character of s1 for every row.
    required = [position for position, flag in enumerate(s1) if flag == '1']

    genre_result_dict = {}
    # Iterate just the three needed columns rather than building a row Series via iterrows().
    for title, genre_index, score in zip(df1['title'], df1['genre_index'], df1['score']):
        if all(genre_index[position] == '1' for position in required):
            genre_result_dict[title] = score
    return genre_result_dict
                
    

In [42]:
# Encode the wanted genres, collect every anime that carries all of them, and rank by score.
r1 = genre_input(['Action', 'Super Power', 'Drama'])
result_dict = genre_search(r1, df1)
# A NaN score compares False against everything, which would leave sorted() in an
# arbitrary order; treat a missing score (if any) as -inf so it sinks to the bottom.
sorted_dict_genre = sorted(
    result_dict.items(),
    key=lambda pair: float('-inf') if pd.isna(pair[1]) else pair[1],
    reverse=True,
)# sorting in decreasing order of score
top_5_pairs_genre = sorted_dict_genre[:5]

0000000010000000000001000000000000000001000


In [43]:
# Five highest-scored titles that carry all of the requested genres
top_5_pairs_genre

[('Shingeki no Kyojin Season 3 Part 2', 9.07),
 ('Code Geass: Hangyaku no Lelouch R2', 8.93),
 ('Code Geass: Hangyaku no Lelouch', 8.76),
 ('One Piece', 8.53),
 ('Shingeki no Kyojin Season 3', 8.49)]

# Reviews retrieval

In [29]:
# # given an anime id and the reviews dataframe as input, it returns the list of all the reviews
# def get_reviews(a_id,df2):
#     gk=df2.groupby('anime_uid')
#     l=df2[df2.index.isin(temp.groups[a_id])]['text'].values
    

In [30]:
# passing the list of reviews for the specific anime id and returning the cleaned reviews and the sentiment scores
def senti_analysis(l1):
    """Clean each review text and score its sentiment with TextBlob.

    Parameters
    ----------
    l1 : iterable
        Review texts. Entries that are not strings are skipped.

    Returns
    -------
    dict
        cleaned review text -> ``TextBlob(...).sentiment.polarity``. Reviews
        that clean to the same text share a single entry.
    """
    senti_dict = {}
    for review in l1:
        if not isinstance(review, str):
            continue  # nothing to analyse for non-text entries

        # Keep letters only, lower-case, then collapse runs of whitespace.
        letters_only = re.sub("[^a-zA-Z]", " ", review).lower()
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        cleaned_text = re.sub(r'\s+', ' ', letters_only).strip()

        # calculating sentiment score for each review of anime
        senti_dict[cleaned_text] = TextBlob(cleaned_text).sentiment.polarity

    return senti_dict

In [31]:
anime_id_groupby=df2.groupby('anime_uid')# Grouping the reviews by anime so one anime's reviews can be fetched

In [32]:
# Ask for an anime uid, pull that anime's review texts via the grouped index labels, and score them.
requested_uid = int(input('enter anime uid'))
row_labels = anime_id_groupby.groups[requested_uid]
review_texts = df2[df2.index.isin(row_labels)]['text'].values
result_senti = senti_analysis(list(review_texts))

enter anime uid5114


In [33]:
# Rank the (cleaned review, sentiment score) pairs by score, highest first, and show the top five.
sort_senti=sorted(result_senti.items(), key=lambda x: x[1], reverse=True)
sort_senti[:5]
#sort_senti[-5:]

[('more pics overall story animation sound character enjoyment this is one of the greatest anime s i have ever seen in my life after watching this show more then three times it is truly perfect characters and character development is amazing the story line has you thinking is one of the most original stories i have ever seen the sound and openings are great the art style fits the anime perfectly and it brings out the personality of the characters overall this is truly the greatest anime helpful',
  0.5767857142857142),
 ('more pics overall story animation sound character enjoyment the anime is great just a huge masterpiece you may not need a tissue box since it is not that kind of anime but hey it is a very good anime it s about two boys who s mother died and tried to transmute their mother the story is good art is nice and the characters background is very interesting i recommend this to any otaku out there if you are wondering if there is romance than there is romance but takes some 

In [34]:
# Based on the user's search result, we suggest more anime using the favourite-anime lists in the reviewers' profiles.

In [35]:
def get_recommend(anime_id=None, top_n=5):
    """Suggest anime based on what the reviewers of a given anime list as favourites.

    Parameters
    ----------
    anime_id : int, optional
        uid of the anime that was looked up. When omitted, the value is read
        interactively with ``input`` (the original behaviour).
    top_n : int, default 5
        How many of the most frequently favourited uids to keep.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df1`` whose ``uid`` is among the ``top_n`` most common
        entries in the favourites lists of the profiles that reviewed
        ``anime_id``. Rows keep ``df1`` order, and the looked-up anime itself
        is not filtered out. If the anime has no reviews in ``df2``, or its
        reviewers list no favourites, an empty frame with ``df1``'s columns
        is returned.
    """
    if anime_id is None:
        anime_id = int(input('enter anime id '))

    # Profiles of everyone who reviewed this anime (a plain mask; no need to group all of df2).
    reviewers = df2.loc[df2['anime_uid'] == anime_id, 'profile'].tolist()
    reviewer_profiles = df3[df3['profile'].isin(reviewers)]

    # Flatten all favourites lists into one list of uids.
    favourites = [fav for favs in reviewer_profiles['favorites_anime'] for fav in favs]
    if not favourites:
        return df1.iloc[0:0]

    top_uids = [int(uid) for uid in pd.Series(favourites).value_counts().head(top_n).index]
    return df1[df1['uid'].isin(top_uids)]
recommendations=get_recommend()
recommendations

enter anime id 5114


Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link,genre_index
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","[Action, Military, Adventure, Comedy, Drama, M...","Apr 5, 2009 to Jul 4, 2010",64.0,1615084,4,1.0,9.23,https://cdn.myanimelist.net/images/anime/1223/...,https://myanimelist.net/anime/5114/Fullmetal_A...,0100000010000000000000100000010000101011000
740,1535,Death Note,"A shinigami, as a god of death, can kill any p...","[Mystery, Police, Psychological, Supernatural,...","Oct 4, 2006 to Jun 27, 2007",37.0,1871043,1,52.0,8.65,https://cdn.myanimelist.net/images/anime/9/945...,https://myanimelist.net/anime/1535/Death_Note,0000010000000100100100000100000000100000000
764,1,Cowboy Bebop,"In the year 2071, humanity has colonized sever...","[Action, Adventure, Comedy, Drama, Sci-Fi, Space]","Apr 3, 1998 to Apr 24, 1999",26.0,930311,39,26.0,8.81,https://cdn.myanimelist.net/images/anime/4/196...,https://myanimelist.net/anime/1/Cowboy_Bebop,0000000010100000000000000010010000000011000
772,11061,Hunter x Hunter (2011),Hunter x Hunter is set in a world where Hunte...,"[Action, Adventure, Fantasy, Shounen, Super Po...","Oct 2, 2011 to Sep 24, 2014",148.0,1052761,20,3.0,9.11,https://cdn.myanimelist.net/images/anime/11/33...,https://myanimelist.net/anime/11061/Hunter_x_H...,0000000010000000000001100000000000100010000
773,9253,Steins;Gate,The self-proclaimed mad scientist Rintarou Oka...,"[Thriller, Sci-Fi]","Apr 6, 2011 to Sep 14, 2011",24.0,1331710,7,2.0,9.11,https://cdn.myanimelist.net/images/anime/5/731...,https://myanimelist.net/anime/9253/Steins_Gate,0000000000000000100000000010000000000000000


# Below are the step-by-step results of the above function (using anime uid 20)

In [36]:
# Catalogue row for uid 20 (Naruto in the output below)
df1[df1['uid']==20]

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link,genre_index
144,20,Naruto,"Moments prior to Naruto Uzumaki's birth, a hug...","[Action, Adventure, Comedy, Super Power, Marti...","Oct 3, 2002 to Feb 8, 2007",220.0,1280914,9,670.0,7.93,https://cdn.myanimelist.net/images/anime/13/17...,https://myanimelist.net/anime/20/Naruto,0000000010000000000001000000010000110010000


In [37]:
# Group the reviews by anime, then list the profiles that reviewed anime 20
gk1=df2.groupby('anime_uid')
gk1.get_group(20)['profile']

4313        webkid94
4328     LazerzGoPew
4329      jaydevraol
4330         Xinaida
4331    Golden_Age12
            ...     
4791          lahzor
4792         link9us
4793    Ston3_FreeN7
4794     theeggman85
4795        TheLlama
Name: profile, Length: 331, dtype: object

In [38]:
# Profile rows of the users who reviewed anime 20
df3[df3['profile'].isin(list(gk1.get_group(20)['profile'].values))]

Unnamed: 0,profile,gender,birthday,favorites_anime,link
15,12sed,,,"[30, 16201, 22135]",https://myanimelist.net/profile/12sed
31,ggultra2764,Male,"May 29, 1985","[512, 7193, 165, 440, 634]",https://myanimelist.net/profile/ggultra2764
50,BabyGirl06301,Female,"Sep 6, 1997","[20, 18507, 18689, 20583, 11771, 31964, 20507,...",https://myanimelist.net/profile/BabyGirl06301
52,ImTheRhetorician,Male,,"[269, 5958, 7724, 1735]",https://myanimelist.net/profile/ImTheRhetorician
118,Dante012,Male,,"[2581, 4181, 2904]",https://myanimelist.net/profile/Dante012
...,...,...,...,...,...
3957,milkman57,Male,"May 16, 1988","[1735, 237, 5114, 98, 9253]",https://myanimelist.net/profile/milkman57
3958,lahzor,Male,,"[1, 44, 392]",https://myanimelist.net/profile/lahzor
3959,Ston3_FreeN7,Male,"Sep 17, 1988","[32, 11061, 440, 30, 820, 3297, 801, 16, 1453,...",https://myanimelist.net/profile/Ston3_FreeN7
3960,link9us,Male,"Jan 6, 1985","[21, 16067, 30, 9253, 160, 437, 2001, 1293, 1,...",https://myanimelist.net/profile/link9us


In [39]:
# Flatten the favourites lists of every profile that reviewed anime 20 into one list
reviewer_names = list(gk1.get_group(20)['profile'].values)
reviewer_profiles = df3[df3['profile'].isin(reviewer_names)]
l3 = [fav for favs in reviewer_profiles['favorites_anime'] for fav in favs]


In [40]:
# uids of the five most frequently favourited anime, converted to int
[int(x) for x in list(pd.Series(l3).value_counts()[:5].index)]

[20, 1535, 5114, 1735, 21]

In [41]:
# Catalogue rows for those five uids (shown in df1 row order)
df1[df1['uid'].isin([int(x) for x in list(pd.Series(l3).value_counts()[:5].index)])]

Unnamed: 0,uid,title,synopsis,genre,aired,episodes,members,popularity,ranked,score,img_url,link,genre_index
3,5114,Fullmetal Alchemist: Brotherhood,"""In order for something to be obtained, someth...","[Action, Military, Adventure, Comedy, Drama, M...","Apr 5, 2009 to Jul 4, 2010",64.0,1615084,4,1.0,9.23,https://cdn.myanimelist.net/images/anime/1223/...,https://myanimelist.net/anime/5114/Fullmetal_A...,0100000010000000000000100000010000101011000
144,20,Naruto,"Moments prior to Naruto Uzumaki's birth, a hug...","[Action, Adventure, Comedy, Super Power, Marti...","Oct 3, 2002 to Feb 8, 2007",220.0,1280914,9,670.0,7.93,https://cdn.myanimelist.net/images/anime/13/17...,https://myanimelist.net/anime/20/Naruto,0000000010000000000001000000010000110010000
485,1735,Naruto: Shippuuden,It has been two and a half years since Naruto ...,"[Action, Adventure, Comedy, Super Power, Marti...","Feb 15, 2007 to Mar 23, 2017",500.0,1059649,19,318.0,8.2,https://cdn.myanimelist.net/images/anime/5/174...,https://myanimelist.net/anime/1735/Naruto__Shi...,0000000010000000000001000000010000110010000
707,21,One Piece,"Gol D. Roger was known as the ""Pirate King,"" t...","[Action, Adventure, Comedy, Super Power, Drama...","Oct 20, 1999 to ?",,948342,35,86.0,8.53,https://cdn.myanimelist.net/images/anime/6/732...,https://myanimelist.net/anime/21/One_Piece,0000000010000000000001100000010000100011000
740,1535,Death Note,"A shinigami, as a god of death, can kill any p...","[Mystery, Police, Psychological, Supernatural,...","Oct 4, 2006 to Jun 27, 2007",37.0,1871043,1,52.0,8.65,https://cdn.myanimelist.net/images/anime/9/945...,https://myanimelist.net/anime/1535/Death_Note,0000010000000100100100000100000000100000000
