PyAnimeList by Patrick Tjahjadi

Program to retrieve Anime/Donghua data from MyAnimeList, including score, year, genre, etc.

Allows users to filter Anime/Donghua based on these attributes with a sorting feature.

Search for your favourite Anime/Donghua or simply look for recommendation with the filtering and sorting feature!

In [5]:
# Imported Libraries
from jikanpy import Jikan
import pandas as pd
import time
from IPython.display import clear_output
import dill
from rake_nltk import Rake
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import difflib

In [6]:
# Function to ask for an anime title and recommend anime based on cosine similarity
def anime_recommendations(cosine_sim, number_anime):
    anime_titles = list(anime_df['Title'])
    # Create another list to remove case sensitivity in searching anime
    anime_titles_lower = [title.lower() for title in anime_titles]      
    anime_title = input("Input an anime title for recommendation:\n")
    
    # Recommend an anime if a similar match is found
    try:
        anime_title = difflib.get_close_matches(anime_title.lower(), anime_titles_lower, n = 1, cutoff = 0.5)[0]
        
        # Find the position of the anime title in the list
        idx = anime_titles_lower.index(anime_title)
        
        # Retrieve the top 10 most similar anime based on cosine similarity
        score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
        clear_output(wait=True)
        print("Anime similar to "+anime_titles[idx]+" are:")
        top_10_indexes = list(score_series.iloc[1:(number_anime + 1)].index)
        for index in top_10_indexes:
            print(anime_df.Title.iloc[index])
            
    except IndexError:
        print("No results found.")

In [7]:
# Function to recommend anime based on query results in Phase 1
def anime_recommendations_from_query(query_recommendation_list, cosine_sim, number_anime):
    anime_titles = list(anime_df['Title'])
    num_output = round(number_anime / len(query_recommendation_list)) + 1
    top_anime = []
    for recommended_title in query_recommendation_list:
        anime_title = difflib.get_close_matches(recommended_title, anime_titles, n = 1, cutoff = 0.5)[0]
        idx = anime_titles.index(anime_title)
        score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
        testlist = list(score_series.iloc[1:num_output].index)
        top_anime.append(testlist)

    print("Based on your recent queries, here are some recommended anime for you:")
    for each_recommended_anime in top_anime:
        for index in each_recommended_anime:
            print(anime_df.Title.iloc[index])

In [137]:
# Function to clean words from punctuation and remove capital case to standardise text tokens
def clean_text(word):
    punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_0123456789~'''
    no_punct = ""
    for char in word:
        if char not in punctuations:
            no_punct = no_punct + char
    return no_punct.lower()

In [None]:
# We can load this later instead of retrieving data again
dill.load_session('my_anime_list.db')


In [15]:
"""
Skip running this block if the session "my_anime_list.db" has been loaded
"""

# Set up data for anime from 2000 to 2020 for retrieval using the Jikan API

jikan = Jikan()

years = [year for year in range (2000, 2021)]
seasons = ['winter', 'spring', 'summer', 'fall']

myanimelist = []

In [17]:
"""
Skip running this block if the session "my_anime_list.db" has been loaded
"""

# Retrieve anime data through Jikan
# Time delay of 7 seconds per year for API rate limiting
for year in years:
    for season in seasons:
        myanimelist.append(jikan.season(year = year, season = season))
    time.sleep(7)
myanimelist.append(jikan.season(year = 2021, season = 'winter'))
myanimelist.append(jikan.season(year = 2021, season = 'spring'))
time.sleep(7)
myanimelist.append(jikan.season(year = 2021, season = 'summer'))

PHASE 1: Store and retrieve anime data in dataframes for search and sort

In [50]:
# Collect all necessary attributes: Title, Score, Members, Genre, Producers, Year, Season and Synopsis
animedata = []
for animeseason in myanimelist:
    for show in animeseason['anime']:
        animedata.append([show['title'], show['score'], show['members'], ', '.join(genre['name'] for genre in show['genres']), 
                        ', '.join(producer['name'] for producer in show['producers']), animeseason["season_year"],
                        animeseason["season_name"], show['synopsis']])
        

In [111]:
# Create a dataframe to store Anime data and remove duplicate entries
anime_df = pd.DataFrame(animedata, columns = ["Title", "Score", "Members", "Genre", "Producers", "Year", "Season", "Synopsis"])
anime_df.drop_duplicates(subset= "Title", keep = 'first', inplace = True)
anime_df.index.name = "ID"

In [236]:
query_recommendation_list = []

# Function to retrieve anime based on filtering and sorting input
def get_my_anime(output_anime_df):
    list_of_queries = []
    list_of_sort = ["None"]
    query_loop = 0
    while (1):
        if (query_loop == 1):
            print("Your queries: \n"+", ".join(list_of_queries))
        method = input("Search anime based on (Title, Score, Members, Producers, Genre, Year, Season or Synopsis)? Otherwise, input 0.\n")
        if (method == "0"):
            break
        elif (method.lower() in ["title", "score", "members", "producers", "genre", "year", "season", "synopsis"]):
            output_anime_df = query_my_anime(output_anime_df, method, list_of_queries)
        query_loop = 1
    
    # Prefill the list of queries if the user searches without any query
    if not list_of_queries:
        list_of_queries = ["None"]
    output_anime_df = sort_my_anime(output_anime_df, list_of_queries, list_of_sort)
    clear_output(wait=True)
    
    # Print queries and sorting method, and ask for how many results to be output
    print("Your queries: \n"+", ".join(list_of_queries))
    print("\nYour sorting method: \n"+list_of_sort[0])
    limit = int(limit_my_anime())
    
    # Print queries, sorting method and the number of output results
    clear_output(wait=True)
    print("Your queries: \n"+", ".join(list_of_queries))
    print("\nYour sorting method: \n"+list_of_sort[0])
    print("\nYour search yields", output_anime_df.head(limit).shape[0], "results")
    # If the keywords column is present, remove it for querying
    try:
        output_anime_df.drop(columns = ['Keywords'], inplace = True)
    except:
        pass
    return output_anime_df.head(limit)

# Function to filter anime based on attributes
def query_my_anime(interim_df, method, list_of_queries):
    # For string-based variables, ask the user for string input and the algorithm will return anime
    # that contains the string input
    if (method.lower() in ["title", "genre", "producers", "season", "synopsis"]):
        query_content = input("Search by anime "+method.capitalize()+":\n")
        interim_df = interim_df.query('{}.str.contains("{}", case = False)'.format(method.capitalize(), query_content),
                                      engine = 'python')
        list_of_queries.append("{}: {}".format(method.capitalize(), query_content))
    
    # For number-based variables, ask the user if they would like to query less than, equal to or greater than
    # a particular number or if they would like to specify a numerical range
    elif (method.lower() in ["score", "members", "year"]):
        operator = input("Find anime "+method.capitalize()+
                         " less than, equal to, greater than, or range (L = Less, E = Equal, G = Greater, R = Range)?\n")
        if (operator.lower() in ["g", "greater", "greater than"]):
            value = input("Greater than which "+method.capitalize()+ "?\n")
            interim_df = interim_df.query('{} > {}'.format(method.capitalize(), value))
            list_of_queries.append("{} > {}".format(method.capitalize(), value))
        elif (operator.lower() in ["e", "equal", "equal to"]):
            value = input("Equal to which "+method.capitalize()+ "?\n")
            interim_df = interim_df.query('{} == {}'.format(method.capitalize(), value))
            list_of_queries.append("{} = {}".format(method.capitalize(), value))
        elif (operator.lower() in ["l", "less", "less than"]):
            value = input("Less than which "+method.capitalize()+ "?\n")
            interim_df = interim_df.query('{} < {}'.format(method.capitalize(), value))
            list_of_queries.append("{} < {}".format(method.capitalize(), value))
        elif (operator.lower() in ["r", "range"]):
            value_low = input("Between which values inclusive? Set lower limit:\n")
            value_high = input("Between which values inclusive? Set upper limit:\n")
            interim_df.query('{} > {} and {} < {}'.format
                             (method.capitalize(), value_low, method.capitalize(), value_high))
            list_of_queries.append("{} <= {} <= {}".format(value_low, method.capitalize(), value_high))
    clear_output(wait=True)        
    return interim_df

# Functions to sort the order of anime to be output
def sort_my_anime(interim_df, list_of_queries, list_of_sort):
    clear_output(wait = True)
    print("Your queries: \n"+", ".join(list_of_queries))
    while (1):
        sort_attribute = input("Any sorting method (Title, Score, Members, Genre, Year, Season or Synopsis)? Otherwise, input 0.\n")
        if (sort_attribute.lower() in ["title", "score", "members", "genre", "year", "season", "synopsis"]):
            while (1):
                sort_method = input("Ascending or Descending (A = Ascending, D = Descending)?\n")
                if (sort_method.lower() in ["a", "ascending"]):
                    interim_df = interim_df.sort_values(sort_attribute.capitalize(), ascending = True)
                    list_of_sort[0] = sort_attribute.capitalize()+": Ascending"
                    return interim_df
                elif (sort_method.lower() in ["d", "descending"]):
                    interim_df = interim_df.sort_values(sort_attribute.capitalize(), ascending = False)
                    list_of_sort[0] = sort_attribute.capitalize()+": Descending"
                    return interim_df
        elif (sort_attribute == "0"):
            return interim_df
        
def limit_my_anime():
    limit = input("How many results would you like?")
    return limit

In [237]:
# Query and search for anime here!
query_df = get_my_anime(anime_df)

# Add the top result of the query to the recommendation list, if it exists
try:
    if query_df["Title"].iloc[0] not in query_recommendation_list:
        query_recommendation_list.append(query_df["Title"].iloc[0])
except IndexError:
    pass
query_df

Your queries: 
Year = 2020, Score > 8

Your sorting method: 
Members: Descending

Your search yields 6 results


Unnamed: 0_level_0,Title,Score,Members,Genre,Producers,Year,Season,Synopsis
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
16838,Jujutsu Kaisen (TV),8.77,1261779,"Action, Demons, Supernatural, School, Shounen",MAPPA,2020,Fall,Idly indulging in baseless paranormal activiti...
16424,Kaguya-sama wa Kokurasetai?: Tensai-tachi no R...,8.68,832615,"Comedy, Psychological, Romance, School, Seinen",A-1 Pictures,2020,Spring,"After a slow but eventful summer vacation, Shu..."
16622,Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season,8.43,765106,"Psychological, Drama, Thriller, Fantasy",White Fox,2020,Summer,A reunion that was supposed to spell the arriv...
17054,Kimetsu no Yaiba Movie: Mugen Ressha-hen,8.75,677362,"Action, Historical, Demons, Supernatural, Shounen",ufotable,2020,Fall,After a string of mysterious disappearances be...
16193,Haikyuu!!: To the Top,8.36,602873,"Comedy, Sports, Drama, School, Shounen",Production I.G,2020,Winter,After their triumphant victory over Shiratoriz...
16839,Haikyuu!!: To the Top 2nd Season,8.55,485548,"Comedy, Sports, Drama, School, Shounen",Production I.G,2020,Fall,"Once called a fallen powerhouse and known as ""..."


In [132]:
for i, j in anime_df.iterrows():
    print(j)

Title                                   Boogiepop wa Warawanai
Score                                                     7.16
Members                                                 101013
Genre        Psychological, Supernatural, Dementia, Mystery...
Producers                                             Madhouse
Year                                                      2000
Season                                                  Winter
Synopsis     Five years ago, a string of grisly murders sho...
Keywords     many forms source stories possibly exist takin...
Name: 0, dtype: object
Title                                          Megami Kouhosei
Score                                                     6.39
Members                                                  19791
Genre                   Action, Mecha, Military, Sci-Fi, Space
Producers                                                Xebec
Year                                                      2000
Season                          

Name: 740, dtype: object
Title                                        Nono-chan Theater
Score                                                      NaN
Members                                                    211
Genre                                                   Comedy
Producers                                          Opera House
Year                                                      2001
Season                                                  Summer
Synopsis        Animated shorts focusing on the Yamada family.
Keywords     animated shorts focusing comedy opera house ya...
Name: 741, dtype: object
Title                                              Bible Black
Score                                                     6.62
Members                                                  57593
Genre                                                   Hentai
Producers                                Milky Animation Label
Year                                                      2001
Seaso

Name: 1465, dtype: object
Title                        Onegai☆Teacher: Himitsu na Futari
Score                                                     7.31
Members                                                  43544
Genre                           Comedy, Ecchi, Romance, Sci-Fi
Producers                                                Daume
Year                                                      2002
Season                                                    Fall
Synopsis     Kei and Mizuho are having some marital difficu...
Keywords     family decides romance source fi daume marital...
Name: 1466, dtype: object
Title                                             Macross Zero
Score                                                     7.54
Members                                                  34265
Genre              Adventure, Mecha, Military, Sci-Fi, Shounen
Producers                                            Satelight
Year                                                      2002
Sea

Name: 2101, dtype: object
Title                                   Transformers Superlink
Score                                                     6.16
Members                                                   4543
Genre                          Sci-Fi, Adventure, Space, Mecha
Producers                                                Actas
Year                                                      2004
Season                                                  Winter
Synopsis     It has been 10 years since the fateful battle ...
Keywords     fateful battle evil source trying new order sc...
Name: 2102, dtype: object
Title                                          Kaiketsu Zorori
Score                                                     7.06
Members                                                   2664
Genre                                        Adventure, Comedy
Producers                                              Ajia-Do
Year                                                      2004
Sea

Name: 2802, dtype: object
Title                                        Tsubasa Chronicle
Score                                                     7.54
Members                                                 202564
Genre        Action, Adventure, Fantasy, Magic, Romance, Su...
Producers                                            Bee Train
Year                                                      2005
Season                                                  Spring
Synopsis     During an excavation at the mysterious ruins i...
Keywords     disappear memory many feathers clow country wi...
Name: 2803, dtype: object
Title                                            Trinity Blood
Score                                                     7.31
Members                                                 163229
Genre                            Action, Supernatural, Vampire
Producers                                                Gonzo
Year                                                      2005
Sea

Name: 3471, dtype: object
Title              IGPX: Immortal Grand Prix (2005) 2nd Season
Score                                                     7.28
Members                                                   9803
Genre                           Mecha, Sci-Fi, Shounen, Sports
Producers                                       Production I.G
Year                                                      2006
Season                                                  Spring
Synopsis     Team Satomi has just been deemed as the winner...
Keywords     river amy fi deemed number  ig sure opponents ...
Name: 3472, dtype: object
Title                                              Yume Tsukai
Score                                                     6.43
Members                                                   7940
Genre                     Drama, Fantasy, Magic, Slice of Life
Producers                                             Madhouse
Year                                                      2006
Sea

Name: 4165, dtype: object
Title                                           Romeo x Juliet
Score                                                     7.63
Members                                                 161930
Genre                      Drama, Fantasy, Historical, Romance
Producers                                                Gonzo
Year                                                      2007
Season                                                  Spring
Synopsis     On the floating continent of Neo Verona, the M...
Keywords     first sight seizes control vigilante called fo...
Name: 4166, dtype: object
Title                                                Bokura no
Score                                                     7.65
Members                                                 159261
Genre              Sci-Fi, Psychological, Drama, Mecha, Seinen
Producers                                                Gonzo
Year                                                      2007
Sea

Name: 4865, dtype: object
Title        Sayonara Zetsubou Sensei Jo: Zetsubou Shoujo S...
Score                                                     7.44
Members                                                  15157
Genre                                   Comedy, Parody, School
Producers                                                Shaft
Year                                                      2008
Season                                                  Winter
Synopsis     A special summary episode of the first season ...
Keywords     school shaft special summary episode seven com...
Name: 4866, dtype: object
Title                       Ookiku Furikabutte: Kihon no Kihon
Score                                                     7.26
Members                                                  11842
Genre                                    Sports, Drama, Seinen
Producers                                         A-1 Pictures
Year                                                      2008
Sea

Name: 5513, dtype: object
Title                               Afro Samurai: Resurrection
Score                                                     7.35
Members                                                  86277
Genre                               Action, Adventure, Samurai
Producers                                                Gonzo
Year                                                      2009
Season                                                  Winter
Synopsis     After obtaining the Number One headband, which...
Keywords     samurai action fights written mal rewrite peac...
Name: 5514, dtype: object
Title            Kara no Kyoukai Remix: Gate of Seventh Heaven
Score                                                     7.59
Members                                                  40301
Genre          Action, Mystery, Romance, Super Power, Thriller
Producers                                             ufotable
Year                                                      2009
Sea

Name: 6112, dtype: object
Title                       Peeping Life: Gekijou Original-ban
Score                                                     5.44
Members                                                    393
Genre                                    Comedy, Slice of Life
Producers                                                     
Year                                                      2009
Season                                                    Fall
Synopsis     The Peeping Life comedy anime series spawned f...
Keywords     replaced five shorts comedy running joy japane...
Name: 6113, dtype: object
Title                                                 Audition
Score                                                      NaN
Members                                                    279
Genre                                                    Music
Producers                                                     
Year                                                      2009
Sea

Name: 6698, dtype: object
Title                                                    Oroka
Score                                                      NaN
Members                                                    152
Genre                                                    Drama
Producers                                                     
Year                                                      2010
Season                                                  Summer
Synopsis     It is decided that you are not needed on this ...
Keywords     blunt words take care buddha strong mother put...
Name: 6699, dtype: object
Title                 Young Alive!: iPS Saibou Ga Hiraku Mirai
Score                                                      NaN
Members                                                     64
Genre                                            Slice of Life
Producers                            Dynamo Pictures, Madhouse
Year                                                      2010
Sea

Name: 7282, dtype: object
Title                                    JK to Inkou Kyoushi 4
Score                                                     6.63
Members                                                   8064
Genre                                                   Hentai
Producers                                                 PoRO
Year                                                      2011
Season                                                  Spring
Synopsis     Takashi is a teacher. One day, two new student...
Keywords     friendly quiet satsuki days later turns egoist...
Name: 7283, dtype: object
Title                                              Rinkan Club
Score                                                     6.59
Members                                                   7077
Genre                                                   Hentai
Producers                                                Seven
Year                                                      2011
Sea

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




Title                                           Secret Journey
Score                                                     6.23
Members                                                   7161
Genre                    Hentai, Comedy, Supernatural, Fantasy
Producers                                                     
Year                                                      2016
Season                                                  Winter
Synopsis     Genjo Sanzo is on a journey, he has to travel ...
Keywords     source comedy sex buddha happens supernatural ...
Name: 11971, dtype: object
Title                   Shinsei: Futanari Idol - Dekatama-kei!
Score                                                     6.61
Members                                                   7138
Genre                                                   Hentai
Producers                                      Studio 9 Maiami
Year                                                      2016
Season                     

Name: 12529, dtype: object
Title                      Mirai ni Mukete: Bousai wo Kangaeru
Score                                                      NaN
Members                                                    253
Genre                                                    Drama
Producers                                                     
Year                                                      2016
Season                                                  Summer
Synopsis     The Tohoku Regional Development Association is...
Keywords     proposed floodgate emphasize sparing fudai dam...
Name: 12530, dtype: object
Title              Taka no Tsume 8: Yoshida-kun no Batten File
Score                                                      NaN
Members                                                    249
Genre                     Sci-Fi, Comedy, Parody, Supernatural
Producers                                                  DLE
Year                                                      2016
S

Name: 13134, dtype: object
Title                             Ao Oni The Animation (Movie)
Score                                                     4.16
Members                                                   6344
Genre                                          Mystery, Horror
Producers                                          Studio Deen
Year                                                      2017
Season                                                  Winter
Synopsis     An animated film series based on the Ao Oni ga...
Keywords     horror mystery animated film series based feat...
Name: 13135, dtype: object
Title                                        Koisuru Shirokuma
Score                                                     6.59
Members                                                   5884
Genre                                Comedy, Josei, Shounen Ai
Producers                                            Gathering
Year                                                      2017
S

Name: 13726, dtype: object
Title                  Mahou Shoujo Lyrical Nanoha: Reflection
Score                                                     7.45
Members                                                  11670
Genre                             Action, Drama, Magic, Sci-Fi
Producers                                  Seven Arcs Pictures
Year                                                      2017
Season                                                  Summer
Synopsis     A pair of researchers stays behind on their dy...
Keywords     kirie source pair fate dream hayate wishes arr...
Name: 13727, dtype: object
Title        Free! Movie 2: Timeless Medley - Yakusoku: Cha...
Score                                                     6.74
Members                                                   4506
Genre                                                   Comedy
Producers                                      Kyoto Animation
Year                                                      2017
S

Name: 14293, dtype: object
Title                            Bungou Stray Dogs: Dead Apple
Score                                                     7.88
Members                                                 167818
Genre        Action, Comedy, Mystery, Super Power, Supernat...
Producers                                                Bones
Year                                                      2018
Season                                                  Winter
Synopsis     A large scale catastrophe is occurring across ...
Keywords     involve order comedy apparently save written m...
Name: 14294, dtype: object
Title              Code Geass: Hangyaku no Lelouch II - Handou
Score                                                     7.66
Members                                                  56323
Genre        Action, Mecha, Military, School, Sci-Fi, Super...
Producers                                              Sunrise
Year                                                      2018
S

Name: 14864, dtype: object
Title                            Queen's Blade: Unlimited
Score                                                6.16
Members                                              9087
Genre                   Action, Adventure, Ecchi, Fantasy
Producers                                                
Year                                                 2018
Season                                             Summer
Synopsis                               (No synopsis yet.)
Keywords     action fantasy ecchi synopsis yet  adventure
Name: 14865, dtype: object
Title                                 Terra Formars: Earth-hen
Score                                                     6.32
Members                                                   8781
Genre             Action, Sci-Fi, Space, Horror, Drama, Seinen
Producers                                           LIDENFILMS
Year                                                      2018
Season                                        

Name: 15431, dtype: object
Title        Fate/stay night Movie: Heaven's Feel - II. Los...
Score                                                     8.57
Members                                                 251661
Genre                     Action, Fantasy, Magic, Supernatural
Producers                                             ufotable
Year                                                      2019
Season                                                  Winter
Synopsis     The Fifth Holy Grail War continues, and the en...
Keywords     lost butterfly continues aspiring higher stake...
Name: 15432, dtype: object
Title                          Code Geass: Fukkatsu no Lelouch
Score                                                     7.94
Members                                                 240366
Genre        Action, Military, Sci-Fi, Super Power, Drama, ...
Producers                                              Sunrise
Year                                                      2019
S

Name: 15971, dtype: object
Title                                    Hataage! Kemono Michi
Score                                                     6.65
Members                                                 150634
Genre                                 Comedy, Fantasy, Shounen
Producers                                                 ENGI
Year                                                      2019
Season                                                    Fall
Synopsis     Professional wrestler Genzou Shibata sports th...
Keywords     wide professional wrestler pursues sake shoune...
Name: 15972, dtype: object
Title                                            Ahiru no Sora
Score                                                     7.31
Members                                                 148360
Genre                   Comedy, Sports, Drama, School, Shounen
Producers                                             Diomedéa
Year                                                      2019
S

Name: 16570, dtype: object
Title                  Dixia Cheng Yu Yongshi: Nizhuan Zhi Lun
Score                                                      NaN
Members                                                   1003
Genre                       Action, Adventure, Comedy, Fantasy
Producers                                           LIDENFILMS
Year                                                      2020
Season                                                  Spring
Synopsis     Sequel to Arad: Suming Zhi Men.\r\n\r\nThe Jap...
Keywords     july  action  suming zhi men japanese broadcas...
Name: 16571, dtype: object
Title                                       Shouxi Yu Ling Shi
Score                                                     6.68
Members                                                    710
Genre                                          Action, Fantasy
Producers                                                Soyep
Year                                                      2020
S

Name: 17129, dtype: object
Title                                       WIXOSS Diva(A)Live
Score                                                     5.64
Members                                                   7915
Genre                                      Game, Psychological
Producers                                            J.C.Staff
Year                                                      2021
Season                                                  Winter
Synopsis     The story moves the "Wixoss" card game to the ...
Keywords     fans source c idol popular compete continues b...
Name: 17130, dtype: object
Title                                   Show by Rock!! Stars!!
Score                                                     7.03
Members                                                   7851
Genre                                            Music, Comedy
Producers                                        Kinema Citrus
Year                                                      2021
S

PHASE 2: Use natural language processing to determine anime similarity for recommendation

In [228]:
"""
Skip running this block if the session "my_anime_list.db" has been loaded
"""

# Initializing a keywords column for natural language processing
anime_df['Keywords'] = ""

count = 0
# Iterate through each anime and get their keywords
for index, row in anime_df.iterrows():
    # Input relevant keywords such as synopsis, genre, title and producer
    keyword_order = [row['Synopsis'], row['Genre'], row['Title'], row['Producers']]
    keywords = " ".join(keyword_order)

    # Use rake to discard English stopwords
    r = Rake()

    # Extracting the keywords by passing the text
    r.extract_keywords_from_text(keywords)

    # Get the dictionary with keywords as keys and scores as values
    # Score = Degree(word) / Frequency(word)
    key_words_dict_scores = r.get_word_degrees()
    
    
    # Remove punctuations from all keywords
    wordlist = list(key_words_dict_scores.keys())
    clean_wordlist = []
    for index in range(0, len(wordlist)):
        wordlist[index] = clean_text(wordlist[index])
        if wordlist[index] != "":
            clean_wordlist.append(wordlist[index])

    # Assign the key words to the keywords column
    anime_df['Keywords'].iloc[count] = " ".join(clean_wordlist)
    count+= 1

In [229]:
"""
Skip running this block if the session "my_anime_list.db" has been loaded
"""

# Calculate frequency of keywords and generate the count matrix 
count = CountVectorizer()
count_matrix = count.fit_transform(anime_df['Keywords'])

# Generate the cosine similarity matrix
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [224]:
# Store the Python data into byte streams for faster future processing
dill.dump_session('my_anime_list.db')

In [232]:
# Run the recommender here, and set the number of anime to be recommended in the second parameter
anime_recommendations(cosine_sim, 10)

Anime similar to Girls & Panzer are:
Girls & Panzer: Kore ga Hontou no Anzio-sen Desu!
Girls & Panzer Movie
Keijo!!!!!!!!
Ro-Kyu-Bu! SS
Major S4
High School DxD
Hajime no Ippo: Champion Road
Girls & Panzer: Saishuushou Part 1
Girls & Panzer: Dai 63-kai Senshadou Zenkoku Koukousei Taikai Recap
Guilty Crown


PHASE 3: Provide personalised anime recommendations to users through the results of their queries

In [None]:
# Run the recommender here, and set the number of anime to be recommended in the third parameter
anime_recommendations_from_query(query_recommendation_list, cosine_sim, 20)