In [4]:
import requests
from collections import Counter
import pandas as pd
import numpy as np
from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
url = 'https://graphql.anilist.co'

In [6]:
def find_data(year):
    query = """
    query ($year: Int, $page: Int, $perPage: Int) {
      Page(page: $page, perPage: $perPage) {
        media(seasonYear: $year, type: ANIME, sort: POPULARITY_DESC) {
          id
          title {
            romaji
            english
          }
          genres
          season
          seasonYear
          averageScore
          episodes
          popularity
        }
      }
    }
    """

    variables = {
        'year': year,
        'page': 1,
        'perPage': 50
    }

    response = requests.post(url, json={'query': query, 'variables': variables})
    data = response.json()
    anime_data = data['data']['Page']['media']

    df = pd.json_normalize(anime_data)

    # Keep useful columns
    df = df[['title.romaji', 'title.english', 'genres', 'season', 'seasonYear', 'popularity','averageScore']]
    df = df.dropna(subset=['genres'])  # Remove entries without genre data

    # Create a simplified title column
    df['title'] = df['title.english'].fillna(df['title.romaji'])
    df = df.dropna(subset=['title']).reset_index(drop=True)

    # Normalize score (optional)
    df['averageScore'] = df['averageScore'].fillna(df['averageScore'].mean())
    df['score_norm'] = (df['averageScore'] - df['averageScore'].min()) / (df['averageScore'].max() - df['averageScore'].min())

    return df


In [7]:
def get_anime_data_range(start_year, end_year):
    all_years = []
    for y in range(start_year, end_year + 1):
        try:
            year_df = find_data(y)
            all_years.append(year_df)
        except Exception as e:
            print(f"Error fetching {y}: {e}")
    combined_df = pd.concat(all_years, ignore_index=True)
    return combined_df

In [None]:
# test
# df = find_data(2025)
# df.head(15)
# anime_df = get_anime_data_range(2018, 2024)
# anime_df = get_anime_data_range(2005, 2025)
# anime_df.sort_values(by='popularity', ascending=False).head(50)

Unnamed: 0,title.romaji,title.english,genres,season,seasonYear,popularity,averageScore,title,score_norm
400,Shingeki no Kyojin,Attack on Titan,"[Action, Drama, Fantasy, Mystery]",SPRING,2013,912336,85,Attack on Titan,0.930233
700,Kimetsu no Yaiba,Demon Slayer: Kimetsu no Yaiba,"[Action, Adventure, Drama, Fantasy, Supernatural]",SPRING,2019,866659,82,Demon Slayer: Kimetsu no Yaiba,0.78125
50,DEATH NOTE,Death Note,"[Mystery, Psychological, Supernatural, Thriller]",FALL,2006,830515,84,Death Note,0.980769
750,Jujutsu Kaisen,JUJUTSU KAISEN,"[Action, Drama, Supernatural]",FALL,2020,820501,84,JUJUTSU KAISEN,0.88
550,Boku no Hero Academia,My Hero Academia,"[Action, Adventure, Comedy]",SPRING,2016,769451,76,My Hero Academia,0.538462
300,HUNTER×HUNTER (2011),Hunter x Hunter (2011),"[Action, Adventure, Fantasy]",FALL,2011,734799,89,Hunter x Hunter (2011),1.0
500,One Punch Man,One-Punch Man,"[Action, Comedy, Sci-Fi, Supernatural]",FALL,2015,682230,83,One-Punch Man,0.741935
450,Tokyo Ghoul,Tokyo Ghoul,"[Action, Drama, Horror, Mystery, Psychological...",SUMMER,2014,651244,75,Tokyo Ghoul,0.5
600,Shingeki no Kyojin Season 2,Attack on Titan Season 2,"[Action, Drama, Fantasy, Mystery]",SPRING,2017,644109,84,Attack on Titan Season 2,0.848485
200,Hagane no Renkinjutsushi: FULLMETAL ALCHEMIST,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy]",SPRING,2009,634329,90,Fullmetal Alchemist: Brotherhood,1.0


In [9]:
# jaccard similarity
def jaccard_similarity(genres1, genres2):
    set1, set2 = set(genres1), set(genres2)
    intersection = len(set1 & set2)
    union = len(set1 | set2)
    return intersection / union if union > 0 else 0

In [10]:
# get similarity 
def compute_similarity(df, weight_genre=0.8, weight_score=0.2):
    n = len(df)
    sim_matrix = np.zeros((n, n))

    for i in range(n):
        for j in range(n):
            genre_sim = jaccard_similarity(df['genres'][i], df['genres'][j])
            score_sim = 1 - abs(df['score_norm'][i] - df['score_norm'][j])  
            sim_matrix[i, j] = (weight_genre * genre_sim) + (weight_score * score_sim)
    return sim_matrix

In [11]:
# get similiarty
def top_similar(df, sim_matrix, title, n=10):
    if title not in df['title'].values:
        print(f"'{title}' not found in dataset.")
        return

    idx = df.index[df['title'] == title][0]
    similar_indices = sim_matrix[idx].argsort()[::-1][1:n+1]  
    result = df.iloc[similar_indices][['title', 'genres', 'score_norm']]
    return result

In [12]:
anime_df = get_anime_data_range(2005, 2025)
df = anime_df.sort_values(by='popularity', ascending=False)
sim_matrix = compute_similarity(df)

Error fetching 2023: 'NoneType' object is not subscriptable
Error fetching 2024: 'NoneType' object is not subscriptable
Error fetching 2025: 'NoneType' object is not subscriptable


In [13]:
queries = ['Demon Slayer: Kimetsu no Yaiba', 'Golden Time', 'Free! -Iwatobi Swim Club-']

In [14]:
anime = queries[0]
print(anime)
top_similar(df, sim_matrix, anime).sort_values(by='score_norm', ascending=False)

Demon Slayer: Kimetsu no Yaiba


Unnamed: 0,title,genres,score_norm
677,A Place Further Than the Universe,"[Adventure, Comedy, Drama]",0.96
71,Kenichi: The Mightiest Disciple,"[Action, Comedy]",0.846154
17,Strawberry Marshmallow,"[Comedy, Slice of Life]",0.833333
651,My Hero Academia Season 3,"[Action, Adventure, Comedy, Drama]",0.76
69,Air Gear,"[Action, Comedy, Ecchi, Sports]",0.711538
459,JoJo's Bizarre Adventure: Stardust Crusaders,"[Action, Adventure, Supernatural]",0.7
886,The Quintessential Quintuplets Movie,"[Comedy, Drama, Romance, Slice of Life]",0.592593
239,Tenchi Muyo! War on Geminar,"[Action, Comedy, Ecchi, Fantasy, Mecha]",0.540541
355,"Love, Chunibyo & Other Delusions","[Comedy, Drama, Romance, Slice of Life]",0.517241
149,Zombie Loan,"[Action, Horror, Supernatural]",0.361111


In [15]:
anime = queries[1]
print(anime)
top_similar(df, sim_matrix, anime).sort_values(by='score_norm', ascending=False)

Golden Time


Unnamed: 0,title,genres,score_norm
863,BOCCHI THE ROCK!,"[Comedy, Music, Slice of Life]",0.925926
734,Fate/stay night [Heaven's Feel] II. lost butte...,"[Action, Fantasy, Psychological, Romance, Supe...",0.84375
703,Kaguya-sama: Love is War,"[Comedy, Psychological, Romance, Slice of Life]",0.8125
457,Parasyte -the maxim-,"[Action, Drama, Horror, Psychological, Sci-Fi]",0.8
277,Black Lagoon: Roberta's Blood Trail,"[Action, Adventure, Drama, Thriller]",0.741935
106,Lucky☆Star,"[Comedy, Slice of Life]",0.722222
404,Beyond the Boundary,"[Action, Drama, Fantasy, Slice of Life, Supern...",0.674419
442,"Love, Chunibyo & Other Delusions: Glimmering.....","[Comedy, Romance, Slice of Life]",0.651163
326,Hanasaku Iroha ~Blossoms for Tomorrow~,"[Comedy, Drama, Slice of Life]",0.612903
201,Fairy Tail,"[Action, Adventure, Comedy, Fantasy]",0.486486


In [16]:
anime = queries[2]
print(anime)
top_similar(df, sim_matrix, anime).sort_values(by='score_norm', ascending=False)

Free! -Iwatobi Swim Club-


Unnamed: 0,title,genres,score_norm
480,Ping Pong the Animation,"[Drama, Psychological, Sports]",1.0
624,Land of the Lustrous,"[Action, Drama, Fantasy, Mystery, Psychological]",0.818182
723,given,"[Drama, Music, Romance, Slice of Life]",0.78125
210,Redline,"[Action, Romance, Sci-Fi, Sports]",0.756757
209,Eden of the East,"[Drama, Mystery, Psychological, Romance, Sci-F...",0.567568
228,Canaan,"[Action, Sci-Fi, Supernatural]",0.405405
483,World Trigger,"[Action, Sci-Fi]",0.35
270,Demon King Daimao,"[Action, Comedy, Ecchi, Fantasy]",0.225806
299,Cat Planet Cuties,"[Comedy, Ecchi, Romance, Sci-Fi]",0.193548
449,Diabolik Lovers,"[Drama, Horror, Romance, Supernatural]",0.0
