In [1]:
import requests
from collections import Counter
import pandas as pd
import numpy as np
from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# AniList GraphQL endpoint that every request in this notebook is posted to.
url = "https://graphql.anilist.co"

In [3]:
def find_data(year):
    """Fetch the 50 most popular AniList anime of a season year as a DataFrame.

    Posts a single GraphQL query (page 1, 50 entries, sorted by popularity
    descending) to the module-level ``url`` and flattens the response.

    Parameters
    ----------
    year : int
        Value passed to the ``seasonYear`` filter of the query.

    Returns
    -------
    pandas.DataFrame
        Columns: ``title.romaji``, ``title.english``, ``genres``, ``season``,
        ``seasonYear``, ``popularity``, ``averageScore``, ``title`` (English
        title, falling back to romaji) and ``score_norm`` (``averageScore``
        min-max scaled within this one year). Empty, but with the same
        columns, when the API returns no media for the year.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    RuntimeError
        If the response body carries no ``data`` payload.
    """
    query = """
    query ($year: Int, $page: Int, $perPage: Int) {
      Page(page: $page, perPage: $perPage) {
        media(seasonYear: $year, type: ANIME, sort: POPULARITY_DESC) {
          id
          title {
            romaji
            english
          }
          genres
          season
          seasonYear
          averageScore
          episodes
          popularity
        }
      }
    }
    """

    variables = {
        'year': year,
        'page': 1,
        'perPage': 50
    }

    # A timeout keeps a stalled connection from hanging the whole notebook.
    response = requests.post(
        url,
        json={'query': query, 'variables': variables},
        timeout=30,
    )
    # Surface HTTP failures (e.g. rate limiting) instead of parsing an error body.
    response.raise_for_status()
    data = response.json()
    if data.get('data') is None:
        raise RuntimeError(f"AniList returned no data for {year}: {data.get('errors')}")
    anime_data = data['data']['Page']['media']

    kept_columns = ['title.romaji', 'title.english', 'genres', 'season',
                    'seasonYear', 'popularity', 'averageScore']

    # No media for this year: return an empty frame with the usual schema
    # rather than failing on the column selection below.
    if not anime_data:
        return pd.DataFrame(columns=kept_columns + ['title', 'score_norm'])

    df = pd.json_normalize(anime_data)

    # Keep useful columns (copy so the assignments below never hit a view)
    df = df[kept_columns].copy()
    df = df.dropna(subset=['genres'])  # Remove entries without genre data

    # Create a simplified title column
    df['title'] = df['title.english'].fillna(df['title.romaji'])
    df = df.dropna(subset=['title']).reset_index(drop=True)

    # Fill missing scores with the year's mean, then min-max normalize.
    scores = pd.to_numeric(df['averageScore'], errors='coerce')
    scores = scores.fillna(scores.mean())
    df['averageScore'] = scores

    score_min = scores.min()
    span = scores.max() - score_min
    if span > 0:
        df['score_norm'] = (scores - score_min) / span
    else:
        # All scores equal (or all missing): 0/0 would give NaN, which would
        # then propagate into every similarity computed from this column.
        df['score_norm'] = 0.0

    return df


In [4]:
def get_anime_data_range(start_year, end_year):
    """Fetch anime data for every year in ``[start_year, end_year]`` and stack it.

    Calls ``find_data`` once per year. A year whose fetch raises is reported
    with ``print`` and skipped, so one failing request does not discard the
    years that succeeded.

    Parameters
    ----------
    start_year, end_year : int
        Inclusive bounds of the year range.

    Returns
    -------
    pandas.DataFrame
        The per-year frames concatenated with a fresh 0..n-1 index. An empty
        DataFrame when nothing was fetched (empty range, or every year failed).
    """
    all_years = []
    for y in range(start_year, end_year + 1):
        try:
            all_years.append(find_data(y))
        except Exception as e:
            # Deliberate best effort: log the failing year and keep going.
            print(f"Error fetching {y}: {e}")

    # pd.concat raises "No objects to concatenate" on an empty list.
    if not all_years:
        return pd.DataFrame()
    return pd.concat(all_years, ignore_index=True)

In [23]:
# Load every year from 2005 through 2025, then preview the 50 most popular titles.
anime_df = get_anime_data_range(start_year=2005, end_year=2025)
anime_df.sort_values('popularity', ascending=False).head(50)

Unnamed: 0,title.romaji,title.english,genres,season,seasonYear,popularity,averageScore,title,score_norm
400,Shingeki no Kyojin,Attack on Titan,"[Action, Drama, Fantasy, Mystery]",SPRING,2013,912565,85,Attack on Titan,0.930233
700,Kimetsu no Yaiba,Demon Slayer: Kimetsu no Yaiba,"[Action, Adventure, Drama, Fantasy, Supernatural]",SPRING,2019,866889,82,Demon Slayer: Kimetsu no Yaiba,0.78125
50,DEATH NOTE,Death Note,"[Mystery, Psychological, Supernatural, Thriller]",FALL,2006,830712,84,Death Note,0.980769
750,Jujutsu Kaisen,JUJUTSU KAISEN,"[Action, Drama, Supernatural]",FALL,2020,820731,84,JUJUTSU KAISEN,0.88
550,Boku no Hero Academia,My Hero Academia,"[Action, Adventure, Comedy]",SPRING,2016,769627,76,My Hero Academia,0.538462
300,HUNTER×HUNTER (2011),Hunter x Hunter (2011),"[Action, Adventure, Fantasy]",FALL,2011,734970,89,Hunter x Hunter (2011),1.0
500,One Punch Man,One-Punch Man,"[Action, Comedy, Sci-Fi, Supernatural]",FALL,2015,682430,83,One-Punch Man,0.741935
450,Tokyo Ghoul,Tokyo Ghoul,"[Action, Drama, Horror, Mystery, Psychological...",SUMMER,2014,651412,75,Tokyo Ghoul,0.5
600,Shingeki no Kyojin Season 2,Attack on Titan Season 2,"[Action, Drama, Fantasy, Mystery]",SPRING,2017,644272,84,Attack on Titan Season 2,0.848485
200,Hagane no Renkinjutsushi: FULLMETAL ALCHEMIST,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy]",SPRING,2009,634483,90,Fullmetal Alchemist: Brotherhood,1.0


In [6]:
# Jaccard similarity between two genre collections
def jaccard_similarity(genres1, genres2):
    """Return |A ∩ B| / |A ∪ B| for the two genre collections.

    Duplicates within a collection are ignored. Returns 0 when both
    collections are empty.
    """
    first = set(genres1)
    second = set(genres2)
    combined = first.union(second)
    if not combined:
        return 0
    shared = first.intersection(second)
    return len(shared) / len(combined)

In [7]:
# Pairwise similarity matrix
def compute_similarity(df, weight_genre=0.8, weight_score=0.2):
    """Build the n x n similarity matrix for the rows of ``df``.

    Each entry is
    ``weight_genre * jaccard(genres_i, genres_j)
    + weight_score * (1 - |score_norm_i - score_norm_j|)``.

    Rows are addressed by position, so the result lines up with ``df.iloc``
    whatever index ``df`` carries (filtered, sorted or otherwise non-default).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``genres`` column of iterables and a numeric
        ``score_norm`` column.
    weight_genre, weight_score : float
        Weights of the genre and score terms.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), len(df))``.
    """
    # Extract both columns once; per-element Series lookups inside the double
    # loop are label-based (wrong for non-default indexes) and very slow.
    genres = df['genres'].tolist()
    scores = df['score_norm'].to_numpy(dtype=float)
    n = len(genres)

    # The score term needs no Python loop: broadcast all pairwise differences.
    score_sim = 1 - np.abs(scores[:, None] - scores[None, :])

    # Jaccard similarity is symmetric, so compute each unordered pair once.
    genre_sim = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            pair_sim = jaccard_similarity(genres[i], genres[j])
            genre_sim[i, j] = pair_sim
            genre_sim[j, i] = pair_sim

    return (weight_genre * genre_sim) + (weight_score * score_sim)

In [8]:
# Look up the titles most similar to a given one
def top_similar(df, sim_matrix, title, n=10):
    """Return the ``n`` rows of ``df`` most similar to ``title``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``title``, ``genres`` and ``score_norm`` columns; row
        order must match the rows/columns of ``sim_matrix``.
    sim_matrix : numpy.ndarray
        Square similarity matrix, addressed by row position.
    title : str
        Title to look up in ``df['title']`` (exact match; the first
        occurrence is used if the title appears more than once).
    n : int
        Maximum number of neighbours to return.

    Returns
    -------
    pandas.DataFrame or None
        ``title``, ``genres`` and ``score_norm`` of the neighbours in
        descending similarity order, or ``None`` (after printing a message)
        when ``title`` is not in the dataset.
    """
    # Work with row positions: sim_matrix is positional, while df.index may
    # hold arbitrary labels after filtering or sorting.
    matches = np.flatnonzero(df['title'].to_numpy() == title)
    if matches.size == 0:
        print(f"'{title}' not found in dataset.")
        return

    idx = matches[0]
    ranked = np.asarray(sim_matrix[idx]).argsort()[::-1]
    # Drop the query row explicitly. Slicing off the first entry is only
    # correct when the query sorts first, which fails on a tie at the top
    # similarity (another title with identical genres and score).
    similar_indices = ranked[ranked != idx][:n]
    result = df.iloc[similar_indices][['title', 'genres', 'score_norm']]
    return result

In [9]:
# Build the working dataset (2005 through 2025) and its pairwise similarity matrix.
df = anime_df = get_anime_data_range(start_year=2005, end_year=2025)
sim_matrix = compute_similarity(df)

In [19]:
# Titles to look up in the cells below (must match the `title` column exactly).
queries = [
    'JUJUTSU KAISEN',
    'A Silent Voice',
    'Free! -Iwatobi Swim Club-',
]

In [18]:
# Nearest neighbours of the first query, listed by normalized score (highest first).
anime = queries[0]
print(anime)
top_similar(df, sim_matrix, title=anime).sort_values('score_norm', ascending=False)

JUJUTSU KAISEN


Unnamed: 0,title,genres,score_norm
1013,Chainsaw Man – The Movie: Reze Arc,"[Action, Drama, Supernatural]",0.966667
900,JUJUTSU KAISEN Season 2,"[Action, Drama, Supernatural]",0.857143
360,Fate/Zero Season 2,"[Action, Drama, Fantasy, Supernatural]",0.827586
39,Rozen Maiden: Dreaming,"[Action, Comedy, Drama, Supernatural]",0.818182
558,Assassination Classroom Second Season,"[Action, Comedy, Drama, Supernatural]",0.807692
850,Chainsaw Man,"[Action, Drama, Horror, Supernatural]",0.777778
824,JoJo's Bizarre Adventure: STONE OCEAN,"[Action, Adventure, Drama, Supernatural]",0.763158
711,Dororo,"[Action, Adventure, Drama, Supernatural]",0.75
305,Fate/Zero,"[Action, Drama, Fantasy, Supernatural]",0.741935
883,JoJo's Bizarre Adventure: STONE OCEAN Part 2,"[Action, Adventure, Drama, Supernatural]",0.740741


In [20]:
# Nearest neighbours of the second query, listed by normalized score (highest first).
anime = queries[1]
print(anime)
top_similar(df, sim_matrix, title=anime).sort_values('score_norm', ascending=False)

A Silent Voice


Unnamed: 0,title,genres,score_norm
656,I Want to Eat Your Pancreas,"[Drama, Romance, Slice of Life]",0.96
93,Honey and Clover II,"[Drama, Romance, Slice of Life]",0.884615
47,Emma: A Victorian Romance,"[Drama, Romance, Slice of Life]",0.833333
773,"Josee, the Tiger and the Fish","[Drama, Romance, Slice of Life]",0.8
687,Bloom Into You,"[Drama, Romance, Slice of Life]",0.72
32,Peach Girl: Super Pop Love Hurricane,"[Drama, Romance, Slice of Life]",0.69697
625,Tsukigakirei,"[Drama, Romance, Slice of Life]",0.666667
77,We Were There,"[Drama, Romance, Slice of Life]",0.653846
167,"Clannad: Another World, Tomoyo Chapter","[Drama, Romance, Slice of Life]",0.645161
103,5 Centimeters per Second,"[Drama, Romance, Slice of Life]",0.611111


In [13]:
# Nearest neighbours of the third query, listed by normalized score (highest first).
anime = queries[2]
print(anime)
top_similar(df, sim_matrix, title=anime).sort_values('score_norm', ascending=False)

Free! -Iwatobi Swim Club-


Unnamed: 0,title,genres,score_norm
668,Grand Blue Dreaming,"[Comedy, Slice of Life, Sports]",0.88
218,Hajime no Ippo: The Fighting! New Challenger,"[Comedy, Drama, Sports]",0.864865
482,SHIROBAKO,"[Comedy, Drama, Slice of Life]",0.85
757,HAIKYU!! TO THE TOP,"[Comedy, Drama, Sports]",0.84
1040,Grand Blue Dreaming Season 2,"[Comedy, Slice of Life, Sports]",0.833333
506,HAIKYU!! 2nd Season,"[Comedy, Drama, Sports]",0.806452
786,HAIKYU!! LAND VS. AIR,"[Comedy, Drama, Sports]",0.64
326,Hanasaku Iroha ~Blossoms for Tomorrow~,"[Comedy, Drama, Slice of Life]",0.612903
565,Yuri!!! on ICE,"[Comedy, Drama, Sports]",0.576923
491,Free! -Eternal Summer-,"[Comedy, Drama, Slice of Life, Sports]",0.45
