In [1]:
import pandas as pd
import requests
import json
import sqlite3
import numpy as np
import time
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import re as re
import seaborn as sns
from ast import literal_eval

# Anilist GraphQL API call:

The following functions will request data from https://anilist.co. If you would like to explore additional arguments and field reesponses, you can use the sandbox at https://studio.apollographql.com/sandbox/explorer, and enter https://graphql.anilist.co as the sandbox in the top right corner.

In [8]:
#Function to query anilist.co for a page from thier anime database, excluding explicit content and including only anime with the FINISHED status.
#It also creates a useable data frame for the query.
def anilist_query(i) -> pd.DataFrame:
  query= '''
  query Query($type: MediaType, $status: MediaStatus, $page: Int, $isAdult: Boolean) {
    Page(page: $page) {
      pageInfo {
        currentPage
        hasNextPage
      }
      media(type: $type, status: $status, isAdult: $isAdult) {
        id
        idMal
        title {
          english
          romaji
        }
        type
        format
        status
        seasonYear
        source
        genres
        popularity
        tags {
          name
        }
      }
    }
  }
  ''' 
  variables={
    "type" : 'ANIME',
    "status": 'FINISHED',
    "page" : i,
    "isAdult" : False
  }
  response = requests.post(url, json={'query': query,'variables': variables}) # Make the HTTP Api request
  json = response.json()
  df = pd.json_normalize(json['data']) #parse json for data column
  return df

In [9]:
# Function to loop all pages and append to data frame
def anilist_compile(i) -> pd.DataFrame:
    result = pd.DataFrame() #Create empty data frame.
    while True:
        df = anilist_query(i)
        if df['Page.pageInfo.hasNextPage'][0] == np.True_: #test for if there is another page to query
            df_1 = pd.json_normalize(df['Page.media']).T #flaten db for Page.media and transform
            df_2 = pd.json_normalize(df_1[0])  #flaten db for concatinate
            result = pd.concat([result,df_2], axis=0) #concatinate for result
            i += 1
            time.sleep(3) #limit request rate to complie with anilist
        else:
            df_1 = pd.json_normalize(df['Page.media']).T #flaten db for Page.media and transform
            df_2 = pd.json_normalize(df_1[0]) #flaten db for concatinate
            result = pd.concat([result,df_2], axis=0) #concatinate for result
            break
    return result

In [12]:
url = 'https://graphql.anilist.co'

In [None]:
anilist = anilist_compile(1) #compiling from page 1 will ensure all titles are included

In [None]:
anilist.to_csv("complete_anilist.csv",index=False) # Write to csv.

# User Data Collection:
The following funcition will return a users completed anime list from https://anilist.co. You can return your own list by changing the username in mylist.

In [13]:
# Function to query anilist.co for a defined user's anime media list with the completed status, and transform it into a useable data frame.
def my_anilist(user_name) -> pd.DataFrame:
    query = '''
    query Query($userName: String, $type: MediaType, $status: MediaListStatus) {
    MediaListCollection(userName: $userName, type: $type, status: $status) {
        lists {
        entries {
            id
            mediaId
            media {
            title {
                english
                romaji
            }
            format
            }
        }
        name
        }
    }
    }
        '''
    variables={
        "type" : 'ANIME',
        "status": 'COMPLETED',
        "userName" : user_name,
    }
    response = requests.post(url, json={'query': query,'variables': variables}) # Make the HTTP Api request
    json = response.json()
    df = pd.json_normalize(json['data']) #parse json for data column
    df = pd.json_normalize(df['MediaListCollection.lists']) 
    df =pd.json_normalize(df[0])
    df = pd.json_normalize(df['entries']).T #transform data frame to be readable
    df = pd.json_normalize(df[0])
    df = df[(df['media.format'] != "MOVIE") & (df['media.format'] != "SPECIAL") & (df['media.format'] != "MUSIC") & (df['media.format'] != "TV_SHORT")] #remove unnecesary formats
    df['media.title.english'] = df['media.title.english'].fillna(df['media.title.romaji'])  #clean english title
    df = df.drop(['media.title.romaji','id','media.format'],axis=1) #drop romaji title, userid, and the format of the media
    return df

In [16]:
my_list = my_anilist("leolion023") #change username to import your personal list
my_list

Unnamed: 0,mediaId,media.title.english
1,21711,91 Days
2,9776,A-Channel
3,8101,Hen Zemi
4,20785,Absolute Duo
5,21823,ACCA: 13-Territory Inspection Dept.
...,...,...
1402,112153,Pokémon Journeys: The Series
1403,103301,ENDRO!
1404,103638,Case File nº221: Kabukicho
1405,103222,Magical Girl Spec-Ops Asuka


In [None]:
my_list.to_csv('my_anilist.csv', index=False) #Write to csv

# My Anime List data for Mahou Shoujo

In anilist_analysis.ipynb we do a short section comparing user popularity data from our target data set from anilist.co to the equivalent set of data from My Anime List, a site with a larger user base but a worse API experience. To accomplish this we need a short list of My Anime List ids to be able to run their API. Luckily anilist's API will also return the id for MAL. First we will import the anilist data from the csv so we don't need to run anilist_compile every time we make a query to MAL API.

In [4]:
df = pd.read_csv('complete_anilist.csv')

In [5]:
df

Unnamed: 0,id,type,idMal,format,status,seasonYear,source,genres,popularity,tags,title.english,title.romaji
0,1,ANIME,1.0,TV,FINISHED,1998.0,ORIGINAL,"['Action', 'Adventure', 'Drama', 'Sci-Fi']",384442,"[{'name': 'Space'}, {'name': 'Crime'}, {'name'...",Cowboy Bebop,Cowboy Bebop
1,5,ANIME,5.0,MOVIE,FINISHED,2001.0,ORIGINAL,"['Action', 'Drama', 'Mystery', 'Sci-Fi']",71022,"[{'name': 'Terrorism'}, {'name': 'Primarily Ad...",Cowboy Bebop: The Movie - Knockin' on Heaven's...,Cowboy Bebop: Tengoku no Tobira
2,6,ANIME,6.0,TV,FINISHED,1998.0,MANGA,"['Action', 'Adventure', 'Comedy', 'Drama', 'Sc...",138190,"[{'name': 'Guns'}, {'name': 'Fugitive'}, {'nam...",Trigun,TRIGUN
3,7,ANIME,7.0,TV,FINISHED,2002.0,ORIGINAL,"['Action', 'Drama', 'Mystery', 'Supernatural']",18880,"[{'name': 'Conspiracy'}, {'name': 'Police'}, {...",Witch Hunter ROBIN,Witch Hunter ROBIN
4,8,ANIME,8.0,TV,FINISHED,2004.0,MANGA,"['Adventure', 'Fantasy', 'Supernatural']",2646,"[{'name': 'Shounen'}, {'name': 'Spearplay'}, {...",Beet the Vandel Buster,Bouken Ou Beet
...,...,...,...,...,...,...,...,...,...,...,...,...
18505,188076,ANIME,51753.0,TV_SHORT,FINISHED,,ORIGINAL,"['Adventure', 'Fantasy']",5,[],Tayo and Little Wizards,Madeob Bus Tayo
18506,188077,ANIME,51756.0,SPECIAL,FINISHED,,ORIGINAL,"['Adventure', 'Fantasy']",3,[],Luna's Magic Class,Luna-ui Mabeobdogam
18507,188078,ANIME,32585.0,TV_SHORT,FINISHED,,ORIGINAL,['Adventure'],5,[],,Telemonster
18508,188122,ANIME,61259.0,MUSIC,FINISHED,,ORIGINAL,"['Music', 'Slice of Life']",158,"[{'name': 'Full CGI'}, {'name': 'Band'}, {'nam...",,Tirori Mix (2025)


In [6]:
df_2 = df[(df.format != "MOVIE") & (df.format != "SPECIAL") & (df.format != "MUSIC") & (df.format != "TV_SHORT") & (df.format.notnull())] #drop irrelivant format
df_2['title.english'] = df['title.english'].fillna(df['title.romaji']) #fill missing english titles whit romaji
df_2 = df_2.dropna(subset=['seasonYear','idMal']) #drop missing year and mal id data
df_2 = df_2.drop(['title.romaji','status','type','format','source','tags'],axis=1) # drop irrelivant columns
df_2['seasonYear'] = df_2['seasonYear'].astype(int) #change float to int
df_2.rename(columns={'title.english' : 'Title'}, inplace=True) #rename columns to make more readable
df_2.rename(columns={'seasonYear' : 'Year'}, inplace=True) #rename column
df_3 = df_2[(df_2['Year'] > 2004) & (df_2['Year'] <= 2024)] #narrow down search by year
df_4 = df_3.loc[df_3['popularity'] > 5000] #narrow down search to only include titles with relevant popularity
df_4['idMal'] = df_4['idMal'].astype(int) #change float to int
df_4['genres'] = df_4['genres'].apply(literal_eval) #convert string to list
df_4 = df_4.explode('genres') 
df_5 = df_4.loc[df_4['genres'] == 'Mahou Shoujo'] #narrow down to specific genre
df_5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2['title.english'] = df['title.english'].fillna(df['title.romaji']) #fill missing english titles whit romaji
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_4['idMal'] = df_4['idMal'].astype(int) #change float to int
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_4['genres'] = df_4['genres'

Unnamed: 0,id,idMal,Year,genres,popularity,Title
56,77,77,2005,Mahou Shoujo,13694,Magical Girl Lyrical Nanoha A's
817,933,933,2006,Mahou Shoujo,7750,Magical Witch Punie-Chan
1074,1221,1221,2006,Mahou Shoujo,9010,Powerpuff Girls Z
1408,1642,1642,2005,Mahou Shoujo,10700,Sugar Sugar Rune
1546,1808,1808,2007,Mahou Shoujo,8170,Kamichama Karin
...,...,...,...,...,...,...
16173,154876,53097,2023,Mahou Shoujo,5142,TOKYO MEW MEW NEW Season 2
16401,157883,53716,2023,Mahou Shoujo,5842,Soaring Sky! Precure
16403,157965,53723,2024,Mahou Shoujo,9353,Acro Trip
16606,162780,54722,2024,Mahou Shoujo,54406,Gushing Over Magical Girls


The MAL API reqires you to search each show individualy, you can do it by name, but luckily anilist will allow you to return the MAL id. Using this we are able to loop through the id's for the shows that we want. If you would like to gather data for a larger set of ids be warned that it seems to top out at around 150 shows and no reasonable amount of time between querys seems to help, so prepare for that. You will need to apply for your own CLIENT ID from the My Anime List website. I highly suggest looking through [MAL API Club](https://myanimelist.net/clubs.php?cid=13727) for pointers.

**If you would like to run the MAL API code you will need to provide a file called client_id.py with your own client id from myanimelist.com (see client_id.py.example for the format)**

In [2]:
try:
    from client_id import CLIENT_ID
except ImportError:
    import os
    CLIENT_ID = os.getenv("CLIENT_ID")  # Try to get it from an environment variable
    if not CLIENT_ID:
        raise ValueError("CLIENT_ID not found. Please define it in client_id.py or as an environment variable.")


In [8]:
def compile_mal(url):
    response = requests.get(url, headers = {
    'X-MAL-CLIENT-ID': CLIENT_ID
    })
    response.raise_for_status()
    anime = response.json()
    response.close()
    df = pd.json_normalize([anime])
    return(df)

mal_list= pd.DataFrame()

for i in df_5['idMal']:
    url = 'https://api.myanimelist.net/v2/anime/' + str(i) + '?fields=rank,mean,num_list_users'
    temp = compile_mal(url)
    mal_list = pd.concat([mal_list,temp],axis=0)
    time.sleep(2)

mal_list

Unnamed: 0,id,title,rank,mean,num_list_users,main_picture.medium,main_picture.large
0,77,Mahou Shoujo Lyrical Nanoha A's,732,7.95,67393,https://cdn.myanimelist.net/images/anime/4/676...,https://cdn.myanimelist.net/images/anime/4/676...
0,933,Dai Mahou Touge,4739,6.92,35316,https://cdn.myanimelist.net/images/anime/1349/...,https://cdn.myanimelist.net/images/anime/1349/...
0,1221,Demashita! Powerpuff Girls Z,7159,6.49,37453,https://cdn.myanimelist.net/images/anime/12/32...,https://cdn.myanimelist.net/images/anime/12/32...
0,1642,Sugar Sugar Rune,1975,7.50,45360,https://cdn.myanimelist.net/images/anime/5/747...,https://cdn.myanimelist.net/images/anime/5/747...
0,1808,Kamichama Karin,3455,7.18,54605,https://cdn.myanimelist.net/images/anime/12/76...,https://cdn.myanimelist.net/images/anime/12/76...
...,...,...,...,...,...,...,...
0,53097,Tokyo Mew Mew New ♡ 2nd Season,5812,6.71,15962,https://cdn.myanimelist.net/images/anime/1274/...,https://cdn.myanimelist.net/images/anime/1274/...
0,53716,Hirogaru Sky! Precure,1559,7.61,11524,https://cdn.myanimelist.net/images/anime/1762/...,https://cdn.myanimelist.net/images/anime/1762/...
0,53723,Acro Trip,5749,6.72,21151,https://cdn.myanimelist.net/images/anime/1397/...,https://cdn.myanimelist.net/images/anime/1397/...
0,54722,Mahou Shoujo ni Akogarete,1471,7.63,176793,https://cdn.myanimelist.net/images/anime/1525/...,https://cdn.myanimelist.net/images/anime/1525/...


In [None]:
mal_list.to_csv('MAL_list.csv', index=False) #Write to csv