In [1]:
import requests
import pandas as pd
import time

def fetch_anime_data(page, timeout=30, max_retries=3, retry_delay=2):
    """Fetch one page of anime entries from the Jikan v4 `/anime` endpoint.

    Args:
        page: Page number sent as the `page` query parameter.
        timeout: Seconds to wait for the server before giving up on a single
            attempt. Without a timeout a stalled connection would block the
            crawl indefinitely.
        max_retries: How many additional attempts to make after a retryable
            failure (HTTP 429, HTTP 5xx, or a network-level error).
        retry_delay: Base delay in seconds between attempts; the wait grows
            linearly with the attempt number.

    Returns:
        The value of the response's 'data' field on HTTP 200, or None when
        the request ultimately fails with a non-200 status.

    Raises:
        requests.RequestException: If a network-level error persists through
            the final attempt (same exception type the un-retried call raises).
    """
    url = f"https://api.jikan.moe/v4/anime?page={page}&limit=25"

    for attempt in range(max_retries + 1):
        is_last_attempt = attempt == max_retries
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            if is_last_attempt:
                raise
        else:
            status = response.status_code
            if status == 200:
                return response.json()['data']
            # Only rate limiting and server-side errors are worth retrying;
            # other statuses will not change on a second attempt.
            if status != 429 and status < 500:
                return None

        if not is_last_attempt:
            time.sleep(retry_delay * (attempt + 1))

    return None

def _entry_names(items):
    """Return the 'name' of every dict in `items`; a missing or null list gives []."""
    return [item['name'] for item in (items or [])]


def _related_anime_titles(relations):
    """Collect the titles of related entries whose type is 'anime'.

    Handles relations whose 'entry' is a list of dicts carrying their own
    'type' and 'name', and also a single-dict 'entry' with the type stored on
    the relation itself and the title under 'title'.
    """
    titles = []
    for relation in relations or []:
        entries = relation.get('entry', {})
        if isinstance(entries, dict):
            entries = [entries]
        for entry in entries or []:
            # Prefer the entry's own type; fall back to a type on the relation.
            entry_type = entry.get('type', relation.get('type'))
            if entry_type == 'anime':
                titles.append(entry.get('name', entry.get('title', '')))
    return titles


def extract_anime_features(anime):
    """Flatten one anime record from the API into a single-level feature dict.

    Args:
        anime: Dict for one anime. 'mal_id' and 'title' are required; every
            other key is optional.

    Returns:
        A dict with 29 keys in a fixed order. Nested lists of `{'name': ...}`
        objects (studios, genres, themes, demographics, producers, licensors)
        are reduced to lists of names. Nested keys that are present but null
        are treated the same as missing keys instead of raising.

    Raises:
        KeyError: If 'mal_id' or 'title' is absent.

    NOTE(review): in the output shown in this notebook, related_anime,
    opening_themes and ending_themes are empty for every displayed row;
    presumably the paginated listing does not include 'relations' or 'theme'
    — confirm against the API documentation.
    """
    # `dict.get(key, default)` only uses the default when the key is missing,
    # so null values need an explicit `or` fallback before further access.
    broadcast = anime.get('broadcast') or {}
    theme = anime.get('theme') or {}

    return {
        'id': anime['mal_id'],
        'title': anime['title'],
        'type': anime.get('type', ''),
        'source': anime.get('source', ''),
        'episodes': anime.get('episodes', None),
        'status': anime.get('status', ''),
        'airing': anime.get('airing', False),
        'duration': anime.get('duration', ''),
        'rating': anime.get('rating', ''),
        'score': anime.get('score', None),
        'scored_by': anime.get('scored_by', None),
        'rank': anime.get('rank', None),
        'popularity': anime.get('popularity', None),
        'members': anime.get('members', None),
        'favorites': anime.get('favorites', None),
        'year': anime.get('year', None),
        'season': anime.get('season', ''),
        'studios': _entry_names(anime.get('studios')),
        'genres': _entry_names(anime.get('genres')),
        'themes': _entry_names(anime.get('themes')),
        'demographics': _entry_names(anime.get('demographics')),
        'synopsis': anime.get('synopsis', ''),
        'background': anime.get('background', ''),
        'producers': _entry_names(anime.get('producers')),
        'licensors': _entry_names(anime.get('licensors')),
        'broadcast': broadcast.get('string', None),
        'related_anime': _related_anime_titles(anime.get('relations')),
        'opening_themes': theme.get('openings', []),
        'ending_themes': theme.get('endings', [])
    }

# Crawl the listing page by page, collecting one feature dict per anime.
anime_data = []
page = 1

while True:
    data = fetch_anime_data(page)
    if not data:
        # An empty list means there are no further entries, whereas None means
        # the request itself failed. Surface the latter so a truncated dataset
        # is not mistaken for a complete one.
        if data is None:
            print(f"Warning: fetching page {page} failed; the dataset may be incomplete.")
        break
    anime_data.extend([extract_anime_features(anime) for anime in data])
    page += 1
    time.sleep(1)  # Respect API rate limits

# Build the frame once from the accumulated records.
df = pd.DataFrame(anime_data)
print(f"Total anime entries fetched: {len(df)}")
df.head()

Total anime entries fetched: 27969


Unnamed: 0,id,title,type,source,episodes,status,airing,duration,rating,score,...,themes,demographics,synopsis,background,producers,licensors,broadcast,related_anime,opening_themes,ending_themes
0,1,Cowboy Bebop,TV,Original,26.0,Finished Airing,False,24 min per ep,R - 17+ (violence & profanity),8.75,...,"[Adult Cast, Space]",[],"Crime is timeless. By the year 2071, humanity ...",When Cowboy Bebop first aired in spring of 199...,"[Bandai Visual, Victor Entertainment, Audio Pl...",[Funimation],Saturdays at 01:00 (JST),[],[],[]
1,5,Cowboy Bebop: Tengoku no Tobira,Movie,Original,1.0,Finished Airing,False,1 hr 55 min,R - 17+ (violence & profanity),8.38,...,"[Adult Cast, Space]",[],"Another day, another bounty—such is the life o...",,"[Sunrise, Bandai Visual]","[Sony Pictures Entertainment, Funimation]",,[],[],[]
2,6,Trigun,TV,Manga,26.0,Finished Airing,False,24 min per ep,PG-13 - Teens 13 or older,8.22,...,[Adult Cast],[Shounen],"Vash the Stampede is the man with a $$60,000,0...",The Japanese release by Victor Entertainment h...,[Victor Entertainment],[Funimation],Thursdays at 01:15 (JST),[],[],[]
3,7,Witch Hunter Robin,TV,Original,26.0,Finished Airing,False,25 min per ep,PG-13 - Teens 13 or older,7.24,...,[Detective],[],"Though hidden away from the general public, Wi...",,"[Bandai Visual, Dentsu, Victor Entertainment]","[Funimation, Bandai Entertainment]",Wednesdays at 01:25 (JST),[],[],[]
4,8,Bouken Ou Beet,TV,Manga,52.0,Finished Airing,False,23 min per ep,PG - Children,6.93,...,[],[Shounen],It is the dark century and the people are suff...,,"[TV Tokyo, Dentsu]",[Illumitoon Entertainment],Thursdays at 18:30 (JST),[],[],[]


In [4]:
# Persist the collected dataset without the positional row index.
# List-valued columns (e.g. genres, studios) are written in their string form,
# so they need to be parsed again when the file is read back.
df.to_csv(path_or_buf='anime_dataset.csv', index=False)