In [2]:
import asyncio
import json
import time
from pathlib import Path
from pprint import pprint

import xmltodict
from jikanpy import AioJikan

# Directory (relative to the working directory) that holds the input export
# and receives the generated files.
data = Path('data')

/home/cosmic/.local/share/virtualenvs/myanimestats-ScZBfrK1/lib64/python3.11/site-packages/jikanpy/utils.py
https://api.jikan.moe/v4


In [8]:
# Load the animelist data XML file and convert to dict.
# - encoding is given explicitly so the result does not depend on the platform's
#   default locale (assumes the export is UTF-8 — confirm against the file's
#   XML declaration).
# - force_list makes 'anime' a list even when the export contains exactly one
#   entry; without it xmltodict returns a bare dict for a single element and the
#   indexing / iteration below would operate on that dict's keys instead.
with (data / 'animelist.xml').open(encoding='utf-8') as f:
	animelist_data = xmltodict.parse(f.read(), force_list=('anime',))['myanimelist']

# Print first entry
pprint(animelist_data['anime'][0])

{'my_comments': None,
 'my_discuss': '1',
 'my_finish_date': '2023-06-28',
 'my_id': '0',
 'my_priority': 'LOW',
 'my_rated': None,
 'my_rewatch_value': None,
 'my_rewatching': '0',
 'my_rewatching_ep': '0',
 'my_score': '10',
 'my_sns': 'default',
 'my_start_date': '0000-00-00',
 'my_status': 'Completed',
 'my_storage': None,
 'my_storage_value': '0.00',
 'my_tags': None,
 'my_times_watched': '0',
 'my_watched_episodes': '11',
 'series_animedb_id': '52034',
 'series_episodes': '11',
 'series_title': '"Oshi no Ko"',
 'series_type': 'TV',
 'update_on_import': '0'}


In [4]:
def clean_anime(anime: dict):
	"""Flatten the fetched details into ``anime`` and keep only known fields.

	The entry is expected to carry the API response under ``anime['details']``,
	with the actual payload under its ``'data'`` key. The payload is merged
	into the top level, ``'details'`` is removed, every key that is not
	whitelisted is dropped, and selected fields are converted to ``int``.

	The dict is modified in place and also returned.
	"""
	payload = anime['details']['data']
	anime.update(payload)
	anime.pop('details')

	# Whitelisted fields whose (non-None) values are converted to int
	int_fields = (
		# My data
		'my_watched_episodes',
		'my_score',
		'my_times_watched',
		'my_rewatching',
		'my_rewatching_ep',
		'my_discuss',
		# MAL data
		'mal_id',
	)

	# Whitelisted fields that are kept exactly as they are
	untouched_fields = (
		# My data
		'my_start_date', 'my_finish_date', 'my_rated', 'my_storage',
		'my_storage_value', 'my_status', 'my_comments', 'my_rewatch_value',
		'my_priority', 'my_tags', 'my_sns',
		# MAL data
		'url', 'images', 'trailer', 'approved', 'title', 'title_english',
		'title_japanese', 'title_synonyms', 'type', 'source', 'episodes',
		'status', 'airing', 'aired', 'duration', 'rating', 'score',
		'scored_by', 'rank', 'popularity', 'members', 'favorites', 'synopsis',
		'background', 'season', 'year', 'broadcast', 'producers', 'licensors',
		'studios', 'genres', 'explicit_genres', 'themes', 'demographics',
	)

	# field name -> converter (None means "keep the value unchanged")
	key_map = {
		**dict.fromkeys(untouched_fields),
		**dict.fromkeys(int_fields, int),
	}

	def prune_and_cast(record: dict, spec: dict):
		# Snapshot the keys first because entries are removed while looping
		for name in tuple(record):
			if name not in spec:
				record.pop(name)
				continue

			value, converter = record[name], spec[name]
			if isinstance(value, dict) and isinstance(converter, dict):
				# A nested spec describes a nested dict: clean it recursively
				prune_and_cast(value, converter)
			elif converter is not None and value is not None:
				record[name] = converter(value)

	prune_and_cast(anime, key_map)

	return anime

In [9]:
async def fetch_anime_details(anime: dict):
	"""Fetch the Jikan details for one list entry and clean it in place.

	Relies on a module-level ``aio_jikan`` client, so it must be awaited
	inside an ``async with AioJikan() as aio_jikan:`` block.

	Args:
		anime: A raw entry from the XML export (must contain
			``'series_animedb_id'``) or an entry that was already completed.

	Returns:
		The same dict, completed and cleaned by ``clean_anime``.
	"""
	# clean_anime() drops 'series_animedb_id' and adds 'mal_id', so an entry in
	# that state has already been completed: skip it instead of raising KeyError
	# (this makes re-running a cell safe and avoids a redundant API request).
	if 'series_animedb_id' not in anime and 'mal_id' in anime:
		return anime

	anime_id = int(anime['series_animedb_id'])
	details = await aio_jikan.anime(anime_id)
	anime['details'] = details
	return clean_anime(anime)

# Complete the first anime
async with AioJikan() as aio_jikan:
	anime = animelist_data["anime"][0]
	await fetch_anime_details(anime)
	pprint(anime)

{'aired': {'from': '2023-04-12T00:00:00+00:00',
           'prop': {'from': {'day': 12, 'month': 4, 'year': 2023},
                    'to': {'day': 28, 'month': 6, 'year': 2023}},
           'string': 'Apr 12, 2023 to Jun 28, 2023',
           'to': '2023-06-28T00:00:00+00:00'},
 'airing': False,
 'approved': True,
 'background': None,
 'broadcast': {'day': 'Wednesdays',
               'string': 'Wednesdays at 23:00 (JST)',
               'time': '23:00',
               'timezone': 'Asia/Tokyo'},
 'demographics': [{'mal_id': 42,
                   'name': 'Seinen',
                   'type': 'anime',
                   'url': 'https://myanimelist.net/anime/genre/42/Seinen'}],
 'duration': '30 min per ep',
 'episodes': 11,
 'explicit_genres': [],
 'favorites': 23559,
 'genres': [{'mal_id': 8,
             'name': 'Drama',
             'type': 'anime',
             'url': 'https://myanimelist.net/anime/genre/8/Drama'},
            {'mal_id': 37,
             'name': 'Supernatural',
    

In [None]:
# Complete anime data with Jikan API
async with AioJikan() as aio_jikan:
	animes = animelist_data['anime']
	n = len(animes)
	for i, anime in enumerate(animes):
		await fetch_anime_details(anime)
		print(f'{i+1}/{n}')
		time.sleep(1.1)  # Delay to ensure we don't exceed rate limit

# Save the completed data to a JSON file
with (data / 'completed_data.json').open('w') as f:
	json.dump(animelist_data, f)