In [3]:
import asyncio
import copy
import json
import time
from pathlib import Path
from pprint import pprint

import xmltodict
from jikanpy import AioJikan
from tqdm import tqdm

# Relative directory used for the notebook's files: the MAL export
# ('animelist.xml') is read from it and the merged 'data.json' is written to it.
data = Path('data')

In [4]:
# Load the animelist data XML file and convert to dict.
# The encoding is pinned so titles/tags/comments containing non-ASCII text are
# not decoded with the platform's locale encoding.
# NOTE(review): assumes the export is UTF-8 - confirm against the XML declaration.
xml_data = (data / 'animelist.xml').read_text(encoding='utf-8')

# force_list keeps 'anime' a list even when the export holds a single entry;
# without it xmltodict returns a bare dict for a lone <anime> element, which
# breaks both the [0] lookup below and any later iteration over the entries.
animelist_data = xmltodict.parse(xml_data, force_list=('anime',))['myanimelist']

# Print first entry
pprint(animelist_data['anime'][0])

{'my_comments': None,
 'my_discuss': '1',
 'my_finish_date': '2023-06-28',
 'my_id': '0',
 'my_priority': 'LOW',
 'my_rated': None,
 'my_rewatch_value': None,
 'my_rewatching': '0',
 'my_rewatching_ep': '0',
 'my_score': '10',
 'my_sns': 'default',
 'my_start_date': '0000-00-00',
 'my_status': 'Completed',
 'my_storage': None,
 'my_storage_value': '0.00',
 'my_tags': None,
 'my_times_watched': '0',
 'my_watched_episodes': '11',
 'series_animedb_id': '52034',
 'series_episodes': '11',
 'series_title': '"Oshi no Ko"',
 'series_type': 'TV',
 'update_on_import': '0'}


In [5]:
# MAL data: whitelist of list-entry fields to keep. A value of None means the
# field is kept as-is; a callable means non-None values are passed through it.
anime_key_map = dict.fromkeys((
	'my_watched_episodes',
	'my_start_date',
	'my_finish_date',
	'my_rated',
	'my_score',
	'my_storage',
	'my_storage_value',
	'my_status',
	'my_comments',
	'my_times_watched',
	'my_rewatch_value',
	'my_priority',
	'my_tags',
	'my_rewatching',
	'my_rewatching_ep',
	'my_discuss',
	'my_sns',
))
# Counter/flag fields that should be converted to int.
anime_key_map.update(dict.fromkeys((
	'my_watched_episodes',
	'my_score',
	'my_times_watched',
	'my_rewatching',
	'my_rewatching_ep',
	'my_discuss',
), int))

# Jikan API data: whitelist of fields to keep from the fetched details payload.
# Every field is kept unchanged (None) except 'mal_id', which is converted with int.
details_key_map = dict.fromkeys((
	'mal_id',
	'url',
	'images',
	'trailer',
	'approved',
	'title',
	'title_english',
	'title_japanese',
	'title_synonyms',
	'type',
	'source',
	'episodes',
	'status',
	'airing',
	'aired',
	'duration',
	'rating',
	'score',
	'scored_by',
	'rank',
	'popularity',
	'members',
	'favorites',
	'synopsis',
	'background',
	'season',
	'year',
	'broadcast',
	'producers',
	'licensors',
	'studios',
	'genres',
	'explicit_genres',
	'themes',
	'demographics',
))
details_key_map['mal_id'] = int

def clean_fields(data: dict, fields: dict):
	"""Prune and convert ``data`` in place according to the ``fields`` spec.

	For each key of ``data``:
	  * keys absent from ``fields`` are removed;
	  * when both the value and its spec are dicts, the value is cleaned
	    recursively against the nested spec;
	  * otherwise, when neither the spec nor the value is None, the value is
	    replaced by ``spec(value)``;
	  * anything else is left untouched.

	Returns None; the caller's ``data`` dict is mutated.
	"""
	# Snapshot the items so entries can be removed/replaced while looping.
	for key, value in list(data.items()):
		if key not in fields:
			data.pop(key)
			continue

		spec = fields[key]
		if isinstance(value, dict) and isinstance(spec, dict):
			clean_fields(value, spec)
			continue

		if spec is None or value is None:
			continue
		data[key] = spec(value)

def complete_anime(anime: dict, details: dict):
	"""Merge a list entry with its fetched details into one new dict.

	Each input is deep-copied and then filtered/converted with clean_fields()
	(``details`` against details_key_map, ``anime`` against anime_key_map), so
	neither argument is modified. The cleaned ``anime`` fields are applied last
	and therefore win if a key appears in both.

	Returns:
		A new dict holding the cleaned details fields plus the cleaned entry fields.
	"""
	merged = {}
	for source, key_map in ((details, details_key_map), (anime, anime_key_map)):
		cleaned = copy.deepcopy(source)
		clean_fields(cleaned, key_map)
		merged.update(cleaned)
	return merged

In [6]:
async def fetch_and_complete_anime(anime: dict, client=None):
	"""Fetch the details for one list entry and merge them with the entry.

	Args:
		anime: One entry of the parsed export; its 'series_animedb_id' value is
			converted to int and used as the id to look up.
		client: Optional object exposing an awaitable ``anime(id)`` method.
			When omitted, the module-level ``aio_jikan`` is used, so existing
			calls made inside ``async with AioJikan() as aio_jikan:`` keep
			working. Passing the client explicitly avoids depending on
			whichever cell last bound that global.

	Returns:
		The merged dict produced by complete_anime().

	Raises:
		KeyError: if ``anime`` has no 'series_animedb_id' or the response has
			no 'data' key.
		ValueError: if the id cannot be converted to int.
	"""
	anime_id = int(anime['series_animedb_id'])
	jikan = aio_jikan if client is None else client
	response = await jikan.anime(anime_id)
	# The payload of interest sits under the response's 'data' key.
	return complete_anime(anime, response["data"])

# Complete the first anime and print the result
# Smoke test on a single entry before running the whole list.
# The context manager must bind the name `aio_jikan`: fetch_and_complete_anime()
# reads that module-level name rather than taking the client as an argument.
# Top-level `async with` / `await` is not valid in a plain Python script; this
# cell relies on the notebook's top-level await support.
async with AioJikan() as aio_jikan:
	result = await fetch_and_complete_anime(animelist_data["anime"][0])
pprint(result)

Fetching details for "Oshi no Ko" (52034)
{'aired': {'from': '2023-04-12T00:00:00+00:00',
           'prop': {'from': {'day': 12, 'month': 4, 'year': 2023},
                    'to': {'day': 28, 'month': 6, 'year': 2023}},
           'string': 'Apr 12, 2023 to Jun 28, 2023',
           'to': '2023-06-28T00:00:00+00:00'},
 'airing': False,
 'approved': True,
 'background': None,
 'broadcast': {'day': 'Wednesdays',
               'string': 'Wednesdays at 23:00 (JST)',
               'time': '23:00',
               'timezone': 'Asia/Tokyo'},
 'demographics': [{'mal_id': 42,
                   'name': 'Seinen',
                   'type': 'anime',
                   'url': 'https://myanimelist.net/anime/genre/42/Seinen'}],
 'duration': '30 min per ep',
 'episodes': 11,
 'explicit_genres': [],
 'favorites': 23559,
 'genres': [{'mal_id': 8,
             'name': 'Drama',
             'type': 'anime',
             'url': 'https://myanimelist.net/anime/genre/8/Drama'},
            {'mal_id': 37,

In [8]:
# Complete anime data with Jikan API
completed = []
async with AioJikan() as aio_jikan:
	for i, anime in tqdm(list(enumerate(animelist_data['anime']))):
		result = await fetch_and_complete_anime(anime)
		completed.append(result)
		time.sleep(1.1)  # Delay to ensure we don't exceed rate limit

# Save the completed data to a JSON file
with (data / 'data.json').open('w') as f:
	json.dump(completed, f)

  0%|          | 0/207 [00:00<?, ?it/s]

Fetching details for "Oshi no Ko" (52034)


  0%|          | 1/207 [00:01<04:32,  1.32s/it]

Fetching details for "Oshi no Ko" Season 2 (55791)


  1%|          | 2/207 [00:02<04:12,  1.23s/it]

Fetching details for 3-gatsu no Lion (31646)


  1%|▏         | 3/207 [00:03<04:08,  1.22s/it]

Fetching details for 86 (41457)





CancelledError: 