In [24]:
import json, unidecode, re
from pathlib import Path

data = Path('data')

# Load the anime list from data/animes.json.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so that titles with
# non-ASCII characters decode the same way regardless of the platform's locale default.
with (data / 'animes.json').open(encoding='utf-8') as f:
	animes = json.load(f)

# Display the first record to inspect the available fields
animes[0]

{'mal_id': 52034,
 'url': 'https://myanimelist.net/anime/52034/Oshi_no_Ko',
 'images': {'jpg': {'image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736.jpg',
   'small_image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736t.jpg',
   'large_image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736l.jpg'},
  'webp': {'image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736.webp',
   'small_image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736t.webp',
   'large_image_url': 'https://cdn.myanimelist.net/images/anime/1812/134736l.webp'}},
 'trailer': {'youtube_id': '1yXa8MAmocQ',
  'url': 'https://www.youtube.com/watch?v=1yXa8MAmocQ',
  'embed_url': 'https://www.youtube.com/embed/1yXa8MAmocQ?enablejsapi=1&wmode=opaque&autoplay=1',
  'images': {'image_url': 'https://img.youtube.com/vi/1yXa8MAmocQ/default.jpg',
   'small_image_url': 'https://img.youtube.com/vi/1yXa8MAmocQ/sddefault.jpg',
   'medium_image_url': 'https://img.youtube.com/vi/1yXa

# Aggregate animes into franchises

In [25]:
def sanitize(text: str):
	"""Normalize a piece of text so that titles can be compared loosely.

	The text is passed through ``unidecode.unidecode``, lowercased, stripped of
	every character that is neither a word character nor whitespace, and has
	each run of whitespace collapsed into a single space. Leading and trailing
	whitespace is removed from the result.

	Args:
		text: The raw text to normalize.

	Returns:
		The normalized text.
	"""
	lowered = unidecode.unidecode(text).lower()
	# Drop punctuation first, then squeeze the whitespace it may have left behind
	without_specials = re.sub(r"[^\w\s]", "", lowered)
	single_spaced = re.sub(r"\s+", " ", without_specials)
	return single_spaced.strip()

def get_franchise(a_title: str, f_title: str, auto: bool, min_ratio: float = 0.8, min_chars: int = 15):
	"""Return the franchise title shared by an anime and a franchise, or None.

	Args:
		a_title: Title of the anime being classified.
		f_title: Current title of the candidate franchise.
		auto: False when the franchise title was supplied by hand; in that case
			the franchise matches as soon as its title appears anywhere in
			``a_title``.
		min_ratio: The common word prefix is accepted when its length divided by
			the length of the shorter title is strictly greater than this value.
		min_chars: The common word prefix is also accepted when it is strictly
			longer than this many characters.

	Returns:
		``f_title`` for a manual substring match, the common leading words
		(joined by single spaces, spelled as in ``a_title``) for a prefix match,
		or None when the titles do not belong together.
	"""
	# Manual franchises: plain, case-sensitive substring test. The title is NOT
	# treated as a regular expression, so characters such as "(", "+", "." or "?"
	# in a title can neither raise re.error nor match unintended text.
	if not auto and f_title in a_title:
		# print(f"Franchise match (manual)")
		return f_title

	# An empty title shares no prefix with anything; stop before dividing by its length
	min_len = min(len(a_title), len(f_title))
	if min_len == 0:
		return None

	# Collect the leading words that are equal once sanitized
	common = []
	for a_word, f_word in zip(a_title.split(" "), f_title.split(" ")):
		if sanitize(a_word) != sanitize(f_word):
			break
		common.append(a_word)
	franchise = " ".join(common)

	# The common prefix covers most of the shortest title
	if len(franchise) / min_len > min_ratio:
		# print(f"Franchise match (XX%): {len(franchise)}/{min_len}")
		return franchise

	# The common prefix is long enough on its own
	if len(franchise) > min_chars:
		# print(f"Franchise match (X characters)")
		return franchise

	return None

known_franchises = [
	"Evangelion",
	"Code Geass",
]

# Seed the list with the hand-picked franchises; these are flagged "auto": False
franchises = [
	{
		"animes": [],
		"title": known_title,
		"auto": False,
	}
	for known_title in known_franchises
]

# Attach each anime to the first franchise that get_franchise accepts
for anime in animes:
	for candidate in franchises:
		shared_title = get_franchise(anime["title"], candidate["title"], candidate["auto"])
		if shared_title:
			# print(f"Franchise of [{anime['title']}] and [{candidate['title']}] is [{shared_title}]")
			candidate["animes"].append(anime)
			# The franchise takes the title returned by the match
			candidate["title"] = shared_title
			break
	else:
		# No existing franchise accepted this anime: it starts a new automatic one
		franchises.append({
			"animes": [anime],
			"title": anime["title"],
			"auto": True,
		})

# Report every franchise together with the titles of its members
print("Found ", len(franchises), " franchises", sep="")
for franchise in franchises:
	titles = [member["title"] for member in franchise["animes"]]
	print(f"- {franchise['title']} [{', '.join(titles)}]")

Found 60 franchises
- Evangelion [Evangelion: 1.0 You Are (Not) Alone, Evangelion: 2.0 You Can (Not) Advance, Evangelion: 3.0 You Can (Not) Redo, Evangelion: 3.0+1.0 Thrice Upon a Time, Neon Genesis Evangelion]
- Code Geass [Code Geass: Fukkatsu no Lelouch, Code Geass: Hangyaku no Lelouch, Code Geass: Hangyaku no Lelouch R2]
- "Oshi no Ko" ["Oshi no Ko"]
- 86 [86, 86 Part 2]
- Angel Beats! [Angel Beats!]
- Ansatsu Kyoushitsu [Ansatsu Kyoushitsu, Ansatsu Kyoushitsu 2nd Season]
- Berserk: Ougon Jidai-hen [Berserk: Ougon Jidai-hen I - Haou no Tamago, Berserk: Ougon Jidai-hen II - Doldrey Kouryaku, Berserk: Ougon Jidai-hen III - Kourin]
- Blue Lock [Blue Lock]
- Bocchi the Rock! [Bocchi the Rock!]
- Boku no Hero Academia [Boku no Hero Academia, Boku no Hero Academia 2nd Season, Boku no Hero Academia 3rd Season, Boku no Hero Academia 4th Season, Boku no Hero Academia 5th Season, Boku no Hero Academia 6th Season, Boku no Hero Academia the Movie 1: Futari no Hero, Boku no Hero Academia the Mo

In [26]:
# Write the franchises list to data/franchises.json
franchises_path = data / 'franchises.json'
with open(franchises_path, 'w') as out_file:
	json.dump(franchises, out_file)

# Compute franchise stats

In [27]:
def weighted_mean(animes, attr, total_episodes):
    """Episode-weighted mean of ``attr`` over ``animes``, skipping missing values.

    An anime whose ``attr`` is None carries no information about the mean, so it
    is left out of both the weighted sum and the divisor: its episodes are
    subtracted from ``total_episodes``. Counting it as a value of 0 would pull
    the mean towards zero. An ``episodes`` value of None counts as 0 episodes.

    Args:
        animes: Iterable of dicts holding ``attr`` and ``"episodes"`` keys.
        attr: Key of the value to average (for example ``"score"``).
        total_episodes: Total number of episodes across ``animes``.

    Returns:
        The weighted mean, or None when no episodes with a value remain
        (which also avoids a division by zero).
    """
    weighted_sum = 0
    missing_episodes = 0
    for anime in animes:
        episodes = anime["episodes"] if anime["episodes"] is not None else 0
        value = anime[attr]
        if value is None:
            missing_episodes += episodes
        else:
            weighted_sum += value * episodes

    valued_episodes = total_episodes - missing_episodes
    if valued_episodes > 0:
        return weighted_sum / valued_episodes
    return None

def union(animes, attr):
    """Collect the distinct ``'name'`` values found under ``attr`` across ``animes``.

    Names are returned in the order they are first encountered. Going through a
    ``set`` would also remove duplicates, but the resulting order of strings
    changes from one interpreter run to the next, which makes the notebook
    output non-reproducible.

    Args:
        animes: Iterable of dicts; ``anime[attr]`` must be an iterable of dicts
            that each have a ``'name'`` key.
        attr: Key of the list to merge (for example ``"genres"``).

    Returns:
        A list of unique names.
    """
    return list(dict.fromkeys(entry['name'] for anime in animes for entry in anime[attr]))

# Add aggregate statistics to every franchise dict.
for franchise in franchises:
    # Deliberately NOT named `animes`: that name holds the full list loaded from
    # animes.json, and rebinding it here would make a later re-run of the
    # franchise-building cell iterate over a single franchise's members instead.
    members = franchise['animes']
    # A missing episode count (None) contributes 0 episodes
    franchise['episodes'] = sum(anime["episodes"] if anime["episodes"] is not None else 0 for anime in members)
    franchise['score'] = weighted_mean(members, "score", franchise['episodes'])
    franchise['my_score'] = weighted_mean(members, "my_score", franchise['episodes'])
    franchise['genres'] = union(members, "genres")
    franchise['themes'] = union(members, "themes")
    franchise['demographics'] = union(members, "demographics")

# Display the first franchise to check the computed fields
franchises[0]

{'animes': [{'mal_id': 2759,
   'url': 'https://myanimelist.net/anime/2759/Evangelion__10_You_Are_Not_Alone',
   'images': {'jpg': {'image_url': 'https://cdn.myanimelist.net/images/anime/7/74975.jpg',
     'small_image_url': 'https://cdn.myanimelist.net/images/anime/7/74975t.jpg',
     'large_image_url': 'https://cdn.myanimelist.net/images/anime/7/74975l.jpg'},
    'webp': {'image_url': 'https://cdn.myanimelist.net/images/anime/7/74975.webp',
     'small_image_url': 'https://cdn.myanimelist.net/images/anime/7/74975t.webp',
     'large_image_url': 'https://cdn.myanimelist.net/images/anime/7/74975l.webp'}},
   'trailer': {'youtube_id': 'ETNj92NPIh4',
    'url': 'https://www.youtube.com/watch?v=ETNj92NPIh4',
    'embed_url': 'https://www.youtube.com/embed/ETNj92NPIh4?enablejsapi=1&wmode=opaque&autoplay=1',
    'images': {'image_url': 'https://img.youtube.com/vi/ETNj92NPIh4/default.jpg',
     'small_image_url': 'https://img.youtube.com/vi/ETNj92NPIh4/sddefault.jpg',
     'medium_image_url'

# Explore Franchises

In [30]:
import pandas as pd

# Build a dataframe from the franchises list, leave out the 'animes' and 'auto'
# columns, and order the rows by my_score from highest to lowest
df = (
    pd.DataFrame(franchises)
    .drop(columns=['animes', 'auto'])
    .sort_values(by=['my_score'], ascending=False)
)

# Show the dataframe
df

Unnamed: 0,title,episodes,score,my_score,genres,themes,demographics
2,"""Oshi no Ko""",11,8.83,10.0,"[Supernatural, Drama]","[Reincarnation, Showbiz]",[Seinen]
54,Tenkuu no Shiro Laputa,1,8.26,10.0,"[Fantasy, Adventure, Award Winning, Romance, S...",[],[]
22,Howl no Ugoku Shiro,1,8.66,10.0,"[Fantasy, Drama, Adventure, Award Winning, Rom...",[],[]
53,Tengoku Daimakyou,13,8.23,10.0,"[Mystery, Adventure, Sci-Fi]",[Survival],[Seinen]
36,Mononoke Hime,1,8.67,10.0,"[Fantasy, Award Winning, Adventure, Action]",[],[]
57,Vinland Saga,49,8.757959,9.979592,"[Adventure, Action, Drama]","[Gore, Historical]",[Seinen]
39,Nichijou:,27,8.42037,9.925926,[Comedy],"[Gag Humor, School]",[Shounen]
48,Shingeki no Kyojin:,89,8.678876,9.539326,"[Award Winning, Action, Drama, Suspense]","[Gore, Military, Survival]",[Shounen]
42,One Punch Man,24,8.0,9.5,"[Action, Comedy]","[Super Power, Parody, Adult Cast]",[Seinen]
18,Fullmetal Alchemist:,115,8.660957,9.113043,"[Fantasy, Drama, Adventure, Award Winning, Act...",[Military],[Shounen]
