# To list the Danbooru character tags most often tagged together with a franchise:
https://danbooru.donmai.us/related_tag.json?query={$SEARCH_TERM}&category=character&is_deprecated=false&order=count

[Reference](https://danbooru.donmai.us/wiki_pages/help:api)

In [1]:
import json
import os
import time
from urllib.parse import quote
from urllib.request import urlopen

from tqdm.auto import tqdm

In [None]:
# Each pair is (Danbooru copyright tag, display name). Keeping the two values
# side by side and unzipping them yields the same two parallel lists while
# making it impossible for them to drift out of alignment.
franchises, franchise_names = (
    list(column)
    for column in zip(
        ("ace_attorney", "Ace Attorney"),
        ("azur_lane", "Azur Lane"),
        ("blue_archive", "Blue Archive"),
        ("dragon_quest", "Dragon Quest"),
        ("neon_genesis_evangelion", "Evangelion"),
        ("fate_(series)", "Fate"),
        ("fire_emblem", "Fire Emblem"),
        ("genshin_impact", "Genshin"),
        ("granblue_fantasy", "Granblue"),
        ("hololive", "Hololive"),
        ("honkai_(series)", "Honkai"),
        ("kantai_collection", "Kantai Collection"),
        ("league_of_legends", "LoL"),
        ("naruto_(series)", "Naruto"),
        ("nier_(series)", "Nier"),
        ("overwatch", "Overwatch"),
        ("persona", "Persona"),
        ("resident_evil", "Resident Evil"),
        ("vocaloid", "Vocaloid"),
        ("xenoblade_chronicles_(series)", "Xenoblade"),
        ("final_fantasy", "Final Fantasy"),
        ("mario_(series)", "Nintendo"),
        ("the_legend_of_zelda", "Zelda"),
        ("street_fighter", "Street Fighter"),
        ("tekken", "Tekken"),
        ("dead_or_alive", "Dead or Alive"),
        ("darkstalkers", "Darkstalkers"),
        ("guilty_gear", "Guilty Gear"),
        ("soulcalibur", "Soulcalibur"),
        ("dragon_ball", "Dragonball"),
    )
)

# Quick sanity check: both lists must have the same number of entries.
print(len(franchises), len(franchise_names))

32 32


In [None]:
# Value substituted into the `limit=` parameter of the related-tag queries.
limit = 50

# Overlap-coefficient cut-offs applied to each candidate character tag:
#   franchise: rejected when overlap with its franchise tag is below this,
#   girl:      rejected when overlap with "1girl" is below this,
#   boy:       rejected when overlap with "1boy" is above this.
fran_overlap_threshold = 0.8
girl_overlap_threshold = 0.45
boy_overlap_threshold = 0.35

In [4]:
def myurlopen(url, max_wait=300):
    """Open ``url`` with ``urlopen``, retrying with exponential backoff.

    After every failed attempt the function sleeps ``2**i / 1000`` seconds,
    where ``i`` starts at 2 and grows by one per failure (4 ms, 8 ms, 16 ms,
    ...), then tries again.

    Args:
        url: Passed unchanged to ``urlopen``.
        max_wait: Upper bound, in seconds, for a single backoff delay. As soon
            as the next delay would exceed it, the function gives up.

    Returns:
        The response object returned by ``urlopen``.

    Raises:
        TimeoutError: When the next backoff delay would exceed ``max_wait``.
            The last underlying error is attached as ``__cause__``.
    """
    i = 2
    while True:
        try:
            return urlopen(url)
        except Exception as err:
            # Catch ``Exception`` rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit propagate instead of being
            # retried, which would make the loop impossible to interrupt.
            sleep_time = 2**i / 1000
            if sleep_time > max_wait:
                print('ABORT!')
                raise TimeoutError(f'Timed out with max wait time of {max_wait} s') from err
            time.sleep(sleep_time)
            i += 1

def reject_if_above(url, target_tag, overlap_threshold):
    """Decide whether to reject because ``target_tag`` overlaps too strongly.

    Fetches ``url`` through ``myurlopen``, parses the body as JSON and scans
    its ``'related_tags'`` list for the entry whose ``['tag']['name']`` equals
    ``target_tag``.

    Args:
        url: Related-tag query URL to fetch.
        target_tag: Tag name to look for in the response.
        overlap_threshold: Maximum tolerated ``'overlap_coefficient'``.

    Returns:
        1 (reject) if the request fails or the matching entry's overlap
        coefficient is strictly greater than ``overlap_threshold``; 0 (accept)
        if the overlap is at or below the threshold or the tag is absent.
    """
    try:
        r = myurlopen(url)
    except Exception:
        # Best effort: a request that cannot be completed counts as a
        # rejection. ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        return 1

    # Close the HTTP response as soon as the body has been parsed.
    with r:
        rt = json.loads(r.read())['related_tags']

    for t in rt:
        if t['tag']['name'] == target_tag:
            # Only the first matching entry decides the outcome.
            return 1 if t['overlap_coefficient'] > overlap_threshold else 0

    # Tag not present among the related tags: accept.
    return 0

def reject_if_below(url, target_tag, overlap_threshold):
    """Decide whether to reject because ``target_tag`` overlaps too weakly.

    Fetches ``url`` through ``myurlopen``, parses the body as JSON and scans
    its ``'related_tags'`` list for the entry whose ``['tag']['name']`` equals
    ``target_tag``.

    Args:
        url: Related-tag query URL to fetch.
        target_tag: Tag name to look for in the response.
        overlap_threshold: Minimum required ``'overlap_coefficient'``.

    Returns:
        0 (accept) if the matching entry's overlap coefficient is greater than
        or equal to ``overlap_threshold``; 1 (reject) if it is lower, if the
        tag is absent, or if the request fails.
    """
    try:
        r = myurlopen(url)
    except Exception:
        # Best effort: a request that cannot be completed counts as a
        # rejection. ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        return 1

    # Close the HTTP response as soon as the body has been parsed.
    with r:
        rt = json.loads(r.read())['related_tags']

    for t in rt:
        if t['tag']['name'] == target_tag:
            # Only the first matching entry decides the outcome.
            return 0 if t['overlap_coefficient'] >= overlap_threshold else 1

    # Tag not present among the related tags: reject.
    return 1

def upcycle_character(character, qlimit=1000):
    """Build a comma-separated string of appearance tags for a character.

    Queries Danbooru's ``related_tag.json`` endpoint (category ``general``)
    for ``character`` and keeps the related tags that

    * have an ``'overlap_coefficient'`` strictly greater than 0.25,
    * have a ``'post_count'`` of at least 100, and
    * end in ``eyes`` or ``hair``, or contain ``skin`` anywhere in the name.

    Args:
        character: Tag name to query. It is percent-encoded before being
            placed in the URL.
        qlimit: Value of the ``limit`` query parameter.

    Returns:
        The kept tag names joined with ``', '``, in the order the API returned
        them and without duplicates. Underscores are replaced by spaces and
        parentheses are backslash-escaped. Empty string if nothing qualifies.

    Raises:
        TimeoutError: Propagated from ``myurlopen`` when the request keeps
            failing.
    """
    tag_overlap = 0.25
    min_post_count = 100

    # Percent-encode the tag so characters such as '&', '#' or '+' cannot
    # corrupt the query string.
    url = (
        "https://danbooru.donmai.us/related_tag.json"
        f"?query={quote(character, safe='')}&category=general"
        f"&is_deprecated=false&order=count&limit={qlimit}"
    )
    with myurlopen(url) as response:
        related_tags = json.loads(response.read())['related_tags']

    # A dict is used as an insertion-ordered set: it removes duplicates while
    # keeping the output order stable from run to run (a plain ``set`` of
    # strings iterates in a hash-randomised order).
    tags = {}
    for rtag in related_tags:
        if rtag['overlap_coefficient'] <= tag_overlap:
            continue
        tag_dict = rtag['tag']
        if tag_dict['post_count'] < min_post_count:
            continue
        name = tag_dict['name']
        if name.endswith(('eyes', 'hair')) or 'skin' in name:
            tags[name] = None

    # '\\(' is the two-character string backslash + '('; the unescaped
    # spelling '\(' is an invalid escape sequence in a normal string literal.
    processed = [
        name.strip().replace('_', ' ').replace('(', '\\(').replace(')', '\\)')
        for name in tags
    ]
    return ', '.join(processed)

In [5]:
# For every franchise: fetch its most-tagged character tags, keep the ones
# that pass the 1boy / 1girl / franchise overlap checks, and write one line
# per kept character to ./characters/<franchise_name>.txt.
for franchise, franchise_name in zip(franchises, franchise_names):

    print(f"Working on {franchise_name}...")
    characterFile = f'./characters/{franchise_name}.txt'
    # Create the output directory up front so a missing folder cannot make
    # the final write fail after all the network requests have been made.
    os.makedirs(os.path.dirname(characterFile), exist_ok=True)

    characters = []

    # Query values are percent-encoded so special characters in a tag cannot
    # corrupt the query string.
    url = f"https://danbooru.donmai.us/related_tag.json?query={quote(franchise, safe='')}&category=character&is_deprecated=false&order=count&limit={limit}"
    with myurlopen(url) as response:
        related_tags = json.loads(response.read())['related_tags']

    for rtag in tqdm(related_tags):
        tag_dict = rtag['tag']
        if tag_dict['post_count'] < 100:
            continue
        tag = tag_dict['name']
        quoted_tag = quote(tag, safe='')

        # A character is kept only if all three checks accept it. The checks
        # short-circuit, so no further requests are made once one rejects.
        gen_search_url = f"https://danbooru.donmai.us/related_tag.json?query={quoted_tag}&category=general&is_deprecated=false&order=count&limit={limit}"
        # reject when the overlap with 1boy is too high
        if reject_if_above(gen_search_url, '1boy', boy_overlap_threshold):
            continue
        # reject when the overlap with 1girl is too low
        if reject_if_below(gen_search_url, '1girl', girl_overlap_threshold):
            continue

        fran_search_url = f"https://danbooru.donmai.us/related_tag.json?query={quoted_tag}&category=copyright&is_deprecated=false&order=count&limit={limit}"
        # make sure this tag actually matches this franchise
        if reject_if_below(fran_search_url, franchise, fran_overlap_threshold):
            continue

        upcycled_tags = upcycle_character(tag)

        # '\\(' is backslash + '('; the unescaped spelling '\(' is an invalid
        # escape sequence in a normal string literal.
        character = tag.strip().replace('_', ' ').replace('(', '\\(').replace(')', '\\)')
        the_franchise = franchise.strip().replace('_', ' ').replace('(', '\\(').replace(')', '\\)')

        characters.append(f'{character}, {the_franchise}, {upcycled_tags}\n')

    with open(characterFile, 'w', encoding='utf-8') as f:
        f.writelines(characters)
    print(f"Finished working on {franchise_name}.")

Working on Ace Attorney...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Ace Attorney.
Working on Azur Lane...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Azur Lane.
Working on Blue Archive...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Blue Archive.
Working on Disney...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Disney.
Working on Dragon Quest...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Dragon Quest.
Working on Evangelion...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Evangelion.
Working on Fate...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Fate.
Working on Fire Emblem...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Fire Emblem.
Working on Genshin...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Genshin.
Working on Granblue...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Granblue.
Working on Hololive...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Hololive.
Working on Honkai...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Honkai.
Working on Kantai Collection...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Kantai Collection.
Working on LoL...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on LoL.
Working on Naruto...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Naruto.
Working on Nier...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Nier.
Working on Overwatch...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Overwatch.
Working on Persona...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Persona.
Working on Resident Evil...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Resident Evil.
Working on Vocaloid...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Vocaloid.
Working on Xenoblade...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Xenoblade.
Working on Final Fantasy...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Final Fantasy.
Working on Nintendo...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Nintendo.
Working on Zelda...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Zelda.
Working on Street Fighter...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Street Fighter.
Working on Tekken...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Tekken.
Working on Dead or Alive...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Dead or Alive.
Working on Darkstalkers...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Darkstalkers.
Working on Guilty Gear...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Guilty Gear.
Working on Mortal Kombat...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Mortal Kombat.
Working on Soulcalibur...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Soulcalibur.
Working on Dragonball...


  0%|          | 0/275 [00:00<?, ?it/s]

Finished working on Dragonball.
