# To list the Danbooru character tags most often tagged together with a franchise, query:
https://danbooru.donmai.us/related_tag.json?query={$SEARCH_TERM}&category=character&is_deprecated=false&order=count

[Reference](https://danbooru.donmai.us/wiki_pages/help:api)

In [1]:
import json
import time
from pathlib import Path
from urllib.error import HTTPError
from urllib.parse import quote
from urllib.request import urlopen

from tqdm.auto import tqdm

In [2]:
# Each row pairs a Danbooru copyright tag with the human-readable name that is
# used for the output file. Keeping the pair on one line keeps the two derived
# lists aligned by construction.
_FRANCHISE_TABLE = (
    ("reverse:1999", "Reverse 1999"),
    ("last_origin", "Last Origin"),
    ("warship_girls_r", "Warship Girls R"),
    ("to_heart_(series)", "To Heart"),
    ("darling_in_the_franxx", "Darling in the Franxx"),
    ("queen's_blade", "Queens Blade"),
    ("monster_girl_encyclopedia", "Monster Girl Encyclopedia"),
    ("monster_musume_no_iru_nichijou", "MonMusu"),
)

# Danbooru tags, in table order.
franchises = [danbooru_tag for danbooru_tag, _ in _FRANCHISE_TABLE]

# Display names, in the same order as `franchises`.
franchise_names = [display_name for _, display_name in _FRANCHISE_TABLE]

# Sanity check: both lists must have the same length.
print(len(franchises), len(franchise_names))

8 8


In [3]:
# Value sent as the `limit=` query parameter of the related_tag requests made
# in the franchise loop.
limit = 150
# Passed to reject_if_above for the '1boy' tag: a character whose overlap with
# '1boy' is above this value is rejected.
boy_overlap_threshold = 0.35
# Passed to reject_if_below for the '1girl' tag: a character whose overlap with
# '1girl' is below this value is rejected.
girl_overlap_threshold = 0.45
# Passed to reject_if_below for the franchise tag: a character whose overlap
# with the franchise is below this value is rejected.
fran_overlap_threshold = 0.8

In [4]:
def myurlopen(url, max_wait=300):
    """Open *url*, retrying with exponential backoff on transient HTTP errors.

    The delay starts at 2**2 ms and doubles after every failed attempt.

    Args:
        url: Passed unchanged to ``urlopen``.
        max_wait: Longest single backoff sleep, in seconds, that is tolerated.

    Returns:
        The response object returned by ``urlopen``.

    Raises:
        HTTPError: Immediately, for client errors (4xx) that a retry cannot
            fix; only 408, 425 and 429 are treated as retryable 4xx codes.
        TimeoutError: When the next backoff sleep would exceed ``max_wait``.
    """
    # 4xx statuses worth retrying: request timeout, too early, rate limited.
    retryable_client_errors = {408, 425, 429}
    exponent = 2
    while True:
        try:
            return urlopen(url)
        except HTTPError as e:
            print(e)
            # A permanent client error (404, 422, ...) will not go away by
            # waiting, so surface it now instead of backing off for minutes.
            if 400 <= e.code < 500 and e.code not in retryable_client_errors:
                raise
            sleep_time = 2**exponent / 1000
            if sleep_time > max_wait:
                print('ABORT!')
                raise TimeoutError(f'Timed out with max wait time of {max_wait} s') from e
            print(f'Some HTTP error occurred. Waiting for {sleep_time}...')
            time.sleep(sleep_time)
            exponent += 1

def reject_if_above(url, target_tag, overlap_threshold):
    """Reject when *target_tag* overlaps the queried tag too strongly.

    Fetches the related-tag listing at *url* and looks for *target_tag* in it.

    Args:
        url: related_tag.json URL to fetch via ``myurlopen``.
        target_tag: Tag name to look for in the ``related_tags`` entries.
        overlap_threshold: Overlap coefficient above which the tag is rejected.

    Returns:
        1 (reject) if the fetch fails or the tag's overlap coefficient is
        strictly greater than *overlap_threshold*; 0 (accept) if the overlap
        is at or below the threshold or the tag is not listed at all.
    """
    try:
        response = myurlopen(url)
    except Exception:
        # Best effort: a failed fetch counts as a rejection. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate so the
        # run can still be interrupted.
        return 1

    # Close the connection as soon as the body has been read.
    with response:
        related = json.loads(response.read())['related_tags']

    for entry in related:
        if entry['tag']['name'] == target_tag:
            # Tag is present: reject only on high overlap.
            return 1 if entry['overlap_coefficient'] > overlap_threshold else 0

    # Tag doesn't exist, so accept.
    return 0

def reject_if_below(url, target_tag, overlap_threshold):
    """Reject unless *target_tag* overlaps the queried tag strongly enough.

    Fetches the related-tag listing at *url* and looks for *target_tag* in it.

    Args:
        url: related_tag.json URL to fetch via ``myurlopen``.
        target_tag: Tag name to look for in the ``related_tags`` entries.
        overlap_threshold: Minimum overlap coefficient required to accept.

    Returns:
        0 (accept) if the tag is listed with an overlap coefficient greater
        than or equal to *overlap_threshold*; 1 (reject) if the fetch fails,
        the overlap is lower, or the tag is not listed at all.
    """
    try:
        response = myurlopen(url)
    except Exception:
        # Best effort: a failed fetch counts as a rejection. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate so the
        # run can still be interrupted.
        return 1

    # Close the connection as soon as the body has been read.
    with response:
        related = json.loads(response.read())['related_tags']

    for entry in related:
        if entry['tag']['name'] == target_tag:
            # Tag is present: accept only on high enough overlap.
            return 0 if entry['overlap_coefficient'] >= overlap_threshold else 1

    # Tag doesn't exist, so reject.
    return 1

def upcycle_character(character, qlimit=1000):
    """Build a comma-separated string of appearance tags for *character*.

    Queries the general-category related tags of *character* and keeps those
    that describe eyes, hair, ears or skin.

    Args:
        character: Danbooru character tag to query.
        qlimit: Value sent as the ``limit=`` query parameter.

    Returns:
        The kept tag names joined with ``', '``, with underscores replaced by
        spaces and parentheses backslash-escaped. Tags appear in the order the
        API returned them, without duplicates. Empty string if none qualify.
    """
    tag_overlap = 0.25
    min_post_count = 100
    appearance_suffixes = ('eyes', 'hair', 'ears')

    # Percent-encode the tag so characters such as '&', '#' or '+' cannot
    # corrupt the query string.
    url = (
        "https://danbooru.donmai.us/related_tag.json"
        f"?query={quote(character, safe='')}"
        f"&category=general&is_deprecated=false&order=count&limit={qlimit}"
    )
    with myurlopen(url) as response:
        related_tags = json.loads(response.read())['related_tags']

    # A dict is used as an ordered set: it removes duplicates while keeping
    # the API order, so the output is the same on every run.
    tags = {}
    for rtag in related_tags:
        if rtag['overlap_coefficient'] <= tag_overlap:
            continue
        tag_dict = rtag['tag']
        if tag_dict['post_count'] < min_post_count:
            continue

        name = tag_dict['name']
        if name.endswith(appearance_suffixes) or 'skin' in name:
            tags[name] = None

    # Raw strings keep the literal backslash without relying on an invalid
    # escape sequence.
    processed = [
        x.strip().replace('_', ' ').replace('(', r'\(').replace(')', r'\)')
        for x in tags
    ]
    return ', '.join(processed)

In [5]:
# For every franchise: fetch its most frequent character tags, keep the ones
# that pass the 1boy / 1girl / franchise overlap checks, and write one prompt
# line per accepted character to ./characters/<franchise_name>.txt.
for franchise, franchise_name in zip(franchises, franchise_names):

    print(f"Working on {franchise_name}...")
    characterFile = f'./characters/{franchise_name}.txt'

    characters = []

    # Percent-encode tag names before putting them into a query string so
    # characters such as '&', '#' or '+' cannot corrupt the request.
    url = f"https://danbooru.donmai.us/related_tag.json?query={quote(franchise, safe='')}&category=character&is_deprecated=false&order=count&limit={limit}"
    with myurlopen(url) as response:
        related_tags = json.loads(response.read())['related_tags']

    for rtag in tqdm(related_tags):
        tag_dict = rtag['tag']
        if tag_dict['post_count'] < 100:
            continue
        tag = tag_dict['name']
        quoted_tag = quote(tag, safe='')
        gen_search_url = f"https://danbooru.donmai.us/related_tag.json?query={quoted_tag}&category=general&is_deprecated=false&order=count&limit={limit}"
        fran_search_url = f"https://danbooru.donmai.us/related_tag.json?query={quoted_tag}&category=copyright&is_deprecated=false&order=count&limit={limit}"

        # Each check returns 1 to reject. `or` short-circuits, so no further
        # requests are made once a character has been rejected.
        rejected = (
            # search for 1boy
            reject_if_above(gen_search_url, '1boy', boy_overlap_threshold)
            # search for 1girl
            or reject_if_below(gen_search_url, '1girl', girl_overlap_threshold)
            # make sure this tag actually matches this franchise
            or reject_if_below(fran_search_url, franchise, fran_overlap_threshold)
        )
        if rejected:
            continue

        upcycled_tags = upcycle_character(tag)

        # Raw strings keep the literal backslash without relying on an
        # invalid escape sequence.
        character = tag.strip().replace('_', ' ').replace('(', r'\(').replace(')', r'\)')
        the_franchise = franchise.strip().replace('_', ' ').replace('(', r'\(').replace(')', r'\)')

        characters.append(f'({character}:1.3), {the_franchise}, {upcycled_tags}\n')

    if characters:
        # Create the output directory on demand so the results gathered above
        # are not lost to a FileNotFoundError.
        Path(characterFile).parent.mkdir(parents=True, exist_ok=True)
        with open(characterFile, 'w', encoding='utf-8') as f:
            f.writelines(characters)
    print(f"Finished working on {franchise_name}.")

Working on Reverse 1999...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Reverse 1999.
Working on Last Origin...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Last Origin.
Working on Warship Girls R...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Warship Girls R.
Working on To Heart...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on To Heart.
Working on Darling in the Franxx...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Darling in the Franxx.
Working on Queens Blade...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Queens Blade.
Working on Monster Girl Encyclopedia...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on Monster Girl Encyclopedia.
Working on MonMusu...


  0%|          | 0/150 [00:00<?, ?it/s]

Finished working on MonMusu.
