In [1]:
>>> from iso639 import languages
>>> from pprint import pprint

# Number of language records exposed by the iso639 `languages` collection.
>>> len(languages)

7981

In [2]:
# Fetch the record whose two-letter code is 'an' and display its name.
# NOTE(review): every other lookup in this notebook uses the `part1=` keyword
# for two-letter codes; `alpha2=` is presumably an alias — confirm against the
# installed iso639 version.
aragonese = languages.get(alpha2='an')
aragonese.name

'Aragonese'

In [3]:
# Look up the record for the two-letter code 'ja', print its two- and
# three-letter codes with its name, then dump every attribute of the record.
# The variable is named after the language it actually holds (it was
# previously called `aragonese`, which was misleading here).
japanese = languages.get(part1='ja')
print(japanese.part1, japanese.part3, japanese.name)
pprint(vars(japanese))

ja jpn Japanese
{'inverted': 'Japanese',
 'macro': '',
 'name': 'Japanese',
 'names': [],
 'part1': 'ja',
 'part2b': 'jpn',
 'part2t': 'jpn',
 'part3': 'jpn',
 'part5': ''}


In [4]:
def get_locale(lang):
    """Translate a two-letter language code into its three-letter code.

    Args:
        lang: code passed to ``languages.get(part1=...)``, e.g. ``'ja'``.

    Returns:
        The ``part3`` attribute of the matching record, e.g. ``'jpn'``.
    """
    return languages.get(part1=lang).part3

get_locale('ja')

'jpn'

In [6]:
# Spot-check the helper on two more two-letter codes.
print(get_locale('de'), get_locale('ru'))

deu rus


In [6]:
# Inspect the record behind the two-letter code 'zh'.
# Named for the language it holds rather than reusing `aragonese`.
chinese = languages.get(part1='zh')
pprint(vars(chinese))

{'inverted': 'Chinese',
 'macro': '',
 'name': 'Chinese',
 'names': [],
 'part1': 'zh',
 'part2b': 'chi',
 'part2t': 'zho',
 'part3': 'zho',
 'part5': ''}


In [8]:
# Inspect the record behind the three-letter code 'cmn'; unlike 'zh' it has
# no two-letter code, and its `macro` field points back to 'zho'.
# Named for the language it holds rather than reusing `aragonese`.
mandarin = languages.get(part3='cmn')
pprint(vars(mandarin))

{'inverted': 'Chinese, Mandarin',
 'macro': 'zho',
 'name': 'Mandarin Chinese',
 'names': [],
 'part1': '',
 'part2b': '',
 'part2t': '',
 'part3': 'cmn',
 'part5': ''}


In [13]:
from nltk.corpus import wordnet as wn
import sagas
# Language codes reported by the NLTK WordNet corpus reader.
langs = wn.langs()
print(len(langs), sorted(langs))

# Codes that are skipped when querying iso639 by three-letter code.
excepts = ['qcn']

# One row per remaining language: three-letter code, two-letter code,
# `macro` value, and name.
# NOTE(review): the third column is labelled 'micro' but is filled from
# `macro` — probably a typo; the label is kept as-is here.
rs = [
    (rec.part3, rec.part1, rec.macro, rec.name)
    for rec in (
        languages.get(part3=code)
        for code in langs
        if code not in excepts
    )
]

sagas.to_df(rs, ['code', 'part1', 'micro', 'name'])

29 ['als', 'arb', 'bul', 'cat', 'cmn', 'dan', 'ell', 'eng', 'eus', 'fas', 'fin', 'fra', 'glg', 'heb', 'hrv', 'ind', 'ita', 'jpn', 'nld', 'nno', 'nob', 'pol', 'por', 'qcn', 'slv', 'spa', 'swe', 'tha', 'zsm']


Unnamed: 0,code,part1,micro,name
0,eng,en,,English
1,als,,sqi,Tosk Albanian
2,arb,,ara,Standard Arabic
3,bul,bg,,Bulgarian
4,cat,ca,,Catalan
5,cmn,,zho,Mandarin Chinese
6,dan,da,,Danish
7,ell,el,,Modern Greek (1453-)
8,eus,eu,,Basque
9,fas,fa,,Persian


In [15]:
# Manual short codes for WordNet languages that cannot be resolved through
# the `part1` field: the table above shows 'arb' and 'cmn' with an empty
# part1, and 'qcn' is excluded from the iso639 lookup altogether.
annex = {'arb': 'ar', 'cmn': 'zh', 'qcn': 'zh-TW'}

# Build short code -> WordNet language code, in the order of `langs`.
mappings = {}
for code in langs:
    short = annex.get(code)
    if short is not None:
        # Manual override wins; no iso639 lookup is attempted.
        mappings[short] = code
        continue
    record = languages.get(part3=code)
    if record.part1 == '':
        # No two-letter code and no override: leave this language out.
        continue
    mappings[record.part1] = record.part3
print(len(mappings), mappings)

27 {'en': 'eng', 'ar': 'arb', 'bg': 'bul', 'ca': 'cat', 'zh': 'cmn', 'da': 'dan', 'el': 'ell', 'eu': 'eus', 'fa': 'fas', 'fi': 'fin', 'fr': 'fra', 'gl': 'glg', 'he': 'heb', 'hr': 'hrv', 'id': 'ind', 'it': 'ita', 'ja': 'jpn', 'nl': 'nld', 'nn': 'nno', 'nb': 'nob', 'pl': 'pol', 'pt': 'por', 'zh-TW': 'qcn', 'sl': 'slv', 'es': 'spa', 'sv': 'swe', 'th': 'tha'}


In [16]:
import json_utils
# Save the short-code -> WordNet-code mapping as JSON under ./conf.
json_utils.write_json('./conf/iso-639.json', mappings)

In [20]:
# Load the mapping from ./conf/iso-639.json and check the entry for 'zh'.
iso_map=json_utils.read_json_file('./conf/iso-639.json')
iso_map['zh']

'cmn'

In [23]:
# Invert the mapping: WordNet language code -> short code.
# values() and keys() iterate in the same order, so each pair lines up.
rev_map = dict(zip(iso_map.values(), iso_map.keys()))
print(len(rev_map), rev_map)

27 {'eng': 'en', 'arb': 'ar', 'bul': 'bg', 'cat': 'ca', 'cmn': 'zh', 'dan': 'da', 'ell': 'el', 'eus': 'eu', 'fas': 'fa', 'fin': 'fi', 'fra': 'fr', 'glg': 'gl', 'heb': 'he', 'hrv': 'hr', 'ind': 'id', 'ita': 'it', 'jpn': 'ja', 'nld': 'nl', 'nno': 'nn', 'nob': 'nb', 'pol': 'pl', 'por': 'pt', 'qcn': 'zh-TW', 'slv': 'sl', 'spa': 'es', 'swe': 'sv', 'tha': 'th'}


In [28]:
# Probe iso639 for the three-letter code 'qcn'; if the lookup raises
# KeyError, record the absence as None instead of failing the cell.
try:
    loc = languages.get(part3='qcn')
except KeyError:
    loc=None