In [19]:
import glob

In [20]:
# Maps three-letter language codes to the locale tags used as keys in the saved tensors.
#
# The original literal listed 'zho' twice ('zh-CN', then 'zh-TW'). Python keeps the
# last value for a duplicated key in a dict display, so 'zh-CN' was never reachable.
# Only the effective entry is kept here; a one-to-one mapping cannot carry both.
# NOTE(review): confirm 'zh-TW' (rather than 'zh-CN') is the locale wanted for 'zho'.
# NOTE(review): 'sl-SL' is not the usual region code for Slovenia ('SI') — presumably
# it mirrors the locale naming of the downstream dataset; confirm before changing.
iso_to_locale = {
    'afr': 'af-ZA', 'amh': 'am-ET', 'ara': 'ar-SA', 'aze': 'az-AZ', 'ben': 'bn-BD', 'cat': 'ca-ES', 'zho': 'zh-TW', 'dan': 'da-DK', 'deu': 'de-DE', 'ell': 'el-GR', 'eng': 'en-US', 'spa': 'es-ES',
    'fas': 'fa-IR', 'fin': 'fi-FI', 'fra': 'fr-FR', 'heb': 'he-IL', 'hun': 'hu-HU', 'hye': 'hy-AM', 'ind': 'id-ID', 'isl': 'is-IS', 'ita': 'it-IT', 'jpn': 'ja-JP', 'jav': 'jv-ID', 'kat': 'ka-GE', 'khm': 'km-KH',
    'kor': 'ko-KR', 'lav': 'lv-LV', 'mon': 'mn-MN', 'msa': 'ms-MY', 'mya': 'my-MM', 'nor': 'nb-NO', 'nld': 'nl-NL', 'pol': 'pl-PL', 'por': 'pt-PT', 'ron': 'ro-RO', 'rus': 'ru-RU', 'slv': 'sl-SL', 'sqi': 'sq-AL',
    'swe': 'sv-SE', 'swa': 'sw-KE', 'hin': 'hi-IN', 'kan': 'kn-IN', 'mal': 'ml-IN', 'tam': 'ta-IN', 'tel': 'te-IN', 'tha': 'th-TH', 'tgl': 'tl-PH', 'tur': 'tr-TR', 'urd': 'ur-PK', 'vie': 'vi-VN', 'cym': 'cy-GB',
}

In [21]:
import json

# Load the ppl vectors file into `ppl`.
# The encoding is pinned to UTF-8 so the result does not depend on the platform's
# locale default, which is what open() falls back to when no encoding is given.
with open("ppl_vectors/ppl_vectors.json", encoding="utf-8") as fp:
    ppl = json.load(fp)

In [22]:
# Language codes present in `ppl` that also have a locale mapping, in `ppl`'s key order.
available_keys = [iso_code for iso_code in ppl.keys() if iso_code in iso_to_locale]
# Locale tag for each retained code, position-aligned with `available_keys`.
available_locale = [iso_to_locale[iso_code] for iso_code in available_keys]

# One list per line, codes first and locales second.
print(available_keys, available_locale, sep="\n")

['ind', 'tam', 'tha', 'tgl', 'eng', 'khm', 'vie', 'mya', 'jav']
['id-ID', 'ta-IN', 'th-TH', 'tl-PH', 'en-US', 'km-KH', 'vi-VN', 'my-MM', 'jv-ID']


In [23]:
import torch

# Convert each `<folder>/<name>.json` vector file into `<folder>/<name>.pt`:
# a dict mapping locale tag -> float64 tensor, written with torch.save.
#
# A file is skipped (with a printed reason) when
#   * any selected language's vector contains '--'
#     (presumably a missing-value placeholder — TODO confirm), or
#   * it does not provide a vector for every locale in `available_locale`.
for src_path in glob.glob("*/*.json"):
    with open(src_path, encoding="utf-8") as fp:
        data = json.load(fp)

    # Keep only the languages that are in `available_keys`, preserving the file's key order.
    selected = {k: v for k, v in data.items() if k in available_keys}

    # Report the first offending language only, then drop the whole file.
    bad_key = next((k for k, v in selected.items() if '--' in v), None)
    if bad_key is not None:
        print(f'`--` found in {bad_key}', src_path)
        continue

    new_data = {
        iso_to_locale[k]: torch.tensor(v, dtype=torch.float64)
        for k, v in selected.items()
    }

    # The previous guard tested new_data's keys against available_locale, which can
    # never fail because every key is derived from available_keys. Test the other
    # direction so that incomplete (or empty) dicts are not written out.
    missing_locales = set(available_locale).difference(new_data)
    if missing_locales:
        print(f'missing locales {sorted(missing_locales)}', src_path)
        continue

    # Swap only the trailing extension; str.replace would also rewrite any
    # '.json' occurring earlier in the path (e.g. in the directory name).
    dst_path = src_path[:-len('.json')] + '.pt'
    with open(dst_path, 'wb') as fp:
        torch.save(new_data, fp)

`--` found in ind uriel_ppl_vectors/inventory_phoible_gm_std_scaled.json
`--` found in ind uriel_ppl_vectors/syntax_sswl_minmax_scaled.json
`--` found in eng uriel_ppl_vectors/learned_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/inventory_phoible_spa_std_scaled.json
`--` found in ind uriel_ppl_vectors/phonology_wals_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/inventory_phoible_upsid_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/phonology_wals_std_scaled.json
`--` found in ind uriel_ppl_vectors/phonology_ethnologue_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/syntax_ethnologue_std_scaled.json
`--` found in ind uriel_ppl_vectors/syntax_wals_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/inventory_phoible_spa_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/inventory_phoible_aa_minmax_scaled.json
`--` found in ind uriel_ppl_vectors/phonology_ethnologue_std_scaled.json
`--` found in ind uriel_ppl_vectors/inventory_ethnologue_std_scaled.

In [24]:
import os
import shutil

# Move every `.pt` file found one level below the working directory into
# `../vectors/<folder name with '_vectors' removed>/`.
for foldername in os.listdir():
    if not os.path.isdir(foldername):
        continue

    pt_filenames = [name for name in os.listdir(foldername) if name.endswith(".pt")]
    if not pt_filenames:
        # Nothing to move: avoid leaving an empty directory under ../vectors for
        # unrelated folders (hidden/checkpoint directories, folders with no output).
        continue

    # Destination is computed once and reused for both mkdir and the moves.
    dst_dir = f"../vectors/{foldername.replace('_vectors', '')}"
    os.makedirs(dst_dir, exist_ok=True)

    for filename in pt_filenames:
        src_path = os.path.join(foldername, filename)
        dst_path = os.path.join(dst_dir, filename)
        shutil.move(src_path, dst_path)