In [1]:
import pandas
import functools
import json
import numpy
import os
from bs4 import BeautifulSoup

In [2]:
import requests

In [3]:
version_codes = pandas.read_csv('versions_with_langcodes.csv')

In [4]:
version_codes

Unnamed: 0,version_id,language,version_name,wikidata_entity
0,1,eng,american standard version,http://www.wikidata.org/entity/Q1860
1,2,eng,amplified bible,http://www.wikidata.org/entity/Q1860
2,3,eng,amplified bible classic edition,http://www.wikidata.org/entity/Q1860
3,4,eng,catholic public domain version,http://www.wikidata.org/entity/Q1860
4,5,eng,christian standard bible,http://www.wikidata.org/entity/Q1860
...,...,...,...,...
2157,2353,zho_tw,%E8%95%AD%E9%90%B5%E7%AC%9B%E6%96%B0%E8%AD%AF%...,
2158,2354,zho_tw,%E8%95%AD%E9%9D%9C%E5%B1%B1%E6%96%B0%E7%B6%93%...,
2159,2355,zho_tw,%E9%99%B8%E4%BA%A8%E7%90%86%E9%84%AD%E5%A3%BD%...,
2160,2357,kor,,http://www.wikidata.org/entity/Q9176


In [5]:
version_codes.language.nunique()

1504

In [6]:
version_codes.wikidata_entity.nunique()

1500

In [7]:
version_codes.wikidata_entity.iloc[0]

'http://www.wikidata.org/entity/Q1860'

In [8]:
def get_entity_from_url(url):
    if type(url) == type(''):
        return url.split('/')[-1]
    else:
        return numpy.nan
    
version_codes['entity'] = version_codes.wikidata_entity.map(get_entity_from_url)
version_codes

Unnamed: 0,version_id,language,version_name,wikidata_entity,entity
0,1,eng,american standard version,http://www.wikidata.org/entity/Q1860,Q1860
1,2,eng,amplified bible,http://www.wikidata.org/entity/Q1860,Q1860
2,3,eng,amplified bible classic edition,http://www.wikidata.org/entity/Q1860,Q1860
3,4,eng,catholic public domain version,http://www.wikidata.org/entity/Q1860,Q1860
4,5,eng,christian standard bible,http://www.wikidata.org/entity/Q1860,Q1860
...,...,...,...,...,...
2157,2353,zho_tw,%E8%95%AD%E9%90%B5%E7%AC%9B%E6%96%B0%E8%AD%AF%...,,
2158,2354,zho_tw,%E8%95%AD%E9%9D%9C%E5%B1%B1%E6%96%B0%E7%B6%93%...,,
2159,2355,zho_tw,%E9%99%B8%E4%BA%A8%E7%90%86%E9%84%AD%E5%A3%BD%...,,
2160,2357,kor,,http://www.wikidata.org/entity/Q9176,Q9176


In [9]:
#@functools.lru_cache(2)
def get_url_content(entity):
    local_cache = f'.cache/{entity}.json'
    if os.path.exists(local_cache):
        with open(local_cache) as f:
            return json.load(f)
    url = 'http://www.wikidata.org/entity/' + entity + '.json'
    print(f"Fetching {url}")
    r = requests.get(url)
    answer = r.json()
    os.makedirs('.cache', exist_ok=True)
    with open(local_cache, 'w') as f:
        json.dump(answer, f, indent=4)
    return answer

In [10]:
@functools.lru_cache(5000)
def get_label(entity):
    if type(entity) != type(''):
        return numpy.nan
    obj = get_url_content(entity)
    this_entity = obj['entities'][entity]
    labels = this_entity['labels']
    if 'en' in labels:
        return labels['en']['value']
    raise IndexError(entity)

In [11]:
@functools.lru_cache(5000)
def get_property_label(property):
    """Any better way of doing this?"""
    url = f'http://www.wikidata.org/wiki/Property:{property}' 
    r = requests.get(url)
    soup = BeautifulSoup(r.text)
    title = soup.find('span', class_='wikibase-title-label')
    return title.string

In [12]:
version_codes['language_name_in_english'] = version_codes.entity.map(get_label)

In [13]:
@functools.lru_cache(100000)
def get_claim_list(entity, claim):
    if type(entity) != type(''):
        return numpy.nan
    obj = get_url_content(entity)
    claims = obj['entities'][entity]['claims']
    if claim in claims:
        return [x['mainsnak'].get('datavalue') for x in claims[claim]]
    else:
        return []

In [14]:
def instance_of(entity):
    if type(entity) != type(''):
        return numpy.nan
    raw_content = get_claim_list(entity, 'P31')
    answer = []
    for v in raw_content:
        name = get_label(v['value']['id'])
        answer.append(name)
    return answer

In [15]:
version_codes['instance_of'] = version_codes.entity.map(instance_of)

In [16]:
def country(entity):
    if type(entity) != type(''):
        return numpy.nan
    raw_content = get_claim_list(entity, 'P17')
    answer = []
    for v in raw_content:
        name = get_label(v['value']['id'])
        answer.append(name)
    return answer

In [17]:
version_codes['country'] = version_codes.entity.map(country)

In [18]:
def claims(entity):
    if type(entity) != type(''):
        return []
    obj = get_url_content(entity)
    claims = obj['entities'][entity]['claims']
    return list(claims.keys())

In [19]:
known_claims = set()
for entity in version_codes.entity:
    known_claims.update(claims(entity))
known_claims

{'P10',
 'P1014',
 'P1018',
 'P1036',
 'P1051',
 'P1098',
 'P1151',
 'P1190',
 'P1225',
 'P1232',
 'P1245',
 'P1249',
 'P1251',
 'P1252',
 'P1256',
 'P126',
 'P127',
 'P1282',
 'P1296',
 'P131',
 'P1343',
 'P135',
 'P1365',
 'P1366',
 'P1368',
 'P1376',
 'P138',
 'P1394',
 'P1396',
 'P1417',
 'P1424',
 'P144',
 'P1448',
 'P1466',
 'P1482',
 'P1535',
 'P155',
 'P1552',
 'P156',
 'P158',
 'P1613',
 'P1617',
 'P1627',
 'P163',
 'P1687',
 'P17',
 'P170',
 'P1705',
 'P1711',
 'P172',
 'P1798',
 'P18',
 'P1807',
 'P1813',
 'P1842',
 'P1846',
 'P186',
 'P1889',
 'P190',
 'P1999',
 'P2161',
 'P2163',
 'P218',
 'P2184',
 'P219',
 'P2192',
 'P220',
 'P221',
 'P2263',
 'P227',
 'P2283',
 'P2341',
 'P2347',
 'P2355',
 'P242',
 'P244',
 'P2572',
 'P2579',
 'P2581',
 'P2587',
 'P2588',
 'P2590',
 'P2596',
 'P2671',
 'P268',
 'P269',
 'P276',
 'P278',
 'P279',
 'P282',
 'P2888',
 'P2910',
 'P2919',
 'P2924',
 'P2959',
 'P2989',
 'P3021',
 'P305',
 'P3095',
 'P3097',
 'P31',
 'P3103',
 'P3161',
 'P321

In [20]:
len(known_claims)

253

In [21]:
get_property_label('P1014')

'Art & Architecture Thesaurus ID'

In [22]:
for claim in known_claims:
    print(claim, get_property_label(claim))

P948 page banner
P8370 UNBIS Thesaurus ID
P2283 uses
P4342 Store norske leksikon ID
P7293 PLWABN ID
P4211 Bashkir encyclopedia (Russian version) ID
P1098 number of speakers
P7867 category for maps
P5337 Google News topics ID
P6541 Stack Exchange site
P7157 The Top Tens ID
P2579 studied by
P920 LEM ID
P6404 Treccani's Dizionario di Storia ID
P4254 Bengali Banglapedia ID
P1368 LNB ID
P1256 Iconclass notation
P138 named after
P1366 replaced by
P3984 subreddit
P691 NKCR AUT ID
P3161 has grammatical mood
P3553 Zhihu topic ID
P5922 ANZSRC 2008 FoR ID
P7832 Basque Vikidia ID
P950 Biblioteca Nacional de España ID
P1617 BBC Things ID
P1014 Art & Architecture Thesaurus ID
P7084 related category
P457 foundational text
P18 image
P7959 historic county
P163 flag
P127 owned by
P2163 FAST ID
P131 located in the administrative territorial entity
P172 ethnic group
P279 subclass of
P461 opposite of
P269 IdRef ID
P1687 Wikidata property
P1051 PSH ID
P2192 endangeredlanguages.com ID
P6081 RIA Novosti refer

In [23]:
useful_and_interesting_claims = [ 'P5913', 'P2192', 'P1365',
                                 #'P527', 
                                 'P2341', 'P3823', 'P5206', 
                                 #'P2587',
                                 'P2989', 'P279',
'P5110', 'P3103', 'P461', 'P4913', 'P1466',  'P366', 
'P282', 'P5109', 'P460', #'P495', 
                                 'P4132',  'P2283', 
                                 #'P8786',
'P3161', 'P1098', 'P1535', 'P126', 'P1249', 'P144',
'P737']

In [24]:
def coordinates(claim_list):
    if type(claim_list) != type([]):
        return []
    return [(x['value']['latitude'], x['value']['longitude']) for x in claim_list]
locations_records = []
for entity in version_codes.entity.unique():
    if type(entity) == float:
        continue
    for lat,long in coordinates(get_claim_list(entity, 'P625')):
        locations_records.append({'entity': entity, 'latitude': lat, 'longitude': long })
    for country_id in get_claim_list(entity, 'P17'):
        locations_records.append({'entity': entity, 'country': get_label(country_id['value']['id'])})
    for country_id in get_claim_list(entity, 'P495'):
        locations_records.append({'entity': entity, 'country': get_label(country_id['value']['id'])})
    for location in get_claim_list(entity, "P276"):
        location_entity = location['value']['id']
        location_label = get_label(location_entity)
        countries = get_claim_list(location_entity, 'P17')
        if countries == []:
            locations_records.append({'entity': entity, 'location_entity': location_entity,
                                      'location_name': location_label})
        else:
            for country in countries:
                locations_records.append({'entity': entity, 'location_entity': location_entity,
                                      'location_name': location_label,
                                         'country': get_label(country['value']['id'])})
    for home in get_claim_list(entity, "P2341"):
        home_entity = home['value']['id']
        home_name = get_label(home_entity)
        countries = get_claim_list(home_entity, "P17")
        if countries == []:
            locations_records.append({'entity': entity, 'indigenous_to_name': 
                                      home_name, 'indigenous_to_entity': home_entity})
        else:
            for country in countries:
                locations_records.append({'entity': entity, 
                                          'indigenous_to_name': 
                                      home_name, 
                                          'indigenous_to_entity': home_entity, 
                                          'country': get_label(country['value']['id'])})
locations_df = pandas.DataFrame.from_records(locations_records)
locations_df.to_csv('geomap.csv', index=False)

In [25]:
locations_df[locations_df.location_entity.notnull() & locations_df.country.isnull()]

Unnamed: 0,entity,country,indigenous_to_name,indigenous_to_entity,latitude,longitude,location_entity,location_name
1972,Q3094570,,,,,,Q24287192,Wangeotek
1973,Q3094570,,,,,,Q24287209,Kao
1974,Q3094570,,,,,,Q24287218,Kira
1975,Q3094570,,,,,,Q24287221,Kedi
1976,Q3094570,,,,,,Q24287223,Laba Besar
1977,Q3094570,,,,,,Q24287227,Goal
6263,Q1142333,,,,,,Q24287148,Pale


In [26]:
locations_df[locations_df.country.map(lambda x: type(x) == type('') and '+' in x)]

Unnamed: 0,entity,country,indigenous_to_name,indigenous_to_entity,latitude,longitude,location_entity,location_name


In [27]:
languages_with_countries = locations_df[locations_df.country.notnull()].groupby('entity').country.nunique().index

In [28]:
locations_df.country.nunique()

298

In [29]:
locations_df[locations_df.country.notnull()].groupby('entity').country.unique()

entity
Q10179                     [Lesotho, Mozambique, South Africa]
Q10266010                                             [Brazil]
Q10322066                                             [Brazil]
Q10510745                                          [Australia]
Q1066766     [Tajikistan, Turkmenistan, Uzbekistan, Soviet ...
                                   ...                        
Q948514                                       [Colombia, Peru]
Q952133                                     [Papua New Guinea]
Q957945                                              [Vanuatu]
Q9610                        [Bangladesh, India, Sierra Leone]
Q962392                                     [Brazil, Colombia]
Name: country, Length: 1485, dtype: object

In [30]:
lookup_country_of_language = locations_df[locations_df.country.notnull()].groupby('entity').country.unique().to_dict()
version_codes['country'] = version_codes.entity.map(lookup_country_of_language)

In [31]:
version_codes.sample(50, random_state=19721812)

Unnamed: 0,version_id,language,version_name,wikidata_entity,entity,language_name_in_english,instance_of,country
700,777,ilo,ti baro a naimbag a damag biblia,http://www.wikidata.org/entity/Q35936,Q35936,Ilocano,"[language, modern language]",[Philippines]
1386,1508,ktj,nysa a haantitie,http://www.wikidata.org/entity/Q10975356,Q10975356,Plapo Krumen,"[language, modern language]",[Ivory Coast]
188,210,bdh,mkanda lor b lomo k dos ey e,http://www.wikidata.org/entity/Q2880165,Q2880165,Baka,"[language, modern language]","[Democratic Republic of the Congo, South Sudan]"
929,1015,kru,nt bsi,http://www.wikidata.org/entity/Q33492,Q33492,Kurukh,"[language, modern language]","[Bangladesh, India]"
1533,1676,san,sanskrit bible nt in oriya script,http://www.wikidata.org/entity/Q11059,Q11059,Sanskrit,"[language, ancient language]","[India, British India, Mughal Empire, Sur Empi..."
389,428,dga,naamen npaalaa gane,http://www.wikidata.org/entity/Q35159,Q35159,Dagaare,"[language, modern language]","[Burkina Faso, Ghana]"
1569,1716,shp,diossen joi jatixonbi onanti joi,http://www.wikidata.org/entity/Q2671988,Q2671988,Shipibo-Conibo,"[language, modern language]",[Peru]
2086,2278,tel,irv 2019,http://www.wikidata.org/entity/Q8097,Q8097,Telugu,"[language, modern language]",[India]
1043,1136,mgh,makua new testament,http://www.wikidata.org/entity/Q33604,Q33604,Makhuwa-Meetto,"[dialect, language, modern language]","[Mozambique, Tanzania]"
1724,1891,tob,nuevo testamento toba sur,http://www.wikidata.org/entity/Q3113756,Q3113756,Toba Qom,"[language, modern language, definitely endange...","[Argentina, Paraguay]"


In [32]:
country_to_language = locations_df[locations_df.country.notnull()][['entity', 'country']
                                            ].drop_duplicates().country.value_counts().reset_index().rename(
columns={'country': 'number_of_languages', 'index': 'nation'})
country_to_language.to_csv('country-to-language-count.csv',
                                                                      index=False
                                                                     )
country_to_language

Unnamed: 0,nation,number_of_languages
0,Papua New Guinea,212
1,Mexico,147
2,India,117
3,Indonesia,100
4,Philippines,80
...,...,...
293,Czechoslovakia,1
294,Kingdom of the Netherlands,1
295,New Caledonia,1
296,Dominican Republic,1


In [33]:
country_to_language = locations_df[locations_df.country.notnull()][['entity', 'country']
                                            ].drop_duplicates().country.value_counts().reset_index().rename(
columns={'country': 'number_of_languages', 'index': 'nation'})
country_to_language.to_csv('country-to-language-count.csv',
                                                                      index=False
                                                                     )
country_to_language

Unnamed: 0,nation,number_of_languages
0,Papua New Guinea,212
1,Mexico,147
2,India,117
3,Indonesia,100
4,Philippines,80
...,...,...
293,Czechoslovakia,1
294,Kingdom of the Netherlands,1
295,New Caledonia,1
296,Dominican Republic,1


In [34]:
locations_df

Unnamed: 0,entity,country,indigenous_to_name,indigenous_to_entity,latitude,longitude,location_entity,location_name
0,Q1860,American Samoa,,,,,,
1,Q1860,Anguilla,,,,,,
2,Q1860,Antigua and Barbuda,,,,,,
3,Q1860,Aruba,,,,,,
4,Q1860,Australia,,,,,,
...,...,...,...,...,...,...,...,...
7834,Q9176,South Korea,,,,,,
7835,Q9176,North Korea,,,,,,
7836,Q9176,People's Republic of China,,,,,,
7837,Q9176,North Korea,North Korea,Q423,,,,


In [46]:
locations_df.entity.nunique()

1486

In [35]:
locations_df[locations_df.country.notnull()].country.value_counts().reset_index().rename(
columns={'country': 'number_of_languages', 'index': 'nation'}).to_csv('country-to-language-count.csv',
                                                                      index=False
                                                                     )

In [36]:
languages_without_countries = locations_df[~locations_df.entity.isin(languages_with_countries)]
languages_without_countries

Unnamed: 0,entity,country,indigenous_to_name,indigenous_to_entity,latitude,longitude,location_entity,location_name
7116,Q35497,,Mediterranean Basin,Q72499,,,,


In [37]:
def nullable(f):
    def wrapped(claim_list):
        if type(claim_list) == float:
            return numpy.nan
        else:
            return f(claim_list)
    return wrapped

In [38]:
@nullable
def make_label_list(claim_list):   return [get_label(x['value']['id']) for x in claim_list]

@nullable
def first_label(claim_list): return '' if len(claim_list) == 0 else get_label(claim_list[0]['value']['id'])

def emptiness(claim_list): return False if type(claim_list) == float or len(claim_list) == 0 else True

@nullable
def as_is(claim_list):  return [x['value'] for x in claim_list]

@nullable
def comma_list(claim_list): return ', '.join(make_label_list(claim_list))
    
def translator_func(claim):        
    if claim in ['P17', 'P276', 'P527', 'P2341',  'P279', 'P2283', 'P461', 'P460', 'P495',
                'P4913', 'P282', 'P4132', 'P737']: return make_label_list
    if claim in ['P5913', 'P2192', 'P5206', 'P2989', 'P5109', 'P5110', 'P3161', 'P3103']: return emptiness
    if claim in ['P1365', 'P1535','P144']: return first_label
    if claim in ['P625']: return coordinates
    if claim in ['P3823']: return comma_list
    return as_is

In [39]:
language_claim_records = []
for entity in version_codes.entity.unique():
    this_entity = {'entity': entity, 'name': get_label(entity) }
    for claim in useful_and_interesting_claims:
        f = translator_func(claim)
        this_entity[claim + "_" + get_property_label(claim)] = f(get_claim_list(entity, claim))
    language_claim_records.append(this_entity)
language_claim_df = pandas.DataFrame.from_records(language_claim_records).set_index('entity')

In [40]:
language_claim_df

Unnamed: 0_level_0,name,P5913_has inflection class,P2192_endangeredlanguages.com ID,P1365_replaces,P2341_indigenous to,P3823_Ethnologue language status,P5206_has conjugation class,P2989_has grammatical case,P279_subclass of,P5110_has grammatical person,...,P460_said to be the same as,P4132_linguistic typology,P2283_uses,P3161_has grammatical mood,P1098_number of speakers,P1535_used by,P126_maintained by,P1249_time of earliest written record,P144_based on,P737_influenced by
entity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Q1860,English,False,False,,"[England, Scotland, United Kingdom, Wales, Ire...",1 National,False,True,[Anglic languages],True,...,[Oldspeak],"[subject–verb–object, nominative–accusative la...",[],True,"[{'amount': '+339370920', 'unit': '1'}, {'amou...",,[],[],,"[French, Vulgar Latin, Germanic languages, Greek]"
Q5166,'Auhelawa,False,False,,[Milne Bay Province],5 Developing,False,False,[Austronesian languages],False,...,[],[],[],False,"[{'amount': '+1200', 'unit': '1'}]",,[],[],,[]
Q3073568,Abau,False,True,,[Sandaun Province],5 Developing,False,False,[Sepik languages],False,...,[],[],[],False,"[{'amount': '+7270', 'unit': '1'}]",,[],[],,[]
Q34835,Abua,False,False,,"[Federal Capital Territory, Rivers State]",6a Vigorous,False,False,[Central Delta languages],False,...,[],[],[],False,[],,[],[],,[]
Q56657,Abun,False,True,,"[Papua, West Papua]",6b Threatened,False,False,[],False,...,[],[],[],False,"[{'amount': '+3000', 'unit': '1'}]",,[],[],,[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q33375,Hakka,False,False,Middle Chinese,"[Guangxi, Fujian, Guangdong, Hainan, Hunan, Ji...",5 Developing,False,False,"[Chinese, Sinitic languages]",False,...,[],"[subject–verb–object, tonal language]","[Hakka Chinese characters, Hakka Transliterati...",False,"[{'amount': '+30000000', 'unit': '1'}]",,[],[],,[]
Q37041,Classical Chinese,False,False,,[],,False,False,[written Chinese language],False,...,[],[],[],False,[],,[],[],,[]
Q5287,Japanese,False,False,,[Japan],1 National,False,False,"[Japonic languages, language isolate]",False,...,[],"[subject–object–verb, nominative–accusative la...",[],True,"[{'amount': '+130000000', 'unit': '1', 'upperB...",,[],[],,[]
Q7850,Chinese,False,False,,[],,False,False,"[Sino-Tibetan languages, Sinitic languages]",False,...,"[Sinitic languages, Huáyǔ]","[subject–verb–object, isolating language]",[],False,"[{'amount': '+1299877520', 'unit': '1'}]",,[],[],,[]


In [41]:
language_claim_df['P1098_number of speakers'].map(lambda x: [] if type(x) != list else [t['unit'] for t in x])

entity
Q1860       [1, 1, 1, 1, 1]
Q5166                   [1]
Q3073568                [1]
Q34835                   []
Q56657                  [1]
                 ...       
Q33375                  [1]
Q37041                   []
Q5287                   [1]
Q7850                   [1]
Q9176                   [1]
Name: P1098_number of speakers, Length: 1501, dtype: object

In [42]:
language_claim_df['P1098_number of speakers'].map(lambda x: [] if type(x) != list else [t.keys() for t in x])

entity
Q1860       [(amount, unit), (amount, unit), (amount, unit...
Q5166                                        [(amount, unit)]
Q3073568                                     [(amount, unit)]
Q34835                                                     []
Q56657                                       [(amount, unit)]
                                  ...                        
Q33375                                       [(amount, unit)]
Q37041                                                     []
Q5287                [(amount, unit, upperBound, lowerBound)]
Q7850                                        [(amount, unit)]
Q9176                                        [(amount, unit)]
Name: P1098_number of speakers, Length: 1501, dtype: object

In [43]:
language_claim_df['P1098_number of speakers'].loc['Q1860']

[{'amount': '+339370920', 'unit': '1'},
 {'amount': '+603163010', 'unit': '1'},
 {'amount': '+379007140', 'unit': '1'},
 {'amount': '+753359540', 'unit': '1'},
 {'amount': '+1132366680', 'unit': '1'}]

In [44]:
version_codes[version_codes.language.isin(['hak', 'jpn', 'lzh', 'nob', 'qvz', 'zho', 'zho_tw', 'tha'])]

Unnamed: 0,version_id,language,version_name,wikidata_entity,entity,language_name_in_english,instance_of,country
1257,1372,nob,bibelen guds ord hverdagsbibelen hermon forlag,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1258,1373,nob,bibelen 2011 bokmal,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1259,1374,nob,bibelen guds ord 2017,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1260,1375,nob,det norsk bibelselskap 1930,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1261,1376,nob,en levende bok,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1262,1377,nob,norsk bibel 88slash 07,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1263,1378,nob,the bible in norwegian 1978slash 85 bokmal,http://www.wikidata.org/entity/Q25167,Q25167,Bokmål,"[language, standard language, written language...",[Norway]
1280,1396,qvz,diospa shimi,http://www.wikidata.org/entity/Q12953848,Q12953848,Northern Pastaza Quichua,"[language, modern language, definitely endange...","[Ecuador, Peru]"
2097,2290,tha,1940,http://www.wikidata.org/entity/Q9217,Q9217,Thai,"[natural language, modern language, language]","[Cambodia, Thailand]"
2098,2291,tha,erv,http://www.wikidata.org/entity/Q9217,Q9217,Thai,"[natural language, modern language, language]","[Cambodia, Thailand]"


In [45]:
version_codes[version_codes.language_name_in_english.isin(['Bahasa', 'Indonesian', 'Malay', 'Russian', 'Esperanto'])]

Unnamed: 0,version_id,language,version_name,wikidata_entity,entity,language_name_in_english,instance_of,country
175,197,ind,alkitab terjemahan baru,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
176,198,ind,tl alkitab terjemahan lama,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
177,199,ind,alkitab dalam bahasa indonesia masa kini,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
178,200,ind,firman allah yang hidup,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
179,201,ind,indonesian tazi nt study bible,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
180,202,ind,perjanjian baru terjemahan baru edisi 2,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
181,203,ind,perjanjian baru versi mudah dibaca,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
182,204,ind,terjemahan sederhana indonesia,http://www.wikidata.org/entity/Q9240,Q9240,Indonesian,"[language, standard language, modern language]",[Indonesia]
183,205,msa,alkitab berita baik,http://www.wikidata.org/entity/Q9237,Q9237,Malay,"[language, macrolanguage, modern language]","[Malaysia, Indonesia, Brunei, Singapore, East ..."
184,206,msa,alkitab berita baik deuterokanonika,http://www.wikidata.org/entity/Q9237,Q9237,Malay,"[language, macrolanguage, modern language]","[Malaysia, Indonesia, Brunei, Singapore, East ..."
