In [1]:
# default_exp core

# Core Functions
> Python library to access online data from the Joshua Project via APIv2

[Joshua Project](https://joshuaproject.net/) via [APIv2](https://joshuaproject.net/resources/datasets)

In [2]:
# export
import urllib3
from urllib3.util.url import Url
import certifi
import json
from functools import partial
from datetime import date
from fastcore.test import *

In [3]:
#hide
from nbdev.showdoc import *

## URL Construction Functions

In [4]:
# hide
# Never commit a real key: read it from the environment and fall back to the
# same placeholder that `url_path_query` uses as its default.
import os
api_key = os.environ.get('JOSHUA_PROJECT_API_KEY', 'your_api_key')

In [5]:
# export
def url_path_query(path:str,cntry:str=None,pgid:str=None,extra=None,month=None,day=None,api_key='your_api_key')->str:
    """Builds and returns a string url to query `path` for `cntry`
    with optional `pgid`,`extra`,`month` and `day`.

    Parameters:
        path: API path, e.g. `'/api/v2/people_groups'`. When it contains
            `'upgotd'`, the `LRofTheDayMonth`/`LRofTheDayDay` parameters are added.
        cntry: country code, sent as `ROG3=<cntry>`; omitted when `None`.
        pgid: people group id, sent as `PeopleID3=<pgid>`; `str` or `int`.
        extra: an already formatted `field=value` string appended as is.
        month, day: only used for `upgotd` paths; `int` or `str`. A missing
            value is replaced by the corresponding part of today's date.
        api_key: always appended as the last parameter.

    Returns:
        The `https://joshuaproject.net` url as a string. Values are inserted
        as given (no escaping is applied here). Every parameter after `ROG3`
        is prefixed with `&`, so the query starts with `&` when `cntry` is
        omitted; existing callers and tests rely on that exact format.
    """
    query = ''
    if cntry is not None: query += 'ROG3='+cntry
    if pgid  is not None: query += '&PeopleID3='+str(pgid)
    if extra is not None: query += '&'+extra
    if 'upgotd' in path:
        today = date.today()
        # str(today.month)/str(today.day) yield the un-padded numbers on every
        # platform; the strftime codes "%-m"/"%-d" are a non-portable extension.
        month = str(today.month) if month is None else str(month)
        day   = str(today.day)   if day   is None else str(day)
        query += '&LRofTheDayMonth='+month
        query += '&LRofTheDayDay='  +day
    query += '&api_key='+api_key
    return Url(scheme='https', host='joshuaproject.net', path=path, query=query).url

In [6]:
# Display the signature and docstring of `url_path_query` (rendered below).
show_doc(url_path_query)

<h4 id="url_path_query" class="doc_header"><code>url_path_query</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>url_path_query</code>(**`path`**:`str`, **`cntry`**:`str`=*`None`*, **`pgid`**:`str`=*`None`*, **`extra`**=*`None`*, **`month`**=*`None`*, **`day`**=*`None`*, **`api_key`**=*`'your_api_key'`*)

Builds and returns a string url to query `path` for `cntry` 
with optional `pgid`,`extra`,`month` and `day`.

`url_path_query` is a generic low-level function. Only `path` is required; if it is used without any other arguments, the query returns over 17000 records.

`cntry` is a two-letter country code as specified in FIPS 10-4 (US Federal Information Processing Standard), which is now obsolete and differs slightly from the international ISO 3166 standard. See details.

The library provides a helper function to work around the current ambiguity.

`pgid` is a string with people group id.

`extra` is an *optional* argument of the form `field=value`, for example: `'LeastReached=Y'`

`month` and `day` are *optional* arguments of type `int` used in `url_upgotd` to specify the date for which to request the unreached people group. If either is omitted, the value of the current date is used. If `int` values are passed, these are converted to proper `str`.

In [7]:
# With no filters only the api_key parameter is present (note the leading '&').
expected_url = 'https://joshuaproject.net/api/v2/people_groups?&api_key=your_api_key'
actual_url = url_path_query('/api/v2/people_groups')
test_eq(actual_url, expected_url)

In [8]:
# export
# Both helpers pre-bind the people-groups endpoint of `url_path_query`;
# they differ only in documented intent (pass `pgid` to select one group).
url_pgs_cntry = partial(url_path_query, '/api/v2/people_groups')
url_pgs_cntry.__doc__ = "Get all people groups in a specific country."

url_pg_cntry = partial(url_pgs_cntry)
url_pg_cntry.__doc__ = "Get a specific people group in a specific country."

### Unreached People Group of the Day

In [9]:
# export
# Pre-bind the "unreached people group of the day" endpoint; because the path
# contains 'upgotd', `url_path_query` adds the month/day parameters.
url_upgotd = partial(url_path_query, '/api/v2/upgotd')
url_upgotd.__doc__ = "Get data for the UPG of the day."

In [10]:
# Explicit integer month/day are rendered as plain decimal strings.
test_eq(
    url_upgotd(month=10,day=1),
    'https://joshuaproject.net/api/v2/upgotd?&LRofTheDayMonth=10&LRofTheDayDay=1&api_key=your_api_key')

# Un-padded month/day of today, computed portably: the strftime codes
# "%-m"/"%-d" are a platform-specific extension and fail on Windows.
today = date.today()
m,d = str(today.month),str(today.day)

# Omitting both falls back to today's date.
test_eq(
    url_upgotd(),
    f'https://joshuaproject.net/api/v2/upgotd?&LRofTheDayMonth={m}&LRofTheDayDay={d}&api_key=your_api_key')

# Omitting only the month combines the current month with the given day.
test_eq(
    url_upgotd(day='15'),
    f'https://joshuaproject.net/api/v2/upgotd?&LRofTheDayMonth={m}&LRofTheDayDay=15&api_key=your_api_key')

In [11]:
# Explicit day combined with the current (un-padded) month. No trailing comma:
# the cell should display the url string itself, not a 1-tuple.
url_upgotd(day='12',month=str(date.today().month))

'https://joshuaproject.net/api/v2/upgotd?&LRofTheDayMonth=1&LRofTheDayDay=12&api_key=your_api_key'

### Unreached People Groups in the Country

In [12]:
# export
# Same endpoint as `url_pgs_cntry`, with the `LeastReached=Y` filter pre-set
# through the `extra` argument.
url_upgs_cntry = partial(url_pgs_cntry, extra='LeastReached=Y')
url_upgs_cntry.__doc__ = "Get all unreached people groups in a specific country."

In [13]:
# Example: url for the unreached people groups of country code 'IN'.
url_upgs_cntry('IN')

'https://joshuaproject.net/api/v2/people_groups?ROG3=IN&LeastReached=Y&api_key=your_api_key'

In [14]:
# assert  url_pgs_cntry('EN') == 'https://joshuaproject.net/api/v2/people_groups?ROG3=EN&api_key=your_api_key'
# assert url_upgs_cntry('EN') == 'https://joshuaproject.net/api/v2/people_groups?ROG3=EN&LeastReached=Y&api_key=your_api_key'

## Testing

In [15]:
# Connection pool that requires certificate verification, using the CA bundle
# located by certifi.
http = urllib3.PoolManager(
    cert_reqs='CERT_REQUIRED',
    ca_certs=certifi.where(),
)

In [16]:
# Build the people-groups query url for country code 'RS' and display it.
url = url_pgs_cntry('RS')
url

'https://joshuaproject.net/api/v2/people_groups?ROG3=RS&api_key=your_api_key'

In [17]:
# Issue the GET request for `url`; later cells read `resp.status` and `resp.data`.
resp = http.request('GET',url)

In [18]:
# On any non-200 answer, decode the JSON body and print "<status_code>: <message>"
# taken from its 'status' entry.
if resp.status != 200:
    resp_j = json.loads(resp.data)
    status_info = resp_j['status']
    print(f"{status_info['status_code']}: {status_info['message']}")


403: The api_key you submited is not valid


In [19]:
# Build the url for people group id '16152' in country code 'IN' and display it.
url = url_pg_cntry('IN','16152')
url

'https://joshuaproject.net/api/v2/people_groups?ROG3=IN&PeopleID3=16152&api_key=your_api_key'

In [20]:
# Decode the JSON body of `resp` into Python objects.
# NOTE(review): `resp` is still the response to the earlier 'RS' request; no
# request for the url built in the previous cell is visible here — confirm intent.
resp_j = json.loads(resp.data)

In [21]:
# hide
#print(json.dumps(resp_j['data'], sort_keys=False, indent=2))

In [23]:
# TODO: Add processing for multi-page results
# for i in range(resp_j['meta']['pagination']['total_count']):
#     print(resp_j['data'][i]['PeopNameInCountry'])

In [25]:
import pandas as pd

In [26]:
# Country-code table: skip the first three lines of the file and name the
# three columns explicitly.
cc = pd.read_csv(
    'data/country_codes.csv',
    skiprows=3,
    names=['Country_name', 'FIPS', 'ISO'],
)

In [27]:
# Show the first rows as a list of per-row dicts (column name -> value).
cc.head().to_dict(orient='records')

[{'Country_name': 'Afghanistan', 'FIPS': 'AF', 'ISO': 'AF'},
 {'Country_name': 'Aland Islands', 'FIPS': nan, 'ISO': 'AX'},
 {'Country_name': 'Albania', 'FIPS': 'AL', 'ISO': 'AL'},
 {'Country_name': 'Algeria', 'FIPS': 'AG', 'ISO': 'DZ'},
 {'Country_name': 'American Samoa', 'FIPS': 'AQ', 'ISO': 'AS'}]

In [28]:
#cci = cc.set_index('ISO')

In [31]:
#cci.drop('Country_name',axis=1).to_dict(orient='records')

In [32]:
#cci.to_dict(orient='dict')['Country_name']

### Convert JSON into a Pandas DataFrame

In [33]:
# Build a DataFrame from the 'data' entry of the decoded response and preview it.
# NOTE(review): the error payload handled earlier is read via its 'status' entry;
# presumably it has no 'data' key, so this needs a successful response — verify.
pd.DataFrame(resp_j['data']).head()

In [34]:
#hide
# Serialise the 'data' entry of the decoded response back into a JSON string.
json_str = json.dumps(resp_j['data'])

All people groups in a specific country

http://joshuaproject.net/api/v2/people_groups?ROG3=YM&api_key=aWye9lV20QtF

All unreached people groups in a specific country

https://joshuaproject.net/api/v2/people_groups?ROG3=YM&LeastReached=Y&api_key=aWye9lV20QtF

A people group in a specific country

http://joshuaproject.net/api/v2/people_groups?ROG3=AF&PeopleID3=19409&api_key=aWye9lV20QtF

All countries a specific people group lives in

http://joshuaproject.net/api/v2/people_groups?PeopleID3=10294&api_key=aWye9lV20QtF

**Country summary data**

http://joshuaproject.net/api/v2/countries?ROG3=AF&api_key=aWye9lV20QtF

**Language summary info**

http://joshuaproject.net/api/v2/languages?ROL3=hau&api_key=aWye9lV20QtF

**Unreached of the Day** (basic data October 16th)

https://joshuaproject.net/api/v2/upgotd?api_key=aWye9lV20QtF&ROL3Profile=eng&LRofTheDayMonth=10&LRofTheDayDay=16

## Export -

In [35]:
#hide
# nbdev: convert the project's notebooks to script modules (see the
# "Converted ..." lines printed below).
from nbdev.export import notebook2script
notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
