In [2]:
import agate
import json

%store -r cpi_and_cl

In [3]:
country_json = json.loads(open('earth.json', 'rb').read())

country_dict = {}

for dct in country_json:
    country_dict[dct['name']] = dct['parent']

# given a contry row, it returns the continent.
def get_country(country_row):
    return country_dict.get(country_row['Country / Territory'].lower())

cpi_and_cl = cpi_and_cl.compute([('continent',
                                   agate.Formula(agate.Text(), get_country))
                                ])

for r in cpi_and_cl.rows:
    print(r['Country / Territory'], r['continent'])

Uruguay south america
Chile south america
Saint Lucia None
Botswana africa
Bhutan asia
Portugal europe
Costa Rica north america
Rwanda africa
Turkey europe
Georgia europe
Lesotho africa
Bahrain asia
Ghana africa
Jordan asia
Montenegro europe
Romania europe
Bosnia and Herzegovina None
Brazil south america
Sao Tome and Principe None
Serbia europe
Senegal africa
Tunisia africa
Swaziland africa
Burkina Faso africa
El Salvador north america
Jamaica north america
Liberia africa
Mongolia asia
Peru south america
Trinidad and Tobago None
Zambia africa
Malawi africa
Morocco africa
Algeria africa
Armenia europe
Benin africa
Colombia south america
Djibouti africa
India asia
Philippines None
Suriname south america
Ecuador south america
Panama north america
Thailand asia
Argentina south america
Gabon africa
Mexico north america
Niger africa
Ethiopia africa
Egypt africa
Indonesia asia
Albania europe
Nepal asia
Mauritania africa
Mozambique africa
Sierra Leone africa
Timor-Leste None
Belarus europe
Dom



In [4]:
# print out the countries that have no continent 'None'
no_continent = cpi_and_cl.where(lambda x: x['continent'] is None)

for r in no_continent.rows:
    print(r['Country / Territory'])

Saint Lucia
Bosnia and Herzegovina
Sao Tome and Principe
Trinidad and Tobago
Philippines
Timor-Leste
Democratic Republic of the Congo
Equatorial Guinea
Guinea-Bissau


In [5]:
%store -r cpi_and_cl

# earth-cleaned.json, which is the earth.json file with the necessary changes made.
country_json = json.loads(open('earth-cleaned.json', 'rb').read())

country_dict = {}

for dct in country_json:
    country_dict[dct['name']] = dct['parent']

# given a contry row, it returns the continent.
def get_country(country_row):
    return country_dict.get(country_row['Country / Territory'].lower())

cpi_and_cl = cpi_and_cl.compute([('continent',
                                   agate.Formula(agate.Text(), get_country))
                                ])

# print out the countries that have no continent 'None'
no_continent = cpi_and_cl.where(lambda x: x['continent'] is None)

for r in no_continent.rows:
    print(r['Country / Territory'])



In [6]:
# group our now-complete continent data by continent
grp_by_cont = cpi_and_cl.group_by('continent')
print(grp_by_cont)

| table         | rows |
| ------------- | ---- |
| south america | 10   |
| north america | 12   |
| africa        | 41   |
| asia          | 19   |
| europe        | 12   |



In [7]:
for cont, table in grp_by_cont.items():
    print(cont, len(table.rows))

south america 10
north america 12
africa 41
asia 19
europe 12


In [8]:
agg = grp_by_cont.aggregate([('cl_mean', agate.Mean('Total (%)')),
                             ('cl_max', agate.Max('Total (%)'))  ,
                             ('cpi_median', agate.Median ('CPI 2013 Score')),
                             ('cpi_min', agate.Min('CPI 2013 Score'))
                            ])

agg.print_table()

| continent     | cl_mean | cl_max | cpi_median | cpi_min |
| ------------- | ------- | ------ | ---------- | ------- |
| south america | 12.710… |   33.5 |       36.0 |      24 |
| north america | 10.333… |   25.8 |       34.5 |      19 |
| africa        | 22.349… |   49.0 |       30.0 |       8 |
| asia          |  9.589… |   33.9 |       30.0 |       8 |
| europe        |  5.625… |   18.4 |       42.0 |      25 |


In [9]:
agg.print_bars('continent', 'cl_max')

continent     cl_max
south america   33.5 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                                
north america   25.8 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                                               
africa          49.0 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░  
asia            33.9 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                                
europe          18.4 ▓░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░                                                              
                     +-----------------------+------------------------+------------------------+-----------------------+
                     0.0                   12.5                     25.0                     37.5                   50.0


### Separating and Focusing Your Data

In [10]:
africa_cpi_cl = cpi_and_cl.where(lambda x: x['continent'] == 'africa')

for r in africa_cpi_cl.order_by('Total (%)', reverse = True).rows:
    print("{}: {}% - {}".format(r['Country / Territory'], r['Total (%)'], r['CPI 2013 Score']))

Somalia: 49.0% - 8.0
Cameroon: 41.7% - 25.0
Zambia: 40.6% - 38.0
Burkina Faso: 39.2% - 38.0
Guinea-Bissau: 38.0% - 19.0
Ghana: 33.9% - 46.0
Niger: 30.5% - 34.0
Rwanda: 28.5% - 53.0
Central African Republic: 28.5% - 25.0
Togo: 28.3% - 29.0
Guinea: 28.3% - 24.0
Equatorial Guinea: 27.8% - 19.0
Ethiopia: 27.4% - 33.0
Burundi: 26.3% - 21.0
Chad: 26.1% - 19.0
Sierra Leone: 26.0% - 30.0
Kenya: 25.9% - 27.0
Malawi: 25.7% - 37.0
Nigeria: 24.7% - 25.0
Angola: 23.5% - 23.0
Lesotho: 22.9% - 49.0
Madagascar: 22.9% - 28.0
Mozambique: 22.2% - 30.0
Comoros: 22.0% - 28.0
Mali: 21.4% - 28.0
Liberia: 20.8% - 38.0
Gambia: 19.2% - 28.0
Uganda: 16.3% - 26.0
Benin: 15.3% - 36.0
Democratic Republic of the Congo: 15.0% - 22.0
Mauritania: 14.6% - 30.0
Senegal: 14.5% - 41.0
Gabon: 13.4% - 34.0
Egypt: 9.3% - 32.0
Botswana: 9.0% - 64.0
Morocco: 8.3% - 37.0
Djibouti: 7.7% - 36.0
Sao Tome and Principe: 7.5% - 42.0
Swaziland: 7.3% - 39.0
Algeria: 4.7% - 36.0
Tunisia: 2.1% - 41.0


In [11]:
import numpy
print(numpy.corrcoef(
    [float(t) for t in africa_cpi_cl.columns['Total (%)'].values()],
    [float(c) for c in africa_cpi_cl.columns['CPI 2013 Score'].values()])[0,1]
     )

-0.4041456951709211


In [12]:
africa_cpi_cl = cpi_and_cl.compute([('Africa Child Labor Rank',
                                   agate.Rank('Total (%)', reverse=True))
                                ])
africa_cpi_cl = cpi_and_cl.compute([('Africa CPI Rank',
                                   agate.Rank('CPI 2013 Score'))
                                ])

In [13]:
cl_mean = africa_cpi_cl.aggregate(agate.Mean('Total (%)'))
cpi_mean = africa_cpi_cl.aggregate(agate.Mean('CPI 2013 Score'))

def highest_rates(row):
    if row['Total (%)'] > cl_mean and row['CPI 2013 Score'] < cpi_mean:
        return True
    return False

highest_cpi_cl = africa_cpi_cl.where(lambda x : highest_rates(x))

for r in highest_cpi_cl.rows:
    print('{}: {}% - {}'.format(r['Country / Territory'], r['Total (%)'], r['CPI 2013 Score']))

Niger: 30.5% - 34.0
Ethiopia: 27.4% - 33.0
Nepal: 33.9% - 31.0
Mozambique: 22.2% - 30.0
Sierra Leone: 26.0% - 30.0
Guatemala: 25.8% - 29.0
Togo: 28.3% - 29.0
Comoros: 22.0% - 28.0
Gambia: 19.2% - 28.0
Madagascar: 22.9% - 28.0
Mali: 21.4% - 28.0
Guyana: 16.4% - 27.0
Kenya: 25.9% - 27.0
Uganda: 16.3% - 26.0
Cameroon: 41.7% - 25.0
Central African Republic: 28.5% - 25.0
Nigeria: 24.7% - 25.0
Guinea: 28.3% - 24.0
Paraguay: 27.6% - 24.0
Angola: 23.5% - 23.0
Burundi: 26.3% - 21.0
Cambodia: 18.3% - 20.0
Chad: 26.1% - 19.0
Equatorial Guinea: 27.8% - 19.0
Guinea-Bissau: 38.0% - 19.0
Haiti: 24.4% - 19.0
Yemen: 22.7% - 18.0
Somalia: 49.0% - 8.0


In [15]:
%store africa_cpi_cl
%store highest_cpi_cl

Stored 'africa_cpi_cl' (Table)
Stored 'highest_cpi_cl' (Table)
