In [14]:
import agate

In [15]:
# Restore the `table` variable that was previously saved with IPython's %store magic
# (presumably from an earlier notebook/session -- the saving cell is not in this file).
%store -r table

In [16]:
# Echo the restored object to confirm it loaded; it displays as an agate Table.
table

<agate.table.Table at 0x1f0bc9b1010>

In [17]:
# Show the column names available for the queries in the cells below.
table.column_names

('Countries and areas',
 'Total (%)',
 'c',
 'Sex (%) Male',
 'Female',
 'Place of residence (%) Urban',
 'Rural',
 'Household wealth quintile (%) Poorest',
 'Second',
 'Middle',
 'Fourth',
 'Richest',
 'Reference Year',
 'Data Source')

In [18]:
# Ten countries with the highest overall child-labor percentage, highest first.
by_total_desc = table.order_by('Total (%)', reverse=True)
most_egregious = by_total_desc.limit(10)

for country_row in most_egregious.rows:
    print(country_row)

<agate.Row: ('Somalia', Decimal('49.0'), None, Decimal('44.5'), Decimal('53.6'), ...)>
<agate.Row: ('Cameroon', Decimal('41.7'), None, Decimal('43.1'), Decimal('40.2'), ...)>
<agate.Row: ('Zambia', Decimal('40.6'), 'y', Decimal('41.6'), Decimal('39.5'), ...)>
<agate.Row: ('Burkina Faso', Decimal('39.2'), None, Decimal('42.3'), Decimal('36.0'), ...)>
<agate.Row: ('Guinea-Bissau', Decimal('38.0'), None, Decimal('39.5'), Decimal('36.4'), ...)>
<agate.Row: ('Ghana', Decimal('33.9'), None, Decimal('33.8'), Decimal('34.0'), ...)>
<agate.Row: ('Nepal', Decimal('33.9'), 'y', Decimal('30.2'), Decimal('37.8'), ...)>
<agate.Row: ('Peru', Decimal('33.5'), 'y', Decimal('30.6'), Decimal('36.3'), ...)>
<agate.Row: ('Niger', Decimal('30.5'), None, Decimal('30.8'), Decimal('30.1'), ...)>
<agate.Row: ('Central African Republic', Decimal('28.5'), None, Decimal('27.2'), Decimal('29.9'), ...)>


In [19]:
# Ten rows with the highest 'Female' percentage, sorted descending.
# Rows whose 'Female' value is missing are NOT filtered out here, and with
# reverse=True they end up at the top of the listing.
by_female_desc = table.order_by('Female', reverse=True)
most_females = by_female_desc.limit(10)

line_template = '{}: {}%'
for country_row in most_females.rows:
    country = country_row['Countries and areas']
    female_pct = country_row['Female']
    print(line_template.format(country, female_pct))

Cabo Verde: None%
Chile: None%
Ecuador: None%
Somalia: 53.6%
Cameroon: 40.2%
Zambia: 39.5%
Nepal: 37.8%
Guinea-Bissau: 36.4%
Peru: 36.3%
Burkina Faso: 36.0%


In [20]:
# Same ranking as before, but rows with a missing 'Female' value are dropped
# first via Table.where so that only real percentages are ranked.
def _has_female_value(row):
    """Return True when the row carries a 'Female' percentage."""
    return row['Female'] is not None


female_data = table.where(_has_female_value)
most_females = female_data.order_by('Female', reverse=True).limit(10)

line_template = '{}: {}%'
for country_row in most_females.rows:
    country = country_row['Countries and areas']
    female_pct = country_row['Female']
    print(line_template.format(country, female_pct))

Somalia: 53.6%
Cameroon: 40.2%
Zambia: 39.5%
Nepal: 37.8%
Guinea-Bissau: 36.4%
Peru: 36.3%
Burkina Faso: 36.0%
Ghana: 34.0%
Rwanda: 30.4%
Niger: 30.1%


In [21]:
# Mean of the 'Place of residence (%) Urban' column, i.e. the average urban
# percentage across rows -- note this is not the average of the overall 'Total (%)'.
table.aggregate(agate.Mean('Place of residence (%) Urban'))



Decimal('10.41204819277108433734939759')

In [22]:
# Drop the rows whose urban value is missing, then take the mean again.
# The result matches the unfiltered mean, which indicates that agate's Mean
# already ignores null values (null rows, not null columns) when averaging.
def _has_urban_value(row):
    """Return True when the row carries an urban place-of-residence percentage."""
    return row['Place of residence (%) Urban'] is not None


has_por = table.where(_has_urban_value)

urban_mean = agate.Mean('Place of residence (%) Urban')
has_por.aggregate(urban_mean)

Decimal('10.41204819277108433734939759')

In [23]:
# Find the first row in which more than 50% of rural children are working.
# `has_por` only guarantees that the *urban* value is present, so the rural
# value is checked for None explicitly: comparing None > 50 raises TypeError.
first_match = has_por.find(lambda x: x['Rural'] is not None and x['Rural'] > 50)

# find() returns None when no row passes the test, so guard before indexing.
first_match['Countries and areas'] if first_match is not None else None

'Bolivia (Plurinational State of)'

In [24]:
# Add a rank column based on 'Total (%)', where rank 1 is the highest percentage.
# Tied percentages share a rank and the following rank is skipped (e.g. 6, 6, 8).
total_rank = agate.Rank('Total (%)', reverse=True)
ranked = table.compute([('Total Child Labor Rank', total_rank)])

# Show the twenty highest percentages next to their rank.
top_twenty = ranked.order_by('Total (%)', reverse=True).limit(20)
for ranked_row in top_twenty.rows:
    print(ranked_row['Total (%)'], ranked_row['Total Child Labor Rank'])

49.0 1
41.7 2
40.6 3
39.2 4
38.0 5
33.9 6
33.9 6
33.5 8
30.5 9
28.5 10
28.5 10
28.3 12
28.3 12
27.8 14
27.6 15
27.4 16
26.4 17
26.4 17
26.3 19
26.1 20


In [27]:
# Print the table's schema (column names and data types) to confirm that the
# new 'Total Child Labor Rank' column was appended.
print(ranked)

| column                                | data_type |
| ------------------------------------- | --------- |
| Countries and areas                   | Text      |
| Total (%)                             | Number    |
| c                                     | Text      |
| Sex (%) Male                          | Number    |
| Female                                | Number    |
| Place of residence (%) Urban          | Number    |
| Rural                                 | Number    |
| Household wealth quintile (%) Poorest | Number    |
| Second                                | Number    |
| Middle                                | Number    |
| Fourth                                | Number    |
| Richest                               | Number    |
| Reference Year                        | Text      |
| Data Source                           | Text      |
| Total Child Labor Rank                | Number    |



In [32]:
# calculate the rank in another way, creating a column with the inverse percentages.
def reverse_percent(row):
    """Return the complement of a row's 'Total (%)' value (100 - total).

    Args:
        row: a mapping-like row that exposes a 'Total (%)' entry.

    Returns:
        100 minus the row's 'Total (%)' value, or None when that value is
        missing, so that a null input stays null instead of raising TypeError.
    """
    total = row['Total (%)']
    if total is None:
        return None
    return 100 - total

# Step 1: derive the complement column with reverse_percent.
not_working = agate.Formula(agate.Number(), reverse_percent)
with_not_working = table.compute([('Children not working (%)', not_working)])

# Step 2: rank that column in ascending order; the lowest share of children
# not working corresponds to the highest share of child labor, so it gets rank 1.
complement_rank = agate.Rank('Children not working (%)')
ranked = with_not_working.compute([('Total Child Labor Rank', complement_rank)])

# Show the twenty highest 'Total (%)' values next to their rank.
top_twenty = ranked.order_by('Total (%)', reverse=True).limit(20)
for ranked_row in top_twenty.rows:
    print(ranked_row['Total (%)'], ranked_row['Total Child Labor Rank'])

49.0 1
41.7 2
40.6 3
39.2 4
38.0 5
33.9 6
33.9 6
33.5 8
30.5 9
28.5 10
28.5 10
28.3 12
28.3 12
27.8 14
27.6 15
27.4 16
26.4 17
26.4 17
26.3 19
26.1 20
