In [2]:
import pandas as pd

In [3]:
from ddf_utils.str import format_float_digits

In [4]:
from functools import partial

In [5]:
# Relative path to the source CSV with the ECI country rankings.
source = '../source/eci_country_rankings.csv'

In [6]:
# Load the source CSV into a DataFrame.
data = pd.read_csv(source)

In [7]:
# Preview the first rows to check the available columns.
data.head()

Unnamed: 0,Year,Country,Country ID,ECI,ECI+
0,1964,Angola,afago,-1.31753,-1.22675
1,1964,Cote d'Ivoire,afciv,-0.915735,-1.02821
2,1964,Cameroon,afcmr,-1.12852,-0.889755
3,1964,Republic of the Congo,afcog,-0.930207,-0.898081
4,1964,Algeria,afdza,-0.131416,0.090527


In [8]:
# Country entity table: one row per distinct (Country ID, Country) pair.
# NOTE(review): duplicates are detected on both columns, so an ID that appears
# with two different names would yield two rows — confirm against the data.
countries = (
    data.loc[:, ['Country ID', 'Country']]
    .drop_duplicates()
    .copy()
)

In [9]:
# Rename the columns for the entity file: 'Country ID' -> country, 'Country' -> name.
countries.columns = ['country', 'name']

In [10]:
# Write the country entity file two directories up, without the pandas row index.
countries.to_csv('../../ddf--entities--country.csv', index=False)

In [11]:
# Datapoints table: the key columns (country id, year) plus both indicators.
dps = (
    data.loc[:, ['Country ID', 'Year', 'ECI', 'ECI+']]
    .copy()
)

In [12]:
# Rename the columns to the concept ids used in the output files.
dps.columns = ['country', 'year', 'eci', 'eci_plus']

In [13]:
# Index by (country, year) so that only the indicator columns remain as columns.
# NOTE(review): this rebinds `dps`, so running the cell a second time fails
# because 'country' and 'year' are no longer columns.
dps = dps.set_index(['country', 'year'])

In [14]:
# Preview the indexed datapoints.
dps.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,eci,eci_plus
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
afago,1964,-1.31753,-1.22675
afciv,1964,-0.915735,-1.02821
afcmr,1964,-1.12852,-0.889755
afcog,1964,-0.930207,-0.898081
afdza,1964,-0.131416,0.090527


In [15]:
# Formatter used for every datapoint value: format_float_digits with digits=6
# (presumably the maximum number of digits kept — confirm in ddf_utils).
ff = partial(format_float_digits, digits=6)

# Write one datapoints file per indicator column, keyed by (country, year).
for c in dps:
    # Drop missing values before formatting so the output does not depend on
    # how the formatter treats NaN, then apply the configured formatter `ff`
    # rather than the bare function so that digits=6 actually takes effect.
    df = dps[c].dropna().map(ff).to_frame()
    df.to_csv(f'../../ddf--datapoints--{c}--by--country--year.csv')

In [16]:
# calculate rankings

def scanl(f, l, base):
    """Lazily yield the running results of a left fold.

    Starting from ``base``, each element of ``l`` is combined with the
    accumulated value via ``f(accumulated, element)`` and the new accumulated
    value is yielded. The initial ``base`` itself is not yielded, so an empty
    ``l`` produces nothing.
    """
    acc = base
    for item in l:
        acc = f(acc, item)
        yield acc

def compare(vs, x):
    """Fold step for dense ranking over values sorted in rank order.

    ``vs`` is the previous ``(value, rank)`` pair. Returns ``(x, rank)`` where
    the rank is unchanged when ``x`` equals the previous value and one higher
    otherwise. Returns ``None`` when ``x`` is null; ``vs`` is not inspected in
    that case.
    """
    if pd.isnull(x):
        return None
    previous_value = vs[0]
    previous_rank = vs[1]
    rank = previous_rank if previous_value == x else previous_rank + 1
    return (x, rank)

def r(ser):
    """Return the dense descending rank of each non-null value in ``ser``.

    The largest value gets rank 1, equal values share a rank, and the rank
    grows by one per distinct value (no gaps after ties).

    Parameters
    ----------
    ser : pandas.Series
        Numeric values to rank.

    Returns
    -------
    pandas.Series
        Integer ranks, indexed by the labels of ``ser`` in descending value
        order. Labels whose value is null are left out.
    """
    # Null values cannot be ranked; dropping them up front keeps them from
    # breaking the ranking.
    s = ser.dropna().sort_values(ascending=False)
    # pandas' dense rank needs no seed value, so no real data point can
    # collide with a sentinel and end up with rank 0.
    return s.rank(method='dense', ascending=False).astype(int)

In [17]:
# Keep only the ECI column (as a one-column DataFrame) as input for the ranking.
df = dps[['eci']]

In [18]:
# Rank countries by ECI within each year (rank 1 = highest ECI).
yearly_rankings = []

for _, year_df in df.groupby(['year']):
    # Rows without an ECI value cannot be ranked; leave them out, as the
    # datapoint export does with dropna().
    eci = year_df['eci'].dropna()
    if eci.empty:
        continue
    # r() returns the ranks in sorted order; reindex restores the row order
    # of this year's data before the series is collected.
    yearly_rankings.append(r(eci).reindex(eci.index).rename('eci_ranking'))

# One Series named 'eci_ranking', indexed by (country, year).
rks = pd.concat(yearly_rankings)

In [19]:
# Turn the (country, year) index levels back into regular columns.
# NOTE(review): this rebinds `rks`, so the cell is not idempotent — re-running
# it resets the index of the already-flattened frame again.
rks = rks.reset_index()

In [20]:
# Sanity check: the best-ranked countries for 2016.
rks[rks.year == 2016].sort_values(by='eci_ranking').head()

Unnamed: 0,country,year,eci_ranking
5594,asjpn,2016,1
5615,euche,2016,2
5617,eudeu,2016,3
5638,euswe,2016,4
5598,askor,2016,5


In [21]:
# Write the ranking datapoints (country, year, eci_ranking) without the pandas row index.
rks.to_csv('../../ddf--datapoints--eci_ranking--by--country--year.csv', index=False)

In [22]:
# NOTE(review): `open` is presumably the macOS command for showing the output
# directory in a file browser. It is an interactive convenience and may not
# work on other platforms — confirm whether this cell should be kept.
!open ../../

In [23]:
# Concept definitions as (concept id, display name, concept type) rows.
_concept_rows = [
    ('country', 'Country', 'entity_domain'),
    ('name', 'Name', 'string'),
    ('domain', 'Domain', 'string'),
    ('year', 'Year', 'time'),
    ('eci', 'ECI', 'measure'),
    ('eci_plus', 'ECI+', 'measure'),
    ('eci_ranking', 'ECI ranking', 'measure'),
]

# Expand each row into a record; every concept has an empty 'domain' value.
concepts = [
    {'concept': concept_id, 'name': label, 'concept_type': kind, 'domain': ''}
    for concept_id, label, kind in _concept_rows
]

# Tabular form of the concept records, one row per concept.
cdf = pd.DataFrame.from_records(concepts)

In [24]:
# Display the concept table for a visual check.
cdf

Unnamed: 0,concept,concept_type,domain,name
0,country,entity_domain,,Country
1,name,string,,Name
2,domain,string,,Domain
3,year,time,,Year
4,eci,measure,,ECI
5,eci_plus,measure,,ECI+
6,eci_ranking,measure,,ECI ranking


In [25]:
# Write the concept definitions file without the pandas row index.
cdf.to_csv('../../ddf--concepts.csv', index=False)

In [26]:
# Regenerate datapackage.json for the dataset directory with the `ddf` CLI;
# the recorded output shows the previous datapackage.json being backed up first.
!ddf create_datapackage --update ../../

backing up previous datapackage.json...
[32m2018-06-20 09:26:00[0m [1;30mINFO[0m generating ddf schema, may take some time...
[32m2018-06-20 09:26:00[0m [1;30mINFO[0m loading dataset from disk: ddf--mit--economic_complexity_rankings
100%|█████████████████████████████████████████████| 5/5 [00:00<00:00, 57.17it/s]
Done.


In [27]:
# Run the validate-ddf CLI on the dataset directory.
# NOTE(review): --silent presumably suppresses progress output — confirm.
!validate-ddf ../../ --silent

[0m[?1000l[?1002l[?1003l[?1004l