In [1]:
import pandas as pd
import numpy as np
import os

In [5]:
from ddf_utils.str import to_concept_id

In [8]:
# Load the raw Credit Suisse wealth-report table from the source folder.
data = pd.read_csv('../source/Credit Suisse Wealth Report - Dataset - v1 - Data Source.csv')

In [9]:
# Read the geo synonym table as strings, then turn it into a plain
# {synonym: geo} dictionary for fast lookups.
geo_synonyms = pd.read_csv('../../../ddf--open_numbers/ddf--synonyms--geo.csv', dtype=str)
synonyms = geo_synonyms.set_index('synonym')['geo'].to_dict()

In [10]:
def get_mapping(x):
    """Translate a source country name into its geo identifier.

    Names found in the ``synonyms`` lookup resolve to the mapped geo value.
    Any other name is printed (so unmapped names show up in the cell output)
    and is converted with ``to_concept_id`` instead.
    """
    if x in synonyms:
        return synonyms[x]
    print(x)
    return to_concept_id(x)

# Overwrite the Country column in place with the mapped identifiers; names
# that have no synonym entry are printed by get_mapping as a side effect.
data['Country'] = data['Country'].map(get_mapping)

British Caribbean
Dutch Caribbean
French Caribbean
Melanesia
Polynesia


In [12]:
# Key every row by (Country, Year); all remaining columns are indicators.
df = data.set_index(['Country', 'Year'])

In [13]:
df.columns

Index([' Adults (Thousands)', ' Mean Wealth per Adult',
       ' Median Wealth per Adult', ' Adults % under 10000',
       ' Adults % between 10000-100k', ' Adults % between 100k-1M',
       ' Adults % over 1M', ' Total', ' Gini %'],
      dtype='object')

In [14]:
# Indicators that carry a single value per (Country, Year) row. The column
# labels keep the leading space they have in the source file. A copy is
# taken so later changes to indicators1 do not touch df.
single_value_columns = [
    ' Adults (Thousands)',
    ' Mean Wealth per Adult',
    ' Median Wealth per Adult',
    ' Gini %',
]
indicators1 = df[single_value_columns].copy()

In [33]:
# Give the index the lower-case key names that the output file names
# advertise (…--by--country--year.csv).
indicators1.index.names = ['country', 'year']

# Write one datapoints file per indicator column.
for c in indicators1.columns:
    concept_id = to_concept_id(c)
    # Read the column from indicators1 rather than df: df still carries the
    # original 'Country'/'Year' index names, which would end up as the CSV
    # key headers. rename() returns a new Series, so the source frame is not
    # mutated.
    ser = indicators1[c].rename(concept_id)
    ser.to_csv(f'../../ddf--datapoints--{concept_id}--by--country--year.csv')

In [23]:
# Source column label -> short wealth_group identifier.
m = {
    ' Adults % under 10000': 'under_10k',
    ' Adults % between 10000-100k': 'between_10k_100k',
    ' Adults % between 100k-1M': 'between_100k_1m',
    ' Adults % over 1M': 'over_1m'
}

# Select the four share columns, relabel them, name both axes and pivot the
# columns into a third index level, giving one value per
# (country, year, wealth_group).
indicators2 = (
    df[list(m)]
    .copy()
    .rename(columns=m)
    .rename_axis(index=['country', 'year'], columns='wealth_group')
    .stack()
)

In [24]:
indicators2

country  year  wealth_group    
afg      2020  under_10k           97.6
               between_10k_100k     2.4
               between_100k_1m      0.1
               over_1m              0.0
alb      2020  under_10k           41.0
                                   ... 
zmb      2012  over_1m              0.0
zwe      2012  under_10k           95.1
               between_10k_100k     4.8
               between_100k_1m      0.2
               over_1m              0.0
Length: 6187, dtype: float64

In [25]:
# Name the stacked series after its concept so the value column of the CSV
# is labelled accordingly, then write the datapoints file.
indicators2 = indicators2.rename('adult_percentage')
indicators2.to_csv('../../ddf--datapoints--adult_percentage--by--country--year--wealth_group.csv')

In [36]:
# Adult count per (Country, Year); the source column is expressed in
# thousands of adults.
adults = df[' Adults (Thousands)']
# One group of wealth-group percentages per (country, year).
grps = indicators2.groupby(['country', 'year'])

# indicators2 holds percentages (0-100) and `adults` is in thousands, so
# percentage * thousands must be scaled by 1000 / 100 to obtain a number of
# persons. Without this factor the result is ten times too small.
PERSONS_PER_PERCENT_OF_THOUSAND = 1000 / 100


def get_number(i):
    """Return the adult count (in thousands) stored for the (country, year) key ``i``."""
    return adults.loc[i]


# One Series per (country, year): number of adults in each wealth group.
res = [
    ser * get_number(k) * PERSONS_PER_PERCENT_OF_THOUSAND
    for k, ser in grps
]

In [41]:
# Stitch the per-(country, year) pieces collected in `res` back into one Series.
res1 = pd.concat(res)

In [43]:
# Thresholds that separate the four wealth groups.
levels = [10000, 100000, 1000000]
# Each threshold scaled by 0.03 / 365.
# NOTE(review): presumably a 3% yearly return expressed per day — confirm.
levels_daily = [threshold * 0.03 / 365 for threshold in levels]


In [44]:
levels_daily 

[0.821917808219178, 8.219178082191782, 82.1917808219178]

In [46]:
# Move the index levels into ordinary columns. Note that res1 is rebound, so
# running this cell a second time would reset the index again.
res1 = res1.reset_index()

In [48]:
res1.wealth_group.unique()

array(['under_10k', 'between_10k_100k', 'between_100k_1m', 'over_1m'],
      dtype=object)

In [49]:
# Wealth groups in ascending order; the n-th group is mapped to 'level<n>'.
wealth_groups_ascending = [
    'under_10k',
    'between_10k_100k',
    'between_100k_1m',
    'over_1m',
]
m = {
    group: f'level{rank}'
    for rank, group in enumerate(wealth_groups_ascending, start=1)
}

In [70]:
# Look up the income level of every row (an unknown wealth group raises
# KeyError), then swap the wealth_group column for the new income_level one.
income_levels = [m[group] for group in res1['wealth_group']]
res2 = res1.drop(columns=['wealth_group']).assign(income_level=income_levels)

In [71]:
# Disabled: build the full country x year x income_level product index
# (only needed by the commented-out reindex at the end of this cell).
# new_idx = pd.MultiIndex.from_product([res2.country.unique(),
#                                       res2.year.unique(),
#                                       res2.income_level.unique()])

# Key the rows by (country, year, income_level).
res2 = res2.set_index(['country', 'year', 'income_level'])
# res2 = res2.reindex(new_idx)

In [72]:
res2['adult_percentage'].hasnans

False

In [63]:
# res2 = res2['adult_percentage'].fillna(0)

In [73]:
# res2 is a DataFrame whose single value column is still called
# 'adult_percentage'. Assigning `.name` on a DataFrame does not rename any
# column, so rename the column explicitly to make the exported header match
# the 'population' concept.
res2 = res2.rename(columns={'adult_percentage': 'population'})

In [74]:
# Round every value down to a whole number.
res2 = np.floor(res2)
# Cast to integers, order the rows by their index and write the datapoints file.
population_sorted = res2.astype(int).sort_index()
population_sorted.to_csv('../../ddf--datapoints--population--by--country--year--income_level.csv')

In [77]:
# Human-readable names of the measure concepts. All entries are written
# without surrounding whitespace so the `name` column of the concepts file is
# clean ('Gini %' previously carried a leading space, unlike its siblings).
names1 = ['Adults (Thousands)', 'Mean Wealth per Adult',
          'Median Wealth per Adult', 'Gini %']
# Single-column frame (column label 0) holding the names.
cdf = pd.DataFrame(names1)

In [78]:
# Label the only column, then derive the concept id from each name and mark
# every row as a measure.
cdf.columns = ['name']
cdf = cdf.assign(
    concept=cdf['name'].map(to_concept_id),
    concept_type='measure',
)

In [79]:
cdf

Unnamed: 0,name,concept,concept_type
0,Adults (Thousands),adults_thousands,measure
1,Mean Wealth per Adult,mean_wealth_per_adult,measure
2,Median Wealth per Adult,median_wealth_per_adult,measure
3,Gini %,gini_pct,measure


In [80]:
# Write the concepts file with the columns in concept, concept_type, name
# order and without the positional index.
cdf[['concept', 'concept_type', 'name']].to_csv('../../ddf--concepts.csv', index=False)

In [81]:
# NOTE: `cdf` is rebound here — from this point on it holds the concepts
# table read from the ddf--open_numbers folder (all columns as strings), not
# the measures frame built earlier in this notebook.
cdf = pd.read_csv('../../../ddf--open_numbers/ddf--concepts.csv', dtype=str)

In [83]:
# Read this dataset's own concepts file back from disk.
# NOTE(review): the same path is overwritten by the final cell of this
# notebook, so on a re-run this reads the already-merged file.
cdf2= pd.read_csv('../../ddf--concepts.csv')

In [84]:
cdf2

Unnamed: 0,concept,concept_type,name,domain
0,adults_thousands,measure,Adults (Thousands),
1,mean_wealth_per_adult,measure,Mean Wealth per Adult,
2,median_wealth_per_adult,measure,Median Wealth per Adult,
3,gini_pct,measure,Gini %,
4,population,measure,Population,
5,adult_percentage,measure,Adult percentage,
6,income_level,entity_domain,Income level,
7,wealth_group,entity_domain,Wealth group,


In [86]:
# Index both concept tables by concept id. Both names are rebound, so this
# cell cannot be run twice in a row: 'concept' is no longer a column after
# the first run.
cdf = cdf.set_index('concept')
cdf2 = cdf2.set_index('concept')

In [88]:
# Stack the open_numbers concepts on top of this dataset's concepts.
# NOTE(review): concat does not de-duplicate, so a concept id present in both
# tables appears twice in cdf3 — confirm whether duplicates can occur and
# which definition should win.
cdf3 = pd.concat([cdf, cdf2])

In [89]:
# Overwrite this dataset's concepts file with the merged table (the concept
# index is written as the first column).
cdf3.to_csv('../../ddf--concepts.csv')