In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import plotly.graph_objects as go

from datasets import DF, Load

In [2]:
df = Load(DF.WIID)

df = df[['ISO', 'YEAR', 'GDP PER CAPITA']]
df = df[df['YEAR'] >= 1980]
df = df.rename(columns = {'ISO': 'c3', 'YEAR': 'year', 'GDP PER CAPITA': 'gdppc'})

fil = df.sort_values(by=['year', 'gdppc'])
fil = fil.reset_index(drop = True)

fil

Unnamed: 0,c3,year,gdppc
0,MOZ,1980,516.18
1,MMR,1980,594.40
2,SOM,1980,617.93
3,TCD,1980,845.67
4,ETH,1980,865.93
...,...,...,...
3193,DNK,2020,55819.91
3194,GRL,2020,55819.91
3195,ARE,2020,63299.42
3196,IRL,2020,90624.72


In [3]:
y_bar = fil.groupby('year')['gdppc'].mean()
y_bar = y_bar.rename('mean_gdppc')

In [4]:
fil = fil.merge(y_bar, left_on = 'year', right_index = True)
fil['s'] = fil['gdppc'] / fil['mean_gdppc']

fil = fil.sort_values(by=['year', 'gdppc'], ascending=[True, True])
fil

Unnamed: 0,c3,year,gdppc,mean_gdppc,s
0,MOZ,1980,516.18,11667.646923,0.044240
1,MMR,1980,594.40,11667.646923,0.050944
2,SOM,1980,617.93,11667.646923,0.052961
3,TCD,1980,845.67,11667.646923,0.072480
4,ETH,1980,865.93,11667.646923,0.074216
...,...,...,...,...,...
3193,DNK,2020,55819.91,16166.488590,3.452816
3194,GRL,2020,55819.91,16166.488590,3.452816
3195,ARE,2020,63299.42,16166.488590,3.915471
3196,IRL,2020,90624.72,16166.488590,5.605715


_"suppose N countries are ranked from lowest to highest per capita real income. f(yi) then indicates the position country i (whose income is yi) occupies in the international distribution of income. By dividing by the ‘length’ of the cumulative distribution, one can normalize f to lie between 0 and 1"_ (p. 108, Park, W. G. & Bark, D. A., 1995)

In [5]:
fil['f_raw'] = fil.groupby('year')['gdppc'].rank(method='dense', ascending=True)
fil['f_mid'] = fil['f_raw'] - 0.5

fil['N'] = fil.groupby('year')['gdppc'].transform('count')
fil['f'] = fil['f_mid'] / fil['N']

fil = fil[['c3', 'year', 'gdppc', 'N', 's', 'f']]

In [6]:
gini = fil.groupby('year')[['f', 's']].apply(lambda x: x[['f', 's']].cov().iloc[0, 1] * 2)
gini = gini.rename('gini')

In [7]:
y_bar_df = y_bar.reset_index(name='mean_gdppc')
gini_df = gini.reset_index(name='gini_df')

kuznets = pd.merge(y_bar, gini, on = 'year', how = 'inner')
kuznets = kuznets.reset_index()

In [8]:
kuznets['y'] = np.log(kuznets['mean_gdppc'])
kuznets['y2'] = np.power(kuznets['y'], 2)

### Regression model

In [9]:
k_x = sm.add_constant(kuznets[['y', 'y2']])
k_y = kuznets['gini']

In [10]:
k_model = sm.OLS(k_y, k_x).fit()
print(k_model.summary())

                            OLS Regression Results                            
Dep. Variable:                   gini   R-squared:                       0.621
Model:                            OLS   Adj. R-squared:                  0.601
Method:                 Least Squares   F-statistic:                     31.15
Date:                Thu, 16 Jan 2025   Prob (F-statistic):           9.80e-09
Time:                        20:51:02   Log-Likelihood:                 124.81
No. Observations:                  41   AIC:                            -243.6
Df Residuals:                      38   BIC:                            -238.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        -17.8123      9.127     -1.952      0.0

**G = -17.81 + 3.98 y + -0.21 y^2; R² = 0.62**
<br>
**y := ln of gdp per capita**

### Graph

In [11]:
β0 = k_model.params['const']
β1 = k_model.params['y']
β2 = k_model.params['y2']

k_pred = pd.DataFrame({'y': np.linspace(kuznets['y'].min(), kuznets['y'].max(), 200)})
k_pred['gini'] = β0 + β1 * k_pred['y'] + β2 * k_pred['y'] ** 2

In [12]:
col_curve = '#286bbb'
col_acc = '#883039'

fc = '#595959'
fc_scnd = '#8c8c8c'
fc_title = '#262626'

sctr = go.Scatter(
    x=kuznets['y'],
    y=kuznets['gini'],
    mode='markers',
    marker=dict(color=col_curve, size=8),
    name='Data Points'
)

trnd = go.Scatter(
    x=k_pred['y'],
    y=k_pred['gini'],
    mode='lines',
    marker=dict(color=col_acc, size=8),
    name='Trend'
)

fig = go.Figure(data=[sctr, trnd])

fig.update_xaxes(title_text='ln GDP per capita (2017 USD PPP)')
fig.update_yaxes(title_text='Gini Coefficient')

fig.update_layout(
    title=dict(
        text='Growth and Inequality - The Global Trend (1980-2020)',
        font=dict(size=20, color=fc_title),
        xanchor='center', yanchor='top',
        x = 0.5, y=0.98
    ),
    template='seaborn', showlegend=False,
    width=1000, height=600,
    font=dict(family="Helvetica Now Text", color=fc, size=12),
    plot_bgcolor='rgba(234, 234, 243, 0.75)',
    margin=dict(t=40, b=75, l=50, r=40),
)

fig.add_annotation(
    text='Source: UNU-WIDER, World Income Inequality Database (WIID) Companion dataset. Version 15 May 2023 (1.0.1).',
    xref='paper', yref='paper', x=0, y=-0.11,
    xanchor='left', yanchor='top',
    font=dict(size=10, color=fc_scnd),
    align='left',
    showarrow=False
)

fig.show()

In [13]:
fig.write_image('../images/kuznets-ppp.png', scale=3, engine='kaleido')