In [148]:
import pandas as pd
import numpy as np
import re

import wbdata

from datetime import datetime
from sklearn import linear_model
import statsmodels.api as sm

In [172]:
### cross-country annual GDP regression with lumi

# --- GDP data, fetched through wbdata ---
# Date window handed to wbdata as `data_date`.
dt = (datetime(2012, 1, 1), datetime(2019, 1, 1))
country_codes = [
    'CHN', 'USA', 'AUS', 'NZL', 'GBR',
    'DEU', 'CHE', 'ITA', 'ESP',
]
# country_codes = 'all'
# Indicator code -> column name used in the resulting frame.
indicators = {'NY.GDP.MKTP.KD': 'GDP_constant'}
df_gdp_countries = wbdata.get_dataframe(
    indicators,
    country=country_codes,
    convert_date=False,
    data_date=dt,
)

# Move the index levels into ordinary columns one level at a time; this
# yields a new frame, so the fetched frame itself is left untouched.
data = df_gdp_countries.reset_index(level=0).reset_index(level=0)

# import lumi data
# One CSV per country is read from country-lumi-data/<country name>.csv and
# its luminosity values are summed per (country_name, year).
country_names = data['country'].unique().tolist()

lumi_columns = ['country_name', 'year', 'sum(lumi)']

# Collect the per-country frames and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0 (and copied the whole
# accumulated frame on every iteration), so it must not be used here.
lumi_frames = []

for country_name in country_names:
    try:
        df_tmp = pd.read_csv('country-lumi-data/%s.csv' % country_name)
        # The first four characters of year_month are used as the year
        # (kept as a string).
        df_tmp['year'] = df_tmp['year_month'].astype(str).str[:4]
        df_tmp = (
            df_tmp[lumi_columns]
            .groupby(['country_name', 'year'], as_index=False)
            .sum()
        )
        lumi_frames.append(df_tmp)

    except Exception as e:
        # Best effort: a country whose file is missing or malformed is
        # reported and skipped rather than aborting the whole load.
        print(country_name, e)

if lumi_frames:
    df_country_lumi = pd.concat(lumi_frames, ignore_index=True)[lumi_columns]
else:
    # Nothing could be loaded: keep the expected columns so that code
    # selecting them by name still finds them.
    df_country_lumi = pd.DataFrame(columns=lumi_columns)

# Join GDP rows with luminosity rows on matching (year, country) pairs.
df_data_across_country = data.merge(
    right=df_country_lumi,
    left_on=['date', 'country'],
    right_on=['year', 'country_name'],
)

# data preparation

# Discard every row that has a missing value.
df_data_across_country = df_data_across_country.dropna()

# Single feature as an (n, 1) column array; target kept as a Series.
satellite_X = np.array(df_data_across_country['sum(lumi)']).reshape(-1, 1)
satellite_y = df_data_across_country['GDP_constant']
# satellite_X.shape, satellite_y.shape

# regression

# scikit-learn fit, used for the R^2 score.
reg = linear_model.LinearRegression().fit(satellite_X, satellite_y)

print('R square:', reg.score(satellite_X, satellite_y))

# statsmodels OLS on the same data (with an explicit constant column)
# for the full summary table.
satellite_X = sm.add_constant(satellite_X)
mod = sm.OLS(satellite_y, satellite_X)

res = mod.fit()
print(res.summary())

In [187]:
### China quarterly GDP regression with lumi

# Load the quarterly GDP table from the local CSV file.
df_China_quarterly_GDP = pd.read_csv(filepath_or_buffer='china-gdp-qt.csv')

In [210]:
# Sanity check: the last two digits of a yyyymm value give the month, which maps to a quarter 1-4.
(int(str(201812)[-2:]) - 1) // 3 + 1

4

In [215]:
# import quarterly lumi data
df_tmp = pd.read_csv('country-lumi-data/%s.csv' % 'China')

# Derive integer year and quarter from year_month: the first four
# characters are the year, the last two the month.
df_tmp = df_tmp.assign(
    year=df_tmp['year_month'].map(lambda ym: int(str(ym)[:4])),
    quarter=df_tmp['year_month'].map(
        lambda ym: int((int(str(ym)[-2:]) - 1) / 3) + 1
    ),
)

# Sum luminosity per (year, quarter), then move the index levels back
# into columns one level at a time.
df_tmp = (
    df_tmp[['year', 'quarter', 'sum(lumi)']]
    .groupby(['year', 'quarter'])
    .sum()
    .reset_index(level=0)
    .reset_index(level=0)
)

df_China_quarterly_lumi = df_tmp.copy()

In [216]:
# Pair each quarterly GDP row with the luminosity total of the same
# (year, quarter); rows without a match on both sides are not kept.
df_China_quarterly_data = df_China_quarterly_GDP.merge(
    df_China_quarterly_lumi,
    on=['year', 'quarter'],
)
df_China_quarterly_data

Unnamed: 0,year,quarter,GDP_constant_China,sum(lumi)
0,2018,3,214338.6,11253660.0
1,2018,2,205017.0,12331290.0
2,2018,1,184264.0,17941490.0
3,2017,4,219803.3,19200170.0
4,2017,3,201348.7,17679570.0
5,2017,2,192157.3,17451700.0
6,2017,1,172460.6,17126510.0
7,2016,4,205903.8,16922740.0
8,2016,3,188440.9,10542280.0
9,2016,2,179751.2,11321370.0


In [217]:
# data preparation

# Single feature as an (n, 1) column array; target kept as a Series.
satellite_X = np.array(df_China_quarterly_data['sum(lumi)']).reshape(-1, 1)
satellite_y = df_China_quarterly_data['GDP_constant_China']
# satellite_X.shape, satellite_y.shape

# regression

# scikit-learn fit, used for the R^2 score.
reg = linear_model.LinearRegression().fit(satellite_X, satellite_y)

print('R square:', reg.score(satellite_X, satellite_y))

# statsmodels OLS on the same data (with an explicit constant column)
# for the full summary table.
satellite_X = sm.add_constant(satellite_X)
mod = sm.OLS(satellite_y, satellite_X)

res = mod.fit()
print(res.summary())

R square: 0.17394005168949134
                            OLS Regression Results                            
Dep. Variable:     GDP_constant_China   R-squared:                       0.174
Model:                            OLS   Adj. R-squared:                  0.122
Method:                 Least Squares   F-statistic:                     3.369
Date:                Mon, 17 Dec 2018   Prob (F-statistic):             0.0851
Time:                        17:37:12   Log-Likelihood:                -207.10
No. Observations:                  18   AIC:                             418.2
Df Residuals:                      16   BIC:                             420.0
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        1.22e+05 

  "anyway, n=%i" % int(n))
