In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.stattools import adfuller, coint
from statsmodels.stats.stattools import durbin_watson
from statsmodels.tsa.vector_ar.vecm import coint_johansen

# Import data CSV
data = pd.read_csv('data_ecm.csv', delimiter=';')
data = data.dropna()

# Hapus kolom 'year' sebelum normalisasi
data = data.drop(columns=['year'])

# Menampilkan data awal
print("Data Sebelum Normalisasi:\n", data.head())

# Inisialisasi scaler
scaler = MinMaxScaler()

# Melakukan normalisasi pada kolom yang diinginkan
data[['literacy_rate', 'monthly_health_exp']] = scaler.fit_transform(
    data[['literacy_rate', 'monthly_health_exp']]
)

# Menampilkan data setelah normalisasi
print("\nData Setelah Normalisasi:\n", data.head())

Data Sebelum Normalisasi:
    literacy_rate  monthly_health_exp
0          96.77              3306.0
1          97.85              3029.0
2          97.99              3191.0
3          97.81              4575.0
4          97.56              4914.0

Data Setelah Normalisasi:
    literacy_rate  monthly_health_exp
0       0.000000            0.004903
1       0.280578            0.000000
2       0.316949            0.002867
3       0.270186            0.027363
4       0.205238            0.033363


In [2]:
# Step 1: Uji Stasioneritas dengan Augmented Dickey-Fuller Test
def adf_test(series, title=''):
    print(f'== ADF Test for {title} ==')
    result = adfuller(series, autolag='AIC')
    print('ADF Statistic:', result[0])
    print('p-value:', result[1])
    print('Critical Values:', result[4])
    print('\n')
    return result[1]  # Return p-value

# Uji stasioneritas untuk setiap variabel
adf_test(data['literacy_rate'], 'literacy_rate')
adf_test(data['monthly_health_exp'], 'monthly_health_exp')

# Step 2: Uji Cointegrasi
# Uji Engle-Granger jika ada dua variabel
if len(data.columns) == 2:
    score, p_value, _ = coint(data['literacy_rate'], data['monthly_health_exp'])
    print(f'Engle-Granger Cointegration test p-value: {p_value}')

# Uji Johansen jika ada lebih dari dua variabel
elif len(data.columns) > 2:
    johansen_test = coint_johansen(data[['literacy_rate', 'monthly_health_exp']], det_order=0, k_ar_diff=1)
    print('Johansen Cointegration Test Trace Statistic:', johansen_test.lr1)
    print('Critical Values:', johansen_test.cvt)

# Step 3: Model Error Correction (ECM)
# Buat model regresi jangka panjang dan simpan residual sebagai Error Correction Term (ECT)
long_term_model = sm.OLS(data['monthly_health_exp'], sm.add_constant(data[['literacy_rate']])).fit()
print(long_term_model.summary())
data['ECT'] = long_term_model.resid  # Error Correction Term

# Differencing variabel untuk membuat ECM
data['delta_literacy_rate'] = data['literacy_rate'].diff()
data['delta_monthly_health_exp'] = data['monthly_health_exp'].diff()
data['ECT_lag'] = data['ECT'].shift(1)
data.dropna(inplace=True)

# Regresi ECM
ecm_model = sm.OLS(data['delta_monthly_health_exp'], sm.add_constant(data[['delta_literacy_rate', 'ECT_lag']])).fit()
print(ecm_model.summary())

# Step 4: Uji Asumsi Klasik
# Uji Autokorelasi (Durbin-Watson)
print("Durbin-Watson Test:", durbin_watson(ecm_model.resid))

# Uji Heteroskedastisitas (Breusch-Pagan)
from statsmodels.stats.diagnostic import het_breuschpagan
bp_test = het_breuschpagan(ecm_model.resid, ecm_model.model.exog)
print('Breusch-Pagan Test p-value:', bp_test[1])

# Uji Multikolinearitas (VIF)
from statsmodels.stats.outliers_influence import variance_inflation_factor
exog = sm.add_constant(data[['delta_literacy_rate', 'delta_monthly_health_exp']])
vif_data = pd.DataFrame()
vif_data['Feature'] = exog.columns
vif_data['VIF'] = [variance_inflation_factor(exog.values, i) for i in range(exog.shape[1])]
print(vif_data)

# Uji Stasioneritas pada variabel yang sudah di-difference
adf_test(data['delta_literacy_rate'], 'delta_literacy_rate')
adf_test(data['delta_monthly_health_exp'], 'delta_monthly_health_exp')

== ADF Test for literacy_rate ==
ADF Statistic: -1.0232075230800186
p-value: 0.7447060700933998
Critical Values: {'1%': -3.9240193847656246, '5%': -3.0684982031250003, '10%': -2.67389265625}


== ADF Test for monthly_health_exp ==
ADF Statistic: -0.4449855398751435
p-value: 0.9023125406105545
Critical Values: {'1%': -3.769732625845229, '5%': -3.005425537190083, '10%': -2.6425009917355373}


Engle-Granger Cointegration test p-value: 0.004586909073226506
                            OLS Regression Results                            
Dep. Variable:     monthly_health_exp   R-squared:                       0.773
Model:                            OLS   Adj. R-squared:                  0.763
Method:                 Least Squares   F-statistic:                     74.94
Date:                Tue, 26 Nov 2024   Prob (F-statistic):           1.55e-08
Time:                        12:11:00   Log-Likelihood:                 7.4403
No. Observations:                  24   AIC:                         

1.943611636824899e-09