### Analisis Regresi Data Panel terhadap Faktor Penentu Pertumbuhan Inklusif di Berbagai Provinsi di Indonesia Selama 7 Tahun

#### Import Library

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats 
from scipy.stats import f
from linearmodels.panel import compare
from linearmodels.panel import PanelOLS, RandomEffects
from statsmodels.api import OLS, add_constant
from statsmodels.stats.stattools import durbin_watson
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor

#### Load Data and Set Dataset Index 

In [2]:
dataset = pd.read_excel("Inclusive Growth Determinants.xlsx")
dataset = dataset.set_index(['Province', 'Year'])
dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,GRDP Capita,GRDP Employed,Health Complaint,Formal Sector,Unemployed Percent,HDI,Gov Spending,Gov CapEx,Poverty,Gini,Decent Sanitation,Clean Water,HS Graduation,Agriculture,Life Expectancy,GRDP Nominal,Population Thousand
Province,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ACEH,2015,22524.31,53.990600,27.92,0.3950,8.830,69.45,37549.69,8553.03,17.095,0.3365,54.68,61.23,68.16,37598.849177,69.540,1.290927e+08,5018.7
ACEH,2016,22835.29,56.680773,25.78,0.4283,7.850,70.00,42180.53,9870.07,16.580,0.3370,62.68,63.31,74.46,40218.310849,69.565,1.368438e+08,5096.2
ACEH,2017,23362.90,56.179526,24.85,0.4042,6.980,70.60,43580.96,8920.15,16.405,0.3290,63.38,64.85,70.64,43363.354290,69.585,1.458069e+08,5189.5
ACEH,2018,24013.79,56.608468,30.18,0.4056,6.440,71.19,38767.10,6320.93,15.825,0.3215,67.09,66.48,70.68,46365.383165,69.700,1.559110e+08,5281.3
ACEH,2019,24842.30,55.897635,29.33,0.4288,5.825,71.90,45620.69,8743.71,15.165,0.3205,73.16,85.81,69.96,48438.862870,69.915,1.641630e+08,5371.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PAPUA,2017,45577.05,88.351497,16.53,0.2181,3.790,59.09,49176.39,9608.32,27.690,0.3975,33.06,59.09,33.82,22274.746393,65.225,1.889381e+08,3265.2
PAPUA,2018,48069.41,87.764036,17.60,0.2189,2.875,60.06,51086.73,10129.62,27.585,0.3910,33.75,58.35,29.56,23754.007686,65.445,2.106006e+08,3322.5
PAPUA,2019,40203.42,76.102458,15.20,0.2071,3.365,60.84,51993.33,9839.85,27.040,0.3905,38.27,60.85,27.44,24177.230293,65.690,1.895107e+08,3379.3
PAPUA,2020,32108.51,77.630270,16.27,0.2008,3.850,60.44,47564.27,6401.15,26.720,0.3935,40.31,62.73,30.92,24457.093241,65.835,1.992329e+08,4303.7


#### Variabel Dependen dan Independen

In [3]:
# Variabel dependen
y = dataset['Population Thousand']

# Menambahkan variabel-variabel ke X
X = dataset[['GRDP Capita', 'GRDP Employed', 'Health Complaint', 'Formal Sector', 
             'Unemployed Percent', 'HDI', 'Gov Spending', 'Gov CapEx', 
             'Poverty', 'Gini', 'Decent Sanitation', 'Clean Water', 
             'HS Graduation', 'Agriculture', 'Life Expectancy', 'GRDP Nominal']]

# Menambahkan konstanta intercept ke model
X = sm.add_constant(X)

#### Uji Heteroskedastisitas Data Asli

In [4]:
model = sm.OLS(y, X).fit()
bp_test = het_breuschpagan(model.resid, X)

# Menampilkan hasil uji
print("--- Uji Heteroskedastisitas ---\n")
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
results = dict(zip(labels, bp_test))
for key, value in results.items():
    print(f"{key}: {value}")

--- Uji Heteroskedastisitas ---

Lagrange multiplier statistic: 52.85590436806245
p-value: 7.962668873629385e-06
f-value: 3.943264712773936
f p-value: 1.533299875433917e-06


#### Transformasi Data

In [5]:
dataset_transformed = dataset.copy()

# Transformasi log pada variabel target
dataset_transformed['Population Thousand'] = np.log(dataset['Population Thousand'])

# Variabel yang akan ditransformasi log
log_columns = ['GRDP Capita', 'GRDP Employed', 'Health Complaint', 'Formal Sector', 
               'Unemployed Percent', 'HDI', 'Gov Spending', 'Gov CapEx', 
               'Poverty', 'Gini', 'Decent Sanitation', 'Clean Water', 
               'HS Graduation', 'Agriculture', 'Life Expectancy', 'GRDP Nominal']

# Transformasi log pada variabel-variabel independen
dataset_transformed[log_columns] = np.log(dataset[log_columns] + 1)

# Variabel dependen dan independen setelah transformasi
y_transformed = dataset_transformed['Population Thousand']
X_transformed = dataset_transformed[log_columns]
X_transformed = sm.add_constant(X_transformed)

# Menampilkan dataset hasil transformasi
dataset_transformed

Unnamed: 0_level_0,Unnamed: 1_level_0,GRDP Capita,GRDP Employed,Health Complaint,Formal Sector,Unemployed Percent,HDI,Gov Spending,Gov CapEx,Poverty,Gini,Decent Sanitation,Clean Water,HS Graduation,Agriculture,Life Expectancy,GRDP Nominal,Population Thousand
Province,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
ACEH,2015,10.022395,4.007162,3.364533,0.332894,2.285439,4.254903,10.533447,9.054158,2.895636,0.290054,4.019621,4.130837,4.236423,10.534755,4.256180,18.676041,8.520926
ACEH,2016,10.036106,4.054924,3.287655,0.356485,2.180417,4.262680,10.649738,9.197364,2.866762,0.290428,4.153871,4.163715,4.323603,10.602103,4.256534,18.734351,8.536250
ACEH,2017,10.058947,4.046196,3.252311,0.339468,2.076938,4.271095,10.682399,9.096180,2.856758,0.284427,4.164803,4.187379,4.271654,10.677393,4.256818,18.797794,8.554393
ACEH,2018,10.086425,4.053670,3.439777,0.340464,2.006871,4.279302,10.565353,8.751780,2.822866,0.278767,4.220830,4.211831,4.272212,10.744330,4.258446,18.864796,8.571928
ACEH,2019,10.120343,4.041254,3.412137,0.356835,1.920592,4.289089,10.728139,9.076204,2.782848,0.278010,4.306225,4.463722,4.262116,10.788078,4.261482,18.916370,8.588862
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PAPUA,2017,10.727182,4.492578,2.863914,0.197292,1.566530,4.095843,10.803189,9.170489,3.356549,0.334685,3.528124,4.095843,3.550192,10.011254,4.193058,19.056930,8.091076
PAPUA,2018,10.780422,4.485982,2.923162,0.197949,1.354546,4.111857,10.841300,9.223318,3.352882,0.330023,3.548180,4.083452,3.419692,10.075549,4.196375,19.165474,8.108473
PAPUA,2019,10.601732,4.345135,2.785011,0.188221,1.473618,4.124550,10.858890,9.194297,3.333632,0.329663,3.670461,4.124712,3.347797,10.093208,4.200055,19.059956,8.125424
PAPUA,2020,10.376908,4.364757,2.848971,0.182988,1.578979,4.118061,10.769858,8.764389,3.322154,0.331819,3.721105,4.154655,3.463233,10.104716,4.202227,19.109985,8.367230


#### Uji Heteroskedastisitas setelah transformasi

In [6]:
model_log = sm.OLS(y_transformed, X_transformed).fit()
bp_test_log = het_breuschpagan(model_log.resid, X_transformed)

# Menampilkan hasil uji
print("--- Uji Heteroskedastisitas Setelah Transform Data ---\n")
labels = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
results = dict(zip(labels, bp_test_log))
for key, value in results.items():
    print(f"{key}: {value}")

--- Uji Heteroskedastisitas Setelah Transform Data ---

Lagrange multiplier statistic: 22.78211600257567
p-value: 0.11968436502791945
f-value: 1.462136749236613
f p-value: 0.1155979864897042


#### Uji Chow

In [7]:
def chow_test(df, y_col, x_cols):
    df_period1 = df[df.index.get_level_values('Year') > 2018]
    df_period2 = df[df.index.get_level_values('Year') < 2021]

    X1, y1 = df_period1[x_cols], df_period1[y_col]
    X2, y2 = df_period2[x_cols], df_period2[y_col]

    X_combined = pd.concat([X1, X2])
    y_combined = pd.concat([y1, y2])

    model_combined = sm.OLS(y_combined, sm.add_constant(X_combined)).fit()
    model_1 = sm.OLS(y1, sm.add_constant(X1)).fit()
    model_2 = sm.OLS(y2, sm.add_constant(X2)).fit()

    RSS_combined = model_combined.ssr
    RSS_1 = model_1.ssr
    RSS_2 = model_2.ssr

    n1, n2 = len(y1), len(y2)
    k = X1.shape[1] + 1  

    # Menghitung F-statistic
    F_stat = ((RSS_combined - (RSS_1 + RSS_2)) / k) / ((RSS_1 + RSS_2) / (n1 + n2 - 2 * k))

    # Menghitung p-value
    df1 = k  
    df2 = (n1 + n2 - 2 * k) 
    p_value = 1 - f.cdf(F_stat, df1, df2)

    return F_stat, p_value, df1, df2

# Fungsi uji Chow
F_stat, p_value, df1, df2 = chow_test(
    dataset_transformed, 
    y_col='Population Thousand', 
    x_cols=['GRDP Capita', 'GRDP Employed', 'Health Complaint', 'Formal Sector', 
             'Unemployed Percent', 'HDI', 'Gov Spending', 'Gov CapEx', 
             'Poverty', 'Gini', 'Decent Sanitation', 'Clean Water', 
             'HS Graduation', 'Agriculture', 'Life Expectancy', 'GRDP Nominal']
)

print("--- Uji Chow ---\n")

# Hasil uji Chow
alpha = 0.05
print(f"F-Statistic: {F_stat}")
print(f"P-Value: {p_value}")

if p_value <= alpha:
    print("Terdapat dua model yang memiliki perbedaan signifikan.") 
    print("Hipotesis nol (H0) ditolak, langkah selanjutnya lakukan Uji Hausman.")
else:
    print("Tidak terdapat perbedaan signifikan antara dua model.") 
    print("Hipotesis nol (H0) gagal ditolak, langkah selanjutnya lakukan Uji Breusch-Pagan Lagrange Multiplier (LM).")

--- Uji Chow ---

F-Statistic: 1.7961939980640693
P-Value: 0.028389707322297508
Terdapat dua model yang memiliki perbedaan signifikan.
Hipotesis nol (H0) ditolak, langkah selanjutnya lakukan Uji Hausman.


#### Uji Hausman

In [8]:
fixed_effects_model = PanelOLS(y_transformed, X_transformed, entity_effects=True)
fixed_results = fixed_effects_model.fit()

random_effects_model = RandomEffects(y_transformed, X_transformed)
random_results = random_effects_model.fit()

fixed_coefficients = fixed_results.params
random_coefficients = random_results.params
fixed_cov_matrix = fixed_results.cov
random_cov_matrix = random_results.cov

diff_coeff = fixed_coefficients - random_coefficients
diff_cov = fixed_cov_matrix + random_cov_matrix
hausman_stat = np.dot(diff_coeff.T, np.linalg.inv(diff_cov).dot(diff_coeff))

df = len(fixed_coefficients)
p_value = 1 - stats.chi2.cdf(hausman_stat, df)

print("--- Uji Hausman ---\n")
print("Hausman Statistic:", hausman_stat)
print("p-value:", p_value)

alpha = 0.05
if p_value < alpha:
    print(f"H0 ditolak (p-value = {p_value:.4f}), Fixed Effect Model lebih sesuai.")
else:
    print(f"H0 diterima (p-value = {p_value:.4f}), Random Effect Model lebih sesuai.")

--- Uji Hausman ---

Hausman Statistic: 286.1856831942833
p-value: 0.0
H0 ditolak (p-value = 0.0000), Fixed Effect Model lebih sesuai.


#### Fixed Effect Model

In [9]:
fixed_effects_model = PanelOLS(y_transformed, X_transformed, entity_effects=True)

# Estimasi model
fixed_res = fixed_effects_model.fit()

# Hasil model Fixed Effects
print("\nHasil Fixed Effect Model:")
print(fixed_res)


Hasil Fixed Effect Model:
                           PanelOLS Estimation Summary                           
Dep. Variable:     Population Thousand   R-squared:                        0.8784
Estimator:                    PanelOLS   R-squared (Between):              0.6067
No. Observations:                  238   R-squared (Within):               0.8784
Date:                 Fri, Nov 22 2024   R-squared (Overall):              0.6072
Time:                         22:50:05   Log-likelihood                    679.80
Cov. Estimator:             Unadjusted                                           
                                         F-statistic:                      84.850
Entities:                           34   P-value                           0.0000
Avg Obs:                        7.0000   Distribution:                  F(16,188)
Min Obs:                        7.0000                                           
Max Obs:                        7.0000   F-statistic (robust):         

#### Evaluasi Signifikansi

In [10]:
significance_level = 0.05

# Variabel yang signifikan
significant_vars = fixed_results.pvalues[fixed_results.pvalues < significance_level]
significant_df = pd.DataFrame({
    'Variable': significant_vars.index,
    'P-Value': significant_vars.values
}).sort_values(by='P-Value')

# Variabel yang tidak signifikan
insignificant_vars = fixed_results.pvalues[fixed_results.pvalues >= significance_level]
insignificant_df = pd.DataFrame({
    'Variable': insignificant_vars.index,
    'P-Value': insignificant_vars.values
}).sort_values(by='P-Value')

print("--- Variabel Signifikan pada Tingkat Signifikansi 5% ---")
print(significant_df.to_string(index=False))

print("\n--- Variabel Tidak Signifikan pada Tingkat Signifikansi 5% ---")
print(insignificant_df.to_string(index=False))

--- Variabel Signifikan pada Tingkat Signifikansi 5% ---
     Variable  P-Value
  GRDP Capita 0.000000
 GRDP Nominal 0.000000
GRDP Employed 0.000115
  Agriculture 0.001998
          HDI 0.022175

--- Variabel Tidak Signifikan pada Tingkat Signifikansi 5% ---
          Variable  P-Value
  Health Complaint 0.183551
   Life Expectancy 0.197763
 Decent Sanitation 0.272916
             const 0.372745
           Poverty 0.405781
Unemployed Percent 0.539102
         Gov CapEx 0.625439
      Gov Spending 0.818999
              Gini 0.832107
     HS Graduation 0.866768
     Formal Sector 0.944755
       Clean Water 0.983581
