In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Location of the Kaggle "global air pollution" CSV inside the Kaggle runtime.
file_path = "/kaggle/input/global-air-pollution-dataset/global air pollution dataset.csv"

# Read the file, map +/-inf to NaN, then drop every row that still contains a
# NaN. Doing the inf -> NaN step first lets a single dropna() remove both kinds
# of unusable values.
# NOTE(review): dropna() without `subset` discards a row when ANY column is
# missing, including columns the visible regressions never use — confirm this
# is intended.
data = (
    pd.read_csv(file_path)
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Багатофакторна економетрична модель

In [2]:
# Baseline multiple regression: the overall AQI value explained by three
# pollutant sub-indices.
pollutant_columns = ['Ozone AQI Value', 'NO2 AQI Value', 'PM2.5 AQI Value']

y = data['AQI Value']
# add_constant inserts a 'const' column so the fitted model has an intercept.
X = sm.add_constant(data[pollutant_columns])

# Ordinary least squares; fit() is called with its defaults.
model = sm.OLS(endog=y, exog=X).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:              AQI Value   R-squared:                       0.975
Model:                            OLS   Adj. R-squared:                  0.975
Method:                 Least Squares   F-statistic:                 2.971e+05
Date:                Fri, 05 Jul 2024   Prob (F-statistic):               0.00
Time:                        09:24:32   Log-Likelihood:                -83158.
No. Observations:               23035   AIC:                         1.663e+05
Df Residuals:                   23031   BIC:                         1.664e+05
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              -0.6437      0.112     

# Розрахунок еластичності

In [3]:
# Elasticity of the dependent variable with respect to each regressor,
# evaluated at the sample means: e_j = beta_j * mean(x_j) / mean(y).
means = X.mean()
mean_y = y.mean()

elasticity = model.params * (means / mean_y)

# Derive the "significant" set from the fitted model instead of hard-coding
# column names, so the printed label stays true if the data or the model
# specification changes. The intercept is excluded because an elasticity with
# respect to a constant term is not meaningful.
SIGNIFICANCE_LEVEL = 0.05
slope_pvalues = model.pvalues.drop('const', errors='ignore')
significant_predictors = slope_pvalues.index[slope_pvalues < SIGNIFICANCE_LEVEL].tolist()
elasticity = elasticity[significant_predictors]

print("Elasticity of significant predictors:")
print(elasticity)

Elasticity of significant predictors:
Ozone AQI Value    0.075847
NO2 AQI Value     -0.001498
PM2.5 AQI Value    0.934549
dtype: float64


# Додавання бінарної змінної для `AQI Category` (0 для категорії `Good` («Добре»), 1 для всіх інших)

In [4]:

# Dummy regressor: 0 when the row's 'AQI Category' equals 'Good', 1 otherwise.
# NOTE(review): if 'AQI Category' is itself a banding of 'AQI Value' (the
# dependent variable), this dummy is derived from y — confirm it is appropriate
# as a regressor.
is_good = data['AQI Category'] == 'Good'
data['AQI Category Binary'] = np.where(is_good, 0, 1)

# Same pollutant regressors as the baseline model, plus the dummy.
extended_columns = [
    'Ozone AQI Value',
    'NO2 AQI Value',
    'PM2.5 AQI Value',
    'AQI Category Binary',
]
X = sm.add_constant(data[extended_columns])

# Re-fit against the `y` defined in the earlier cell. This rebinds `X` and
# `model`, so cells run after this one see the extended specification.
model = sm.OLS(endog=y, exog=X).fit()

print(model.summary())

                            OLS Regression Results                            
Dep. Variable:              AQI Value   R-squared:                       0.976
Model:                            OLS   Adj. R-squared:                  0.976
Method:                 Least Squares   F-statistic:                 2.326e+05
Date:                Fri, 05 Jul 2024   Prob (F-statistic):               0.00
Time:                        09:24:33   Log-Likelihood:                -82674.
No. Observations:               23035   AIC:                         1.654e+05
Df Residuals:                   23030   BIC:                         1.654e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                   0.1899    

# Повторний розрахунок еластичності

In [5]:
# Elasticities at the sample means for the model that includes the dummy:
# coefficient * (mean(x) / mean(y)), reported for the three pollutant
# sub-indices only (the intercept and the dummy are left out).
pollutants = ['Ozone AQI Value', 'NO2 AQI Value', 'PM2.5 AQI Value']

means = X.mean()
mean_y = y.mean()
scaled_means = means.div(mean_y)
elasticity = model.params.mul(scaled_means).loc[pollutants]

print("Elasticity with binary variable included:")
print(elasticity)

Elasticity with binary variable included:
Ozone AQI Value    0.078075
NO2 AQI Value      0.000934
PM2.5 AQI Value    0.954809
dtype: float64
