# Econometric UNFCCC Green Cities Commitment Analysis: USA
## Econometric Analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier
from io import StringIO

### 1. Load and Prepare Data

In [2]:
df_all = pd.read_csv('allcities.csv')
df_unfccc = pd.read_csv('UNFCCC.csv')

In [3]:
def BooltoZeroOne(df):
    for column in df.columns:
        if df[column].dtype == bool:
            df[column] = df[column].astype(int)

In [4]:
BooltoZeroOne(df_all)
BooltoZeroOne(df_unfccc)

In [5]:
independent_variables = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome','medianAge','whiteProp'] # 'redState' remove to deal with multicolinearity issues

### 2. Define Model Fitting Functions

In [6]:
def FitLogistic(df, y_var, X_vars, format = 'return', constant = True):
    df = df.dropna()

    X = df[X_vars]
    
    if constant:
        X = sm.add_constant(X)

    y = df[y_var]

    model = sm.Logit(y, X).fit(cov_type = 'HC3', disp = False)

    if format == 'return':
        return model.summary()
    elif format == 'print':
        print(model.summary())
    elif format == 'model':
        return model
    elif format == 'csv':
        print(model.summary())
        summary = pd.read_html(StringIO(model.summary().tables[1].as_html()), header = 0, index_col = 0)[0]
        summary.to_csv(f'Results/LR_{y_var}_FM.csv')

In [7]:
def FitRandomForest(df, y_var, X_vars, format = 'importance'):
    df = df.dropna()

    X = df[X_vars]

    y = df[y_var]

    rf = RandomForestClassifier()
    model = rf.fit(X, y)

    if format == 'importance':

        return pd.DataFrame({
            'Feature':X_vars,
            'Importance':model.feature_importances_
        }).sort_values(by = 'Importance', ascending = False)
    
    elif format == 'model':
        return model

In [8]:
def ForwardSelection(df, y_var, vars_to_check, format = 'return'):
    best_model = None
    best_aic = float('inf')

    vars_selected = []
    v = vars_to_check.copy()

    while v:
        potential_vars = []
        for var in v:
            vars_current = vars_selected + [var]
            model = FitLogistic(df, y_var, vars_current, format = 'model')
            aic = model.aic
            if aic < best_aic:
                best_aic = aic
                best_model = model
                potential_vars = vars_current
        
        if not potential_vars:
            break

        var_selected = potential_vars[-1]
        vars_selected.append(var_selected)
        v.remove(var_selected)

    print(' ')
    print(f'Best AIC: {best_aic}')
    print(' ')
    
    if format == 'return':
        return best_model.summary()
    elif format == 'print':
        print(best_model.summary())
    elif format == 'model':
        return best_model
    elif format == 'csv':
        print(best_model.summary())
        summary = pd.read_html(StringIO(best_model.summary().tables[1].as_html()), header = 0, index_col = 0)[0] 
        summary.to_csv(f'Results/LR_{y_var}_FS.csv')

### 3. Analysing Determinants of Cities to Sign up to the UNFCC

##### a) Logistic Regression

In [9]:
# All variables on their own versus UNFCCC
df = pd.DataFrame(columns = ['Variable','Coefficient','P-Value','AIC','BIC'])

for i in range(len(independent_variables)):
    lr = FitLogistic(df_all, y_var = 'UNFCCC', X_vars = [independent_variables[i]], format = 'model')
    df = pd.concat([df, pd.DataFrame([[independent_variables[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)    
df

  df = pd.concat([df, pd.DataFrame([[independent_variables[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)


Unnamed: 0,Variable,Coefficient,P-Value,AIC,BIC
0,population,5e-06,7.914043e-07,1633.0582,1648.228343
1,redCounty,-2.241665,1.4957209999999998e-19,1948.025817,1963.195959
2,unemploymentRate,-19.445843,0.002839589,2082.614319,2097.784461
3,povertyProp,-1.460479,0.2475538,2089.316769,2104.486912
4,tempDiff,-0.136376,0.03557506,2086.179931,2101.350074
5,numDisasters,0.002648,0.00190895,2080.883574,2096.053717
6,avgEmissionsPerCapita,-0.081271,2.026106e-06,2046.253409,2061.423552
7,lessThanHighSchoolProp,-4.10023,0.007179528,2083.070827,2098.24097
8,medianHouseholdIncome,1.7e-05,3.400496e-11,2063.615413,2078.785556
9,medianAge,-0.089444,4.732101e-12,2059.208915,2074.379057


In [10]:
# All variables versus UNFCCC
FitLogistic(df_all, y_var = 'UNFCCC', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14534
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.2872
Time:                        15:27:46   Log-Likelihood:                -743.53
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                2.089e-121
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.3804      1.780     -0.214      0.831      -3.869       3.108
population              5.005e-06   1.13e-06      4.438      0.000    2.79e-06    7.22e-06
redC

In [11]:
ForwardSelection(df = df_all, y_var = 'UNFCCC', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 1505.1750879131946
 
                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14537
Method:                           MLE   Df Model:                            8
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.2872
Time:                        15:27:47   Log-Likelihood:                -743.59
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                3.689e-124
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.2816      0.904     -0.311      0.755      -2.053       1.490
population              4.996e-06   1.12e-06      4.459      0

##### b) Random Forest Classification

In [12]:
FitRandomForest(df_all, y_var = 'UNFCCC', X_vars = independent_variables)  

Unnamed: 0,Feature,Importance
0,population,0.632825
10,whiteProp,0.055952
7,lessThanHighSchoolProp,0.052133
9,medianAge,0.049217
8,medianHouseholdIncome,0.045786
3,povertyProp,0.041056
2,unemploymentRate,0.037487
6,avgEmissionsPerCapita,0.02857
4,tempDiff,0.024352
5,numDisasters,0.022865


### 4. Analysing Determinants of Cities to Undertake Individual Actions Recorded by the UNFCCC

#### i) Has Commitments

##### a) Logistic Regression

In [13]:
FitLogistic(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1658
Time:                        15:27:49   Log-Likelihood:                -96.513
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 6.769e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      7.1363      4.706      1.516      0.129      -2.087      16.360
population              1.677e-06   5.22e-07      3.209      0.001    6.53e-07     2.7e-06
redC

In [14]:
ForwardSelection(df = df_unfccc, y_var = 'hasCommitments', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 205.95133559257675
 
                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1445
Time:                        15:27:49   Log-Likelihood:                -98.976
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 2.592e-07
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0.2236      0.900      0.249      0.804      -1.539       1.987
population              1.886e-06   5.46e-07      3.454      0

##### b) Random Forest Classification

In [15]:
FitRandomForest(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.250431
9,medianAge,0.122753
8,medianHouseholdIncome,0.106255
7,lessThanHighSchoolProp,0.09579
3,povertyProp,0.094591
10,whiteProp,0.077448
2,unemploymentRate,0.071777
6,avgEmissionsPerCapita,0.06575
4,tempDiff,0.054904
5,numDisasters,0.051215


#### ii) Has Actions Undertaken

##### a) Logistic Regression

In [16]:
FitLogistic(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = independent_variables, format = 'csv')

                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      185
Method:                             MLE   Df Model:                           11
Date:                  Tue, 23 Apr 2024   Pseudo R-squ.:                  0.3307
Time:                          15:27:49   Log-Likelihood:                -66.531
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 7.787e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      2.4800      6.182      0.401      0.688      -9.637      14.597
population              6.638e-06   1.66e-06      3.999      0.000    3.38e-06 

In [17]:
ForwardSelection(df = df_unfccc, y_var = 'hasActionsUndertaken', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 148.92822282906616
 
                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      192
Method:                             MLE   Df Model:                            4
Date:                  Tue, 23 Apr 2024   Pseudo R-squ.:                  0.3012
Time:                          15:27:49   Log-Likelihood:                -69.464
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 3.071e-12
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                     7.7181      2.605      2.963      0.003       2.613      12.824
population             6.435e-06   1.76e-06      

##### b) Random Forest Classification

In [18]:
FitRandomForest(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.259076
8,medianHouseholdIncome,0.131483
9,medianAge,0.115843
10,whiteProp,0.09504
3,povertyProp,0.094828
7,lessThanHighSchoolProp,0.086245
2,unemploymentRate,0.059896
4,tempDiff,0.052598
6,avgEmissionsPerCapita,0.052469
5,numDisasters,0.046086


#### iii) hasEmissionInventory

##### a) Logistic Regression

In [19]:
FitLogistic(df_unfccc, y_var = 'hasEmissionInventory', X_vars = independent_variables, format = 'csv')

                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      185
Method:                             MLE   Df Model:                           11
Date:                  Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1962
Time:                          15:27:49   Log-Likelihood:                -108.86
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 1.700e-07
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -8.3588      3.906     -2.140      0.032     -16.014      -0.704
population              1.287e-06   3.48e-07      3.692      0.000    6.04e-07 

In [20]:
ForwardSelection(df = df_unfccc, y_var = 'hasEmissionInventory', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 230.9334041402301
 
                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      192
Method:                             MLE   Df Model:                            4
Date:                  Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1843
Time:                          15:27:49   Log-Likelihood:                -110.47
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 3.746e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6.5556      1.785     -3.673      0.000     -10.054      -3.057
population              1.268e-06   3.22e-07   

##### b) Random Forest Classification

In [21]:
FitRandomForest(df_unfccc, y_var = 'hasEmissionInventory', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.267751
10,whiteProp,0.112462
9,medianAge,0.099641
7,lessThanHighSchoolProp,0.09886
8,medianHouseholdIncome,0.084965
3,povertyProp,0.081718
2,unemploymentRate,0.078618
5,numDisasters,0.057819
6,avgEmissionsPerCapita,0.05184
4,tempDiff,0.050479


#### iv) hasInitiativeParticipations

##### a) Logistic Regression

In [22]:
FitLogistic(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = independent_variables, format = 'csv')

                                Logit Regression Results                               
Dep. Variable:     hasInitiativeParticipations   No. Observations:                  197
Model:                                   Logit   Df Residuals:                      185
Method:                                    MLE   Df Model:                           11
Date:                         Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1189
Time:                                 15:27:49   Log-Likelihood:                -81.201
converged:                                True   LL-Null:                       -92.163
Covariance Type:                           HC3   LLR p-value:                   0.02496
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -7.0407      5.001     -1.408      0.159     -16.842       2.761
population             

In [23]:
ForwardSelection(df = df_unfccc, y_var = 'hasInitiativeParticipations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 178.22869681211463
 
                                Logit Regression Results                               
Dep. Variable:     hasInitiativeParticipations   No. Observations:                  197
Model:                                   Logit   Df Residuals:                      194
Method:                                    MLE   Df Model:                            2
Date:                         Tue, 23 Apr 2024   Pseudo R-squ.:                 0.06563
Time:                                 15:27:50   Log-Likelihood:                -86.114
converged:                                True   LL-Null:                       -92.163
Covariance Type:                           HC3   LLR p-value:                  0.002360
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5515      0.248      6.266      0.000       1.066       2.037
redCounty     -0.9902     

##### b) Random Forest Classification

In [24]:
FitRandomForest(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.232991
10,whiteProp,0.109625
8,medianHouseholdIncome,0.101621
9,medianAge,0.088793
2,unemploymentRate,0.079446
6,avgEmissionsPerCapita,0.077014
7,lessThanHighSchoolProp,0.073682
3,povertyProp,0.071322
5,numDisasters,0.068455
4,tempDiff,0.063616


#### v) hasImpact

##### a) Logistic Regression

In [25]:
FitLogistic(df_unfccc, y_var = 'hasImpact', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1392
Time:                        15:27:50   Log-Likelihood:                -34.042
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                    0.4422
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -9.0717      6.143     -1.477      0.140     -21.111       2.968
population              4.557e-08   1.71e-07      0.266      0.790    -2.9e-07    3.81e-07
redC

In [26]:
ForwardSelection(df = df_unfccc, y_var = 'hasImpact', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 78.25775360746783
 
                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1117
Time:                        15:27:50   Log-Likelihood:                -35.129
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                   0.03152
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.7994      2.083     -2.304      0.021      -8.881      -0.717
numDisasters               0.0132      0.005      2.471      0.

##### b) Random Forest Classification

In [27]:
FitRandomForest(df_unfccc, y_var = 'hasImpact', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.255821
3,povertyProp,0.115115
9,medianAge,0.111488
2,unemploymentRate,0.101347
7,lessThanHighSchoolProp,0.095263
8,medianHouseholdIncome,0.087605
10,whiteProp,0.085188
5,numDisasters,0.054425
6,avgEmissionsPerCapita,0.047653
4,tempDiff,0.040428


#### vi) hasMitigations

##### a) Logistic Regression

In [28]:
FitLogistic(df_unfccc, y_var = 'hasMitigations', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.2127
Time:                        15:27:50   Log-Likelihood:                -81.387
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 7.373e-06
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      1.5733      5.734      0.274      0.784      -9.665      12.812
population              1.403e-06   1.02e-06      1.369      0.171   -6.05e-07    3.41e-06
redC

In [29]:
ForwardSelection(df = df_unfccc, y_var = 'hasMitigations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 178.71769864766372
 
                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      192
Method:                           MLE   Df Model:                            4
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1839
Time:                        15:27:50   Log-Likelihood:                -84.359
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 1.110e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                     2.4879      2.454      1.014      0.311      -2.322       7.298
population             1.439e-06   9.97e-07      1.444      0.149

##### b) Random Forest Classification

In [30]:
FitRandomForest(df_unfccc, y_var = 'hasMitigations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.241399
8,medianHouseholdIncome,0.132779
9,medianAge,0.131792
10,whiteProp,0.092994
3,povertyProp,0.081019
7,lessThanHighSchoolProp,0.079659
2,unemploymentRate,0.069303
6,avgEmissionsPerCapita,0.058839
4,tempDiff,0.053483
5,numDisasters,0.050766


#### vii) hasAdaptations

##### a) Logistic Regression

In [31]:
FitLogistic(df_unfccc, y_var = 'hasAdaptations', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1611
Time:                        15:27:50   Log-Likelihood:                -102.26
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 4.730e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0.7971      4.548      0.175      0.861      -8.116       9.710
population              1.269e-06   8.48e-07      1.497      0.134   -3.93e-07    2.93e-06
redC

In [32]:
ForwardSelection(df = df_unfccc, y_var = 'hasAdaptations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 219.6236800370598
 
                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1320
Time:                        15:27:50   Log-Likelihood:                -105.81
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 4.773e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -3.1342      1.185     -2.644      0.008      -5.457      -0.811
population             1.246e-06   7.57e-07      1.645      0.100 

##### b) Random Forest Classification

In [33]:
FitRandomForest(df_unfccc, y_var = 'hasAdaptations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.270269
8,medianHouseholdIncome,0.113429
3,povertyProp,0.094731
10,whiteProp,0.089939
9,medianAge,0.088608
7,lessThanHighSchoolProp,0.08087
2,unemploymentRate,0.069806
6,avgEmissionsPerCapita,0.059525
4,tempDiff,0.058631
5,numDisasters,0.051789


#### viii) hasRiskAssessments

##### a) Logistic Regression

In [34]:
FitLogistic(df_unfccc, y_var = 'hasRiskAssessments', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1471
Time:                        15:27:51   Log-Likelihood:                -116.20
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 3.461e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -3.8236      4.141     -0.923      0.356     -11.940       4.293
population              8.474e-07   3.02e-07      2.808      0.005    2.56e-07    1.44e-06
redC

In [35]:
ForwardSelection(df = df_unfccc, y_var = 'hasRiskAssessments', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 245.06126170005405
 
                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      194
Method:                           MLE   Df Model:                            2
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1227
Time:                        15:27:51   Log-Likelihood:                -119.53
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 5.521e-08
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -2.7077      0.680     -3.984      0.000      -4.040      -1.376
population             8.857e-07   3.04e-07      2.914      0.004

##### b) Random Forest Classification

In [36]:
FitRandomForest(df_unfccc, y_var = 'hasRiskAssessments', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.257498
8,medianHouseholdIncome,0.11215
9,medianAge,0.09482
10,whiteProp,0.090696
7,lessThanHighSchoolProp,0.088147
3,povertyProp,0.080081
2,unemploymentRate,0.074429
4,tempDiff,0.066834
5,numDisasters,0.061199
6,avgEmissionsPerCapita,0.060759


#### ix) hasClimateActionPlans

##### a) Logistic Regression

In [37]:
FitLogistic(df_unfccc, y_var = 'hasClimateActionPlans', X_vars = independent_variables, format = 'csv')

                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      185
Method:                              MLE   Df Model:                           11
Date:                   Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1296
Time:                           15:27:51   Log-Likelihood:                -109.34
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 0.0006200
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.6963      4.380     -0.159      0.874      -9.280       7.888
population              9.725e-07    5.2e-07      1.870      0.061   -4

In [38]:
ForwardSelection(df = df_unfccc, y_var = 'hasClimateActionPlans', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 231.40073391185314
 
                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      192
Method:                              MLE   Df Model:                            4
Date:                   Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1188
Time:                           15:27:51   Log-Likelihood:                -110.70
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 5.262e-06
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      2.9802      2.099      1.420      0.156      -1.134       7.095
population              9.006e-07    4

##### b) Random Forest Classification

In [39]:
FitRandomForest(df_unfccc, y_var = 'hasClimateActionPlans', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.285863
8,medianHouseholdIncome,0.108173
9,medianAge,0.095593
10,whiteProp,0.085858
7,lessThanHighSchoolProp,0.080077
2,unemploymentRate,0.077586
3,povertyProp,0.071819
4,tempDiff,0.06479
5,numDisasters,0.058073
6,avgEmissionsPerCapita,0.057757


#### x) hasFinanceActions

##### a) Logistic Regression

In [40]:
FitLogistic(df_unfccc, y_var = 'hasFinanceActions', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      185
Method:                           MLE   Df Model:                           11
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                  0.1465
Time:                        15:27:51   Log-Likelihood:                -45.270
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                    0.1590
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      5.3011      8.481      0.625      0.532     -11.321      21.924
population              2.909e-07   2.32e-07      1.254      0.210   -1.64e-07    7.46e-07
redC

In [41]:
ForwardSelection(df = df_unfccc, y_var = 'hasFinanceActions', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 104.05154001823144
 
                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      195
Method:                           MLE   Df Model:                            1
Date:                Tue, 23 Apr 2024   Pseudo R-squ.:                 0.05685
Time:                        15:27:51   Log-Likelihood:                -50.026
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                   0.01406
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.7623      0.305     -9.061      0.000      -3.360      -2.165
population  2.215e-07   1.41e-07      1.567      0.117   -5.56e-08    4.99e-07


##### b) Random Forest Classification

In [42]:
FitRandomForest(df_unfccc, y_var = 'hasFinanceActions', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.263177
10,whiteProp,0.110093
2,unemploymentRate,0.10881
7,lessThanHighSchoolProp,0.089421
9,medianAge,0.084683
4,tempDiff,0.081435
8,medianHouseholdIncome,0.080174
5,numDisasters,0.073556
3,povertyProp,0.056943
6,avgEmissionsPerCapita,0.040568


### Combine Results

In [43]:
results = FitRandomForest(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables)
results.rename(columns = {results.columns[1]: 'hasCommitments'}, inplace = True) 

for var in ['hasActionsUndertaken','hasEmissionInventory','hasInitiativeParticipations','hasImpact','hasMitigations','hasAdaptations','hasRiskAssessments','hasClimateActionPlans', 'hasFinanceActions']:
    result = FitRandomForest(df_unfccc, y_var = var, X_vars = independent_variables) 
    result.rename(columns = {result.columns[1]: var}, inplace = True)
    results = pd.merge(results, result, how = 'left', on = 'Feature')

results.to_csv('Results/Feature_Importances.csv', index = False)
results

Unnamed: 0,Feature,hasCommitments,hasActionsUndertaken,hasEmissionInventory,hasInitiativeParticipations,hasImpact,hasMitigations,hasAdaptations,hasRiskAssessments,hasClimateActionPlans,hasFinanceActions
0,population,0.244458,0.24997,0.277796,0.222905,0.266774,0.232687,0.279799,0.246984,0.293101,0.245553
1,medianAge,0.129208,0.125158,0.102832,0.084035,0.102776,0.147831,0.098186,0.087445,0.08335,0.096953
2,medianHouseholdIncome,0.104702,0.150547,0.100354,0.088305,0.095213,0.11605,0.128941,0.124039,0.110205,0.076325
3,lessThanHighSchoolProp,0.094249,0.076578,0.107652,0.083366,0.086653,0.08131,0.071435,0.081179,0.075789,0.095695
4,whiteProp,0.080416,0.090774,0.101664,0.105101,0.076766,0.101964,0.090678,0.099917,0.095339,0.109409
5,unemploymentRate,0.080071,0.062318,0.069402,0.082857,0.091848,0.060777,0.062735,0.07425,0.072478,0.099534
6,povertyProp,0.078604,0.101756,0.076865,0.078829,0.131677,0.082688,0.091173,0.078072,0.078211,0.071589
7,avgEmissionsPerCapita,0.065403,0.053814,0.053419,0.0817,0.045852,0.058715,0.058988,0.055155,0.060217,0.03851
8,tempDiff,0.059439,0.041508,0.048341,0.084161,0.04901,0.059332,0.056513,0.079567,0.058455,0.087052
9,numDisasters,0.053372,0.042688,0.049986,0.058617,0.047835,0.050592,0.047665,0.064615,0.059309,0.064657


In [44]:
results = {}

for var in ['hasCommitments','hasActionsUndertaken','hasEmissionInventory','hasInitiativeParticipations','hasImpact','hasMitigations','hasAdaptations','hasRiskAssessments','hasClimateActionPlans', 'hasFinanceActions']:
    fm = FitLogistic(df_unfccc, y_var = var, X_vars = independent_variables, format = 'model')
    fs = ForwardSelection(df_unfccc, y_var = var, vars_to_check = independent_variables, format = 'model')
    results[var] = [fm.aic, fm.bic, fs.aic, fs.bic]

results = pd.DataFrame(results, index = ['Full Model AIC', 'Full Model BIC', 'Forward Selection AIC', 'Forward Selection BIC'])
results.to_csv('Results/Information_Criterion.csv')
results

 
Best AIC: 205.95133559257675
 
 
Best AIC: 148.92822282906616
 
 
Best AIC: 230.9334041402301
 
 
Best AIC: 178.22869681211463
 
 
Best AIC: 78.25775360746783
 
 
Best AIC: 178.71769864766372
 
 
Best AIC: 219.6236800370598
 
 
Best AIC: 245.06126170005405
 
 
Best AIC: 231.40073391185314
 
 
Best AIC: 104.05154001823144
 


Unnamed: 0,hasCommitments,hasActionsUndertaken,hasEmissionInventory,hasInitiativeParticipations,hasImpact,hasMitigations,hasAdaptations,hasRiskAssessments,hasClimateActionPlans,hasFinanceActions
Full Model AIC,217.026428,157.061257,241.726611,186.401465,92.083653,186.77345,228.524534,256.401525,242.687013,114.540403
Full Model BIC,256.424873,196.459701,281.125056,225.79991,131.482097,226.171894,267.922979,295.79997,282.085458,153.938847
Forward Selection AIC,205.951336,148.928223,230.933404,178.228697,78.257754,178.717699,219.62368,245.061262,231.400734,104.05154
Forward Selection BIC,219.084151,165.344241,247.349423,188.078308,91.390569,195.133717,232.756495,254.910873,247.816753,110.617947
