# Econometric UNFCCC Green Cities Commitment Analysis: USA
## Econometric Analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier

### 1. Load and Prepare Data

In [2]:
df_all = pd.read_csv('allcities.csv')
df_unfccc = pd.read_csv('UNFCCC.csv')

In [3]:
def BooltoZeroOne(df):
    for column in df.columns:
        if df[column].dtype == bool:
            df[column] = df[column].astype(int)

In [4]:
BooltoZeroOne(df_all)
BooltoZeroOne(df_unfccc)

In [5]:
independent_variables = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'] # 'redState' remove to deal with multicolinearity issues

### 2. Define Model Fitting Functions

In [6]:
def FitLogistic(df, y_var, X_vars, format = 'return', constant = True):
    df = df.dropna()

    X = df[X_vars]
    
    if constant:
        X = sm.add_constant(X)

    y = df[y_var]

    model = sm.Logit(y, X).fit(cov_type = 'HC3', disp = False)

    if format == 'return':
        return model.summary()
    elif format == 'print':
        print(model.summary())
    elif format == 'model':
        return model
    elif format == 'csv':
        print(model.summary())
        summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]
        summary.to_csv(f'Results/LR_{y_var}_FM.csv')

In [7]:
def FitRandomForest(df, y_var, X_vars, format = 'importance'):
    df = df.dropna()

    X = df[X_vars]

    y = df[y_var]

    rf = RandomForestClassifier()
    model = rf.fit(X, y)

    if format == 'importance':

        return pd.DataFrame({
            'Feature':X_vars,
            'Importance':model.feature_importances_
        }).sort_values(by = 'Importance', ascending = False)
    
    elif format == 'model':
        return model

In [8]:
def ForwardSelection(df, y_var, vars_to_check, format = 'return'):
    best_model = None
    best_aic = float('inf')

    vars_selected = []
    v = vars_to_check.copy()

    while v:
        potential_vars = []
        for var in v:
            vars_current = vars_selected + [var]
            model = FitLogistic(df, y_var, vars_current, format = 'model')
            aic = model.aic
            if aic < best_aic:
                best_aic = aic
                best_model = model
                potential_vars = vars_current
        
        if not potential_vars:
            break

        var_selected = potential_vars[-1]
        vars_selected.append(var_selected)
        v.remove(var_selected)

    print(' ')
    print(f'Best AIC: {best_aic}')
    print(' ')
    
    if format == 'return':
        return best_model.summary()
    elif format == 'print':
        print(best_model.summary())
    elif format == 'model':
        return best_model
    elif format == 'csv':
        print(best_model.summary())
        summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]
        summary.to_csv(f'Results/LR_{y_var}_FS.csv')

### 3. Analysing Determinants of Cities to Sign up to the UNFCC

##### a) Logistic Regression

In [9]:
# All variables on their own versus UNFCCC
df = pd.DataFrame(columns = ['Variable','Coefficient','P-Value','AIC','BIC'])

for i in range(len(independent_variables)):
    lr = FitLogistic(df_all, y_var = 'UNFCCC', X_vars = [independent_variables[i]], format = 'model')
    df = pd.concat([df, pd.DataFrame([[independent_variables[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)    
df

  df = pd.concat([df, pd.DataFrame([[independent_variables[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)


Unnamed: 0,Variable,Coefficient,P-Value,AIC,BIC
0,population,5e-06,7.914043e-07,1633.0582,1648.228343
1,redCounty,-2.241665,1.4957209999999998e-19,1948.025817,1963.195959
2,unemploymentRate,-19.445843,0.002839589,2082.614319,2097.784461
3,povertyProp,-1.460479,0.2475538,2089.316769,2104.486912
4,tempDiff,-0.136376,0.03557506,2086.179931,2101.350074
5,numDisasters,0.002648,0.00190895,2080.883574,2096.053717
6,avgEmissionsPerCapita,-0.081271,2.026106e-06,2046.253409,2061.423552
7,lessThanHighSchoolProp,-4.10023,0.007179528,2083.070827,2098.24097
8,medianHouseholdIncome,1.7e-05,3.400496e-11,2063.615413,2078.785556


In [10]:
# All variables versus UNFCCC
FitLogistic(df_all, y_var = 'UNFCCC', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14536
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.2852
Time:                        16:48:30   Log-Likelihood:                -745.59
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                2.387e-122
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.6832      1.063     -2.525      0.012      -4.766      -0.601
population              5.112e-06   1.13e-06      4.508      0.000    2.89e-06    7.33e-06
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [11]:
ForwardSelection(df = df_all, y_var = 'UNFCCC', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 1508.0410742380075
 
                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14538
Method:                           MLE   Df Model:                            7
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.2848
Time:                        16:48:32   Log-Likelihood:                -746.02
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                4.288e-124
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.2571      0.476     -4.738      0.000      -3.191      -1.323
population              5.104e-06   1.13e-06      4.535      0

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [12]:
FitRandomForest(df_all, y_var = 'UNFCCC', X_vars = independent_variables)  

Unnamed: 0,Feature,Importance
0,population,0.621927
7,lessThanHighSchoolProp,0.082198
8,medianHouseholdIncome,0.070566
3,povertyProp,0.066003
2,unemploymentRate,0.054355
6,avgEmissionsPerCapita,0.035062
4,tempDiff,0.030842
5,numDisasters,0.028105
1,redCounty,0.010941


### 4. Analysing Determinants of Cities to Undertake Individual Actions Recorded by the UNFCCC

#### i) Has Commitments

##### a) Logistic Regression

In [13]:
FitLogistic(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1520
Time:                        16:48:34   Log-Likelihood:                -98.116
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 5.569e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.1391      2.187     -0.064      0.949      -4.426       4.148
population              1.996e-06   5.72e-07      3.489      0.000    8.74e-07    3.12e-06
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [14]:
ForwardSelection(df = df_unfccc, y_var = 'hasCommitments', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 205.95133559257675
 
                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1445
Time:                        16:48:34   Log-Likelihood:                -98.976
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 2.592e-07
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0.2236      0.900      0.249      0.804      -1.539       1.987
population              1.886e-06   5.46e-07      3.454      0

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [15]:
FitRandomForest(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.302055
8,medianHouseholdIncome,0.142913
3,povertyProp,0.126079
7,lessThanHighSchoolProp,0.125803
2,unemploymentRate,0.103068
6,avgEmissionsPerCapita,0.07216
4,tempDiff,0.062247
5,numDisasters,0.051437
1,redCounty,0.014239


#### ii) Has Actions Undertaken

##### a) Logistic Regression

In [16]:
FitLogistic(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = independent_variables, format = 'csv')

                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      187
Method:                             MLE   Df Model:                            9
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.3082
Time:                          16:48:35   Log-Likelihood:                -68.766
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 7.599e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.0394      2.724     -0.749      0.454      -7.379       3.300
population              6.722e-06   1.69e-06      3.973      0.000    3.41e-06 

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [17]:
ForwardSelection(df = df_unfccc, y_var = 'hasActionsUndertaken', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 149.90686314892724
 
                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      191
Method:                             MLE   Df Model:                            5
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.3063
Time:                          16:48:35   Log-Likelihood:                -68.953
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 7.905e-12
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -1.7988      2.639     -0.682      0.495      -6.970       3.373
population             6.782e-06   1.75e-06      

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [18]:
FitRandomForest(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.309502
8,medianHouseholdIncome,0.173818
3,povertyProp,0.142366
7,lessThanHighSchoolProp,0.103071
6,avgEmissionsPerCapita,0.084664
2,unemploymentRate,0.078921
5,numDisasters,0.051408
4,tempDiff,0.049109
1,redCounty,0.007141


#### iii) hasEmissionInventory

##### a) Logistic Regression

In [19]:
FitLogistic(df_unfccc, y_var = 'hasEmissionInventory', X_vars = independent_variables, format = 'csv')

                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      187
Method:                             MLE   Df Model:                            9
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1900
Time:                          16:48:35   Log-Likelihood:                -109.70
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 5.708e-08
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6.9776      2.093     -3.333      0.001     -11.080      -2.875
population              1.242e-06   3.21e-07      3.871      0.000    6.13e-07 

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [20]:
ForwardSelection(df = df_unfccc, y_var = 'hasEmissionInventory', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 230.9334041402301
 
                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      192
Method:                             MLE   Df Model:                            4
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1843
Time:                          16:48:35   Log-Likelihood:                -110.47
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 3.746e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6.5556      1.785     -3.673      0.000     -10.054      -3.057
population              1.268e-06   3.22e-07   

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [21]:
FitRandomForest(df_unfccc, y_var = 'hasEmissionInventory', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.331316
8,medianHouseholdIncome,0.135762
7,lessThanHighSchoolProp,0.112454
3,povertyProp,0.109655
2,unemploymentRate,0.107586
6,avgEmissionsPerCapita,0.07021
5,numDisasters,0.063954
4,tempDiff,0.052829
1,redCounty,0.016235


#### iv) hasInitiativeParticipations

##### a) Logistic Regression

In [22]:
FitLogistic(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = independent_variables, format = 'csv')

                                Logit Regression Results                               
Dep. Variable:     hasInitiativeParticipations   No. Observations:                  197
Model:                                   Logit   Df Residuals:                      187
Method:                                    MLE   Df Model:                            9
Date:                         Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1135
Time:                                 16:48:35   Log-Likelihood:                -81.704
converged:                                True   LL-Null:                       -92.163
Covariance Type:                           HC3   LLR p-value:                   0.01301
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3845      2.482     -0.961      0.337      -7.249       2.479
population             

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [23]:
ForwardSelection(df = df_unfccc, y_var = 'hasInitiativeParticipations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 178.22869681211463
 
                                Logit Regression Results                               
Dep. Variable:     hasInitiativeParticipations   No. Observations:                  197
Model:                                   Logit   Df Residuals:                      194
Method:                                    MLE   Df Model:                            2
Date:                         Wed, 17 Apr 2024   Pseudo R-squ.:                 0.06563
Time:                                 16:48:36   Log-Likelihood:                -86.114
converged:                                True   LL-Null:                       -92.163
Covariance Type:                           HC3   LLR p-value:                  0.002360
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5515      0.248      6.266      0.000       1.066       2.037
redCounty     -0.9902     

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [24]:
FitRandomForest(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.27344
8,medianHouseholdIncome,0.129273
3,povertyProp,0.105813
6,avgEmissionsPerCapita,0.097748
7,lessThanHighSchoolProp,0.09338
4,tempDiff,0.091394
2,unemploymentRate,0.087153
5,numDisasters,0.083117
1,redCounty,0.038682


#### v) hasImpact

##### a) Logistic Regression

In [25]:
FitLogistic(df_unfccc, y_var = 'hasImpact', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1173
Time:                        16:48:36   Log-Likelihood:                -34.911
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                    0.4123
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.9208      3.353     -1.467      0.142     -11.493       1.652
population              1.528e-08   2.05e-07      0.074      0.941   -3.87e-07    4.18e-07
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [26]:
ForwardSelection(df = df_unfccc, y_var = 'hasImpact', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 78.25775360746783
 
                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1117
Time:                        16:48:36   Log-Likelihood:                -35.129
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                   0.03152
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.7994      2.083     -2.304      0.021      -8.881      -0.717
numDisasters               0.0132      0.005      2.471      0.

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [27]:
FitRandomForest(df_unfccc, y_var = 'hasImpact', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.26638
3,povertyProp,0.175909
7,lessThanHighSchoolProp,0.141555
8,medianHouseholdIncome,0.125967
2,unemploymentRate,0.121706
4,tempDiff,0.056743
5,numDisasters,0.056174
6,avgEmissionsPerCapita,0.048546
1,redCounty,0.00702


#### vi) hasMitigations

##### a) Logistic Regression

In [28]:
FitLogistic(df_unfccc, y_var = 'hasMitigations', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1843
Time:                        16:48:36   Log-Likelihood:                -84.313
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 1.665e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.5465      2.664     -1.707      0.088      -9.767       0.674
population              1.577e-06   1.03e-06      1.537      0.124   -4.34e-07    3.59e-06
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [29]:
ForwardSelection(df = df_unfccc, y_var = 'hasMitigations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 181.54821795245488
 
                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      192
Method:                           MLE   Df Model:                            4
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1702
Time:                        16:48:36   Log-Likelihood:                -85.774
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 4.247e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -3.1234      1.352     -2.310      0.021      -5.774      -0.473
population             1.605e-06   1.06e-06      1.508      0.131

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [30]:
FitRandomForest(df_unfccc, y_var = 'hasMitigations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.289224
8,medianHouseholdIncome,0.166196
7,lessThanHighSchoolProp,0.124578
3,povertyProp,0.110108
2,unemploymentRate,0.082786
4,tempDiff,0.076798
6,avgEmissionsPerCapita,0.075343
5,numDisasters,0.062369
1,redCounty,0.012598


#### vii) hasAdaptations

##### a) Logistic Regression

In [31]:
FitLogistic(df_unfccc, y_var = 'hasAdaptations', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1524
Time:                        16:48:37   Log-Likelihood:                -103.33
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 2.477e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -1.3016      2.036     -0.639      0.523      -5.291       2.688
population               1.29e-06   7.97e-07      1.618      0.106   -2.73e-07    2.85e-06
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [32]:
ForwardSelection(df = df_unfccc, y_var = 'hasAdaptations', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 219.6236800370598
 
                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1320
Time:                        16:48:37   Log-Likelihood:                -105.81
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 4.773e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -3.1342      1.185     -2.644      0.008      -5.457      -0.811
population             1.246e-06   7.57e-07      1.645      0.100 

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [33]:
FitRandomForest(df_unfccc, y_var = 'hasAdaptations', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.308776
8,medianHouseholdIncome,0.171767
3,povertyProp,0.118985
7,lessThanHighSchoolProp,0.104497
2,unemploymentRate,0.091917
6,avgEmissionsPerCapita,0.070635
5,numDisasters,0.057306
4,tempDiff,0.055717
1,redCounty,0.020398


#### viii) hasRiskAssessments

##### a) Logistic Regression

In [34]:
FitLogistic(df_unfccc, y_var = 'hasRiskAssessments', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1377
Time:                        16:48:37   Log-Likelihood:                -117.48
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 2.128e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -5.0770      1.940     -2.617      0.009      -8.879      -1.275
population              8.596e-07   2.94e-07      2.927      0.003    2.84e-07    1.44e-06
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [35]:
ForwardSelection(df = df_unfccc, y_var = 'hasRiskAssessments', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 245.06126170005405
 
                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      194
Method:                           MLE   Df Model:                            2
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1227
Time:                        16:48:37   Log-Likelihood:                -119.53
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 5.521e-08
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -2.7077      0.680     -3.984      0.000      -4.040      -1.376
population             8.857e-07   3.04e-07      2.914      0.004

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [36]:
FitRandomForest(df_unfccc, y_var = 'hasRiskAssessments', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.284671
8,medianHouseholdIncome,0.164368
3,povertyProp,0.123043
7,lessThanHighSchoolProp,0.115842
2,unemploymentRate,0.088592
4,tempDiff,0.073818
6,avgEmissionsPerCapita,0.071153
5,numDisasters,0.065764
1,redCounty,0.012749


#### ix) hasClimateActionPlans

##### a) Logistic Regression

In [37]:
FitLogistic(df_unfccc, y_var = 'hasClimateActionPlans', X_vars = independent_variables, format = 'csv')

                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      187
Method:                              MLE   Df Model:                            9
Date:                   Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1118
Time:                           16:48:37   Log-Likelihood:                -111.58
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 0.0009235
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3055      1.953     -1.180      0.238      -6.133       1.522
population              1.011e-06   5.04e-07      2.008      0.045     

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [38]:
ForwardSelection(df = df_unfccc, y_var = 'hasClimateActionPlans', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 232.7526244018722
 
                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      193
Method:                              MLE   Df Model:                            3
Date:                   Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1054
Time:                           16:48:37   Log-Likelihood:                -112.38
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 7.517e-06
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.2797      0.846     -0.331      0.741      -1.938       1.378
population              1.039e-06   5.0

  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [39]:
FitRandomForest(df_unfccc, y_var = 'hasClimateActionPlans', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.315853
8,medianHouseholdIncome,0.146666
7,lessThanHighSchoolProp,0.110314
3,povertyProp,0.103286
4,tempDiff,0.086231
2,unemploymentRate,0.082003
5,numDisasters,0.069453
6,avgEmissionsPerCapita,0.066881
1,redCounty,0.019312


#### x) hasFinanceActions

##### a) Logistic Regression

In [40]:
FitLogistic(df_unfccc, y_var = 'hasFinanceActions', X_vars = independent_variables, format = 'csv')

                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1239
Time:                        16:48:38   Log-Likelihood:                -46.467
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                    0.1560
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.6114      3.559     -1.296      0.195     -11.586       2.363
population              2.886e-07   1.95e-07      1.481      0.139   -9.34e-08    6.71e-07
redC

  summary = pd.read_html(model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


In [41]:
ForwardSelection(df = df_unfccc, y_var = 'hasFinanceActions', vars_to_check = independent_variables, format = 'csv')

 
Best AIC: 104.05154001823144
 
                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      195
Method:                           MLE   Df Model:                            1
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                 0.05685
Time:                        16:48:38   Log-Likelihood:                -50.026
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                   0.01406
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.7623      0.305     -9.061      0.000      -3.360      -2.165
population  2.215e-07   1.41e-07      1.567      0.117   -5.56e-08    4.99e-07


  summary = pd.read_html(best_model.summary().tables[1].as_html(), header = 0, index_col = 0)[0]


##### b) Random Forest Classification

In [42]:
FitRandomForest(df_unfccc, y_var = 'hasFinanceActions', X_vars = independent_variables) 

Unnamed: 0,Feature,Importance
0,population,0.296385
2,unemploymentRate,0.146985
4,tempDiff,0.111565
8,medianHouseholdIncome,0.111393
7,lessThanHighSchoolProp,0.105282
3,povertyProp,0.099258
5,numDisasters,0.077918
6,avgEmissionsPerCapita,0.044375
1,redCounty,0.006839


### Combine Results

In [59]:
results = FitRandomForest(df_unfccc, y_var = 'hasCommitments', X_vars = independent_variables)
results.rename(columns = {results.columns[1]: 'hasCommitments'}, inplace = True) 

for var in ['hasActionsUndertaken','hasEmissionInventory','hasInitiativeParticipations','hasImpact','hasMitigations','hasAdaptations','hasRiskAssessments','hasClimateActionPlans', 'hasFinanceActions']:
    result = FitRandomForest(df_unfccc, y_var = var, X_vars = independent_variables) 
    result.rename(columns = {result.columns[1]: var}, inplace = True)
    results = pd.merge(results, result, how = 'left', on = 'Feature')

results.to_csv('Results/Feature_Importances.csv', index = False)
results

Unnamed: 0,Feature,hasCommitments,hasActionsUndertaken,hasEmissionInventory,hasInitiativeParticipations,hasImpact,hasMitigations,hasAdaptations,hasRiskAssessments,hasClimateActionPlans,hasFinanceActions
0,population,0.290024,0.337284,0.328372,0.268289,0.272521,0.296527,0.317405,0.29472,0.321969,0.262324
1,medianHouseholdIncome,0.142393,0.18474,0.144316,0.117391,0.117095,0.172446,0.161035,0.171207,0.139215,0.108348
2,lessThanHighSchoolProp,0.121489,0.103761,0.12659,0.100367,0.13835,0.110257,0.102552,0.105832,0.103463,0.146467
3,povertyProp,0.117586,0.118834,0.100342,0.106541,0.150589,0.105076,0.117346,0.121873,0.095634,0.082847
4,unemploymentRate,0.099375,0.072497,0.095997,0.099753,0.137099,0.08463,0.084717,0.091063,0.095919,0.129962
5,avgEmissionsPerCapita,0.088853,0.067258,0.075001,0.088265,0.056117,0.076096,0.074745,0.071182,0.087331,0.047186
6,tempDiff,0.068714,0.047659,0.056731,0.096381,0.051822,0.072991,0.062833,0.068076,0.070003,0.131262
7,numDisasters,0.056134,0.055633,0.061826,0.092691,0.066608,0.069418,0.060576,0.066458,0.068926,0.077911
8,redCounty,0.015434,0.012333,0.010825,0.030323,0.009799,0.012558,0.01879,0.009589,0.017541,0.013692


In [58]:
results = {}

for var in ['hasCommitments','hasActionsUndertaken','hasEmissionInventory','hasInitiativeParticipations','hasImpact','hasMitigations','hasAdaptations','hasRiskAssessments','hasClimateActionPlans', 'hasFinanceActions']:
    fm = FitLogistic(df_unfccc, y_var = var, X_vars = independent_variables, format = 'model')
    fs = ForwardSelection(df_unfccc, y_var = var, vars_to_check = independent_variables, format = 'model')
    results[var] = [fm.aic, fm.bic, fs.aic, fs.bic]

results = pd.DataFrame(results, index = ['Full Model AIC', 'Full Model BIC', 'Forward Selection AIC', 'Forward Selection BIC'])
results.to_csv('Results/Information_Criterion.csv')
results

 
Best AIC: 205.95133559257675
 
 
Best AIC: 149.90686314892724
 
 
Best AIC: 230.9334041402301
 
 
Best AIC: 178.22869681211463
 
 
Best AIC: 78.25775360746783
 
 
Best AIC: 181.54821795245488
 
 
Best AIC: 219.6236800370598
 
 
Best AIC: 245.06126170005405
 
 
Best AIC: 232.7526244018722
 
 
Best AIC: 104.05154001823144
 


Unnamed: 0,hasCommitments,hasActionsUndertaken,hasEmissionInventory,hasInitiativeParticipations,hasImpact,hasMitigations,hasAdaptations,hasRiskAssessments,hasClimateActionPlans,hasFinanceActions
Full Model AIC,216.232072,157.532704,239.393335,183.407443,89.821446,188.626997,226.665619,254.968647,243.161979,112.934162
Full Model BIC,249.064109,190.364741,272.225372,216.23948,122.653483,221.459034,259.497657,287.800684,275.994016,145.766199
Forward Selection AIC,205.951336,149.906863,230.933404,178.228697,78.257754,181.548218,219.62368,245.061262,232.752624,104.05154
Forward Selection BIC,219.084151,169.606086,247.349423,188.078308,91.390569,197.964237,232.756495,254.910873,245.885439,110.617947
