# Econometric UNFCCC Green Cities Commitment Analysis: USA
## Econometric Analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier

### 1. Load and Prepare Data

In [2]:
# Load the two input tables; both paths are relative, so the CSV files must
# sit in the working directory. df_all feeds the UNFCCC sign-up models below,
# df_unfccc feeds the individual-action models.
df_all = pd.read_csv('allcities.csv')
df_unfccc = pd.read_csv('UNFCCC.csv')

In [3]:
def BooltoZeroOne(df):
    """Recode every boolean column of ``df`` as 0/1 integers, in place.

    Columns whose dtype is not ``bool`` are left untouched. The frame is
    modified directly and nothing is returned.
    """
    # Decide which columns qualify first, then convert them one by one.
    bool_columns = [name for name in df.columns if df[name].dtype == bool]
    for name in bool_columns:
        df[name] = df[name].astype(int)

In [4]:
# Recode boolean columns to 0/1 in both frames; BooltoZeroOne modifies the
# frame it is given and returns nothing.
BooltoZeroOne(df_all)
BooltoZeroOne(df_unfccc)

### 2. Define Model Fitting Functions

In [5]:
def FitLogistic(df, y_var, X_vars, format = 'return', constant = True):
    """Fit a logit of ``y_var`` on ``X_vars`` with HC3 covariance.

    Parameters
    ----------
    df : DataFrame
        Table holding the outcome and the regressors.
    y_var : str
        Name of the outcome column.
    X_vars : list of str
        Names of the regressor columns.
    format : {'return', 'print', 'model'}
        'return' returns the summary table, 'print' prints it (and returns
        None), 'model' returns the fitted results object.
    constant : bool
        When True, a constant column is added to the regressors.

    Raises
    ------
    ValueError
        If ``format`` is not one of the three recognised values.
    """
    # Reject a mistyped format before fitting, instead of running the whole
    # estimation and then silently returning None.
    if format not in ('return', 'print', 'model'):
        raise ValueError(
            f"format must be 'return', 'print' or 'model', got {format!r}"
        )

    # Rows with a missing value in ANY column are dropped, not only in the
    # model's columns. Every model fitted from the same frame therefore uses
    # the same sample, whichever regressors it includes.
    df = df.dropna()

    X = df[X_vars]
    if constant:
        X = sm.add_constant(X)

    y = df[y_var]

    model = sm.Logit(y, X).fit(cov_type = 'HC3', disp = False)

    if format == 'return':
        return model.summary()
    if format == 'print':
        print(model.summary())
        return None
    return model

In [6]:
def FitRandomForest(df, y_var, X_vars, format = 'importance', random_state = None):
    """Fit a random forest classifier of ``y_var`` on ``X_vars``.

    Parameters
    ----------
    df : DataFrame
        Table holding the outcome and the features.
    y_var : str
        Name of the outcome column.
    X_vars : list of str
        Names of the feature columns.
    format : {'importance', 'model'}
        'importance' returns a DataFrame with columns 'Feature' and
        'Importance', sorted by decreasing importance; 'model' returns the
        fitted classifier.
    random_state : optional
        Forwarded to ``RandomForestClassifier``. The default (None) keeps the
        original unseeded behaviour; pass an integer to make the importances
        reproducible between runs.

    Raises
    ------
    ValueError
        If ``format`` is not one of the two recognised values.
    """
    # Reject a mistyped format before fitting, instead of training the forest
    # and then silently returning None.
    if format not in ('importance', 'model'):
        raise ValueError(
            f"format must be 'importance' or 'model', got {format!r}"
        )

    # Rows with a missing value in ANY column are dropped, not only in the
    # columns used by this model.
    df = df.dropna()

    X = df[X_vars]
    y = df[y_var]

    model = RandomForestClassifier(random_state = random_state).fit(X, y)

    if format == 'model':
        return model

    return pd.DataFrame({
        'Feature':X_vars,
        'Importance':model.feature_importances_
    }).sort_values(by = 'Importance', ascending = False)

In [7]:
def ForwardSelection(df, y_var, vars_to_check, format = 'return'):
    """Forward stepwise selection of logit regressors by AIC.

    Starting from no regressors, each round fits one logit per remaining
    candidate (added to the variables already selected) and keeps the
    candidate whose model has the lowest AIC, provided it improves on the best
    AIC seen so far. Selection stops when no candidate improves the AIC or no
    candidates remain. The best AIC is printed before the result is delivered.

    Parameters
    ----------
    df : DataFrame
        Table holding the outcome and the candidate regressors.
    y_var : str
        Name of the outcome column.
    vars_to_check : iterable of str
        Candidate regressor names. The caller's object is not modified.
    format : {'return', 'print', 'model'}
        'return' returns the best model's summary, 'print' prints it (and
        returns None), 'model' returns the fitted results object.

    Raises
    ------
    ValueError
        If ``format`` is not recognised, or if no model could be selected
        (for example because ``vars_to_check`` is empty).
    """
    # Reject a mistyped format before running any of the fits.
    if format not in ('return', 'print', 'model'):
        raise ValueError(
            f"format must be 'return', 'print' or 'model', got {format!r}"
        )

    # Work on a copy so that removing selected variables below does not empty
    # the list the caller passed in.
    remaining = list(vars_to_check)

    best_model = None
    best_aic = float('inf')
    vars_selected = []

    while remaining:
        round_winner = None
        for var in remaining:
            model = FitLogistic(df, y_var, X_vars = vars_selected + [var], format = 'model')
            aic = model.aic
            # Compared against the overall best, so a variable is only added
            # when it beats every model fitted so far.
            if aic < best_aic:
                best_aic = aic
                best_model = model
                round_winner = var

        if round_winner is None:
            break

        vars_selected.append(round_winner)
        remaining.remove(round_winner)

    if best_model is None:
        # Without this check the code below would fail on None.summary().
        raise ValueError('no model was selected: vars_to_check produced no usable candidate')

    print(' ')
    print(f'Best AIC: {best_aic}')
    print(' ')

    if format == 'return':
        return best_model.summary()
    if format == 'print':
        print(best_model.summary())
        return None
    return best_model

### 3. Analysing Determinants of Cities Signing Up to the UNFCCC

##### a) Logistic Regression

In [8]:
# Each variable on its own versus UNFCCC: one logit (with a constant) per
# candidate regressor, collected into a single comparison table.
single_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']
rows = []
for var_name in single_vars:
    lr = FitLogistic(df_all, y_var = 'UNFCCC', X_vars = [var_name], format = 'model')
    # Look the slope up by name rather than by position, so the result does
    # not depend on where the constant sits in the parameter vector.
    rows.append([var_name, lr.params[var_name], lr.pvalues[var_name], lr.aic, lr.bic])
# Build the table once from the collected rows. Growing it with pd.concat
# inside the loop started from an empty frame, which pandas deprecates, and
# re-copied the table on every iteration.
df = pd.DataFrame(rows, columns = ['Variable','Coefficient','P-Value','AIC','BIC'])
df

  df = pd.concat([df, pd.DataFrame([[vars[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)


Unnamed: 0,Variable,Coefficient,P-Value,AIC,BIC
0,population,5e-06,7.914043e-07,1633.0582,1648.228343
1,redCounty,-2.241665,1.4957209999999998e-19,1948.025817,1963.195959
2,redState,-0.692852,5.979156e-06,2068.451332,2083.621475
3,unemploymentRate,-19.445843,0.002839589,2082.614319,2097.784461
4,povertyProp,-1.460479,0.2475538,2089.316769,2104.486912
5,tempDiff,-0.136376,0.03557506,2086.179931,2101.350074
6,numDisasters,0.002648,0.00190895,2080.883574,2096.053717
7,avgEmissionsPerCapita,-0.081271,2.026106e-06,2046.253409,2061.423552
8,lessThanHighSchoolProp,-4.10023,0.007179528,2083.070827,2098.24097
9,medianHouseholdIncome,1.7e-05,3.400496e-11,2063.615413,2078.785556


In [9]:
# All variables versus UNFCCC
# Full-specification logit of UNFCCC on all ten regressors (redState
# included); the summary is printed.
FitLogistic(df_all, y_var = 'UNFCCC', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14535
Method:                           MLE   Df Model:                           10
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.2865
Time:                        09:48:22   Log-Likelihood:                -744.24
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                5.273e-122
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.2981      1.065     -2.157      0.031      -4.386      -0.210
population              5.141e-06   1.11e-06      4.612      0.000    2.96e-06    7.33e-06
redC

In [10]:
# Forward selection by AIC for UNFCCC over all ten candidate regressors;
# prints the best AIC and the selected model's summary.
ForwardSelection(df = df_all, y_var = 'UNFCCC', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 1506.5863857935879
 
                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14537
Method:                           MLE   Df Model:                            8
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.2865
Time:                        09:48:24   Log-Likelihood:                -744.29
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                7.418e-124
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3051      0.473     -4.871      0.000      -3.233      -1.378
population              5.142e-06   1.12e-06      4.595      0

##### b) Random Forest Classification

In [11]:
# Random-forest feature importances for UNFCCC on all ten regressors; the
# returned table is the cell's displayed output. No random seed is fixed for
# this fit, so the importances can differ between runs.
FitRandomForest(df_all, y_var = 'UNFCCC', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'])  

Unnamed: 0,Feature,Importance
0,population,0.629305
8,lessThanHighSchoolProp,0.079432
9,medianHouseholdIncome,0.071286
4,povertyProp,0.060673
3,unemploymentRate,0.055205
5,tempDiff,0.030914
7,avgEmissionsPerCapita,0.030825
6,numDisasters,0.02841
1,redCounty,0.010313
2,redState,0.003637


### 4. Analysing Determinants of Cities Undertaking Individual Actions Recorded by the UNFCCC

#### i) Has Commitments

##### a) Logistic Regression

In [12]:
# Full-specification logit of hasCommitments on nine regressors; the summary
# is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasCommitments', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1520
Time:                        09:48:26   Log-Likelihood:                -98.116
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 5.569e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.1391      2.187     -0.064      0.949      -4.426       4.148
population              1.996e-06   5.72e-07      3.489      0.000    8.74e-07    3.12e-06
redC

In [13]:
# Forward selection by AIC for hasCommitments over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasCommitments', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 205.95133559257675
 
                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1445
Time:                        09:48:26   Log-Likelihood:                -98.976
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 2.592e-07
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                      0.2236      0.900      0.249      0.804      -1.539       1.987
population              1.886e-06   5.46e-07      3.454      0

##### b) Random Forest Classification

In [14]:
# Random-forest feature importances for hasCommitments on all ten regressors;
# the returned table is the cell's displayed output. No random seed is fixed
# for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasCommitments', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.297499
9,medianHouseholdIncome,0.136352
8,lessThanHighSchoolProp,0.12383
3,unemploymentRate,0.116736
4,povertyProp,0.113025
5,tempDiff,0.069211
7,avgEmissionsPerCapita,0.067882
6,numDisasters,0.052733
2,redState,0.012757
1,redCounty,0.009975


#### ii) Has Actions Undertaken

##### a) Logistic Regression

In [15]:
# Full-specification logit of hasActionsUndertaken on nine regressors; the
# summary is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      187
Method:                             MLE   Df Model:                            9
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.3082
Time:                          09:48:27   Log-Likelihood:                -68.766
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 7.599e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.0394      2.724     -0.749      0.454      -7.379       3.300
population              6.722e-06   1.69e-06      3.973      0.000    3.41e-06 

In [16]:
# Forward selection by AIC for hasActionsUndertaken over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasActionsUndertaken', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 149.90686314892724
 
                            Logit Regression Results                            
Dep. Variable:     hasActionsUndertaken   No. Observations:                  197
Model:                            Logit   Df Residuals:                      191
Method:                             MLE   Df Model:                            5
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.3063
Time:                          09:48:27   Log-Likelihood:                -68.953
converged:                         True   LL-Null:                       -99.405
Covariance Type:                    HC3   LLR p-value:                 7.905e-12
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -1.7988      2.639     -0.682      0.495      -6.970       3.373
population             6.782e-06   1.75e-06      

##### b) Random Forest Classification

In [17]:
# Random-forest feature importances for hasActionsUndertaken on all ten
# regressors; the returned table is the cell's displayed output. No random
# seed is fixed for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasActionsUndertaken', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.327082
9,medianHouseholdIncome,0.173563
4,povertyProp,0.129076
8,lessThanHighSchoolProp,0.098803
7,avgEmissionsPerCapita,0.082205
3,unemploymentRate,0.080356
6,numDisasters,0.052609
5,tempDiff,0.044913
2,redState,0.006834
1,redCounty,0.00456


#### iii) hasEmissionInventory

##### a) Logistic Regression

In [18]:
# Full-specification logit of hasEmissionInventory on nine regressors; the
# summary is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasEmissionInventory', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      187
Method:                             MLE   Df Model:                            9
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1900
Time:                          09:48:27   Log-Likelihood:                -109.70
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 5.708e-08
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6.9776      2.093     -3.333      0.001     -11.080      -2.875
population              1.242e-06   3.21e-07      3.871      0.000    6.13e-07 

In [19]:
# Forward selection by AIC for hasEmissionInventory over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasEmissionInventory', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 230.9334041402301
 
                            Logit Regression Results                            
Dep. Variable:     hasEmissionInventory   No. Observations:                  197
Model:                            Logit   Df Residuals:                      192
Method:                             MLE   Df Model:                            4
Date:                  Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1843
Time:                          09:48:27   Log-Likelihood:                -110.47
converged:                         True   LL-Null:                       -135.43
Covariance Type:                    HC3   LLR p-value:                 3.746e-10
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6.5556      1.785     -3.673      0.000     -10.054      -3.057
population              1.268e-06   3.22e-07   

##### b) Random Forest Classification

In [20]:
# Random-forest feature importances for hasEmissionInventory on all ten
# regressors; the returned table is the cell's displayed output. No random
# seed is fixed for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasEmissionInventory', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.319316
9,medianHouseholdIncome,0.136184
8,lessThanHighSchoolProp,0.125243
4,povertyProp,0.105369
3,unemploymentRate,0.095489
7,avgEmissionsPerCapita,0.070598
6,numDisasters,0.066432
5,tempDiff,0.054772
2,redState,0.0133
1,redCounty,0.013297


#### iv) hasInitiativeParticipations

##### a) Logistic Regression

In [21]:
# Full-specification logit of hasInitiativeParticipations on nine regressors;
# the summary is printed. redState is not among the regressors here, although
# the forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                                Logit Regression Results                               
Dep. Variable:     hasInitiativeParticipations   No. Observations:                  197
Model:                                   Logit   Df Residuals:                      187
Method:                                    MLE   Df Model:                            9
Date:                         Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1135
Time:                                 09:48:28   Log-Likelihood:                -81.704
converged:                                True   LL-Null:                       -92.163
Covariance Type:                           HC3   LLR p-value:                   0.01301
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3845      2.482     -0.961      0.337      -7.249       2.479
population             

In [None]:
# Forward selection by AIC for hasInitiativeParticipations over all ten
# candidate regressors (redState included).
# NOTE(review): this cell carries no execution count and no output in the
# export, so it appears not to have been run -- confirm and re-run.
ForwardSelection(df = df_unfccc, y_var = 'hasInitiativeParticipations', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

##### b) Random Forest Classification

In [23]:
# Random-forest feature importances for hasInitiativeParticipations on all ten
# regressors; the returned table is the cell's displayed output. No random
# seed is fixed for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasInitiativeParticipations', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.245728
9,medianHouseholdIncome,0.110537
8,lessThanHighSchoolProp,0.108786
3,unemploymentRate,0.107082
7,avgEmissionsPerCapita,0.106388
4,povertyProp,0.097919
6,numDisasters,0.090589
5,tempDiff,0.081583
1,redCounty,0.026155
2,redState,0.025233


#### v) hasImpact

##### a) Logistic Regression

In [24]:
# Full-specification logit of hasImpact on nine regressors; the summary is
# printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasImpact', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1173
Time:                        09:49:11   Log-Likelihood:                -34.911
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                    0.4123
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.9208      3.353     -1.467      0.142     -11.493       1.652
population              1.528e-08   2.05e-07      0.074      0.941   -3.87e-07    4.18e-07
redC

In [25]:
# Forward selection by AIC for hasImpact over all ten candidate regressors
# (redState included); prints the best AIC and the selected model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasImpact', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 78.25775360746783
 
                           Logit Regression Results                           
Dep. Variable:              hasImpact   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1117
Time:                        09:49:12   Log-Likelihood:                -35.129
converged:                       True   LL-Null:                       -39.548
Covariance Type:                  HC3   LLR p-value:                   0.03152
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.7994      2.083     -2.304      0.021      -8.881      -0.717
numDisasters               0.0132      0.005      2.471      0.

##### b) Random Forest Classification

In [26]:
# Random-forest feature importances for hasImpact on all ten regressors; the
# returned table is the cell's displayed output. No random seed is fixed for
# this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasImpact', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.280542
4,povertyProp,0.158283
9,medianHouseholdIncome,0.133247
3,unemploymentRate,0.132852
8,lessThanHighSchoolProp,0.106627
7,avgEmissionsPerCapita,0.067102
5,tempDiff,0.052172
6,numDisasters,0.051827
1,redCounty,0.011582
2,redState,0.005764


#### vi) hasMitigations

##### a) Logistic Regression

In [27]:
# Full-specification logit of hasMitigations on nine regressors; the summary
# is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasMitigations', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1843
Time:                        09:49:12   Log-Likelihood:                -84.313
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 1.665e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.5465      2.664     -1.707      0.088      -9.767       0.674
population              1.577e-06   1.03e-06      1.537      0.124   -4.34e-07    3.59e-06
redC

In [28]:
# Forward selection by AIC for hasMitigations over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasMitigations', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 181.54821795245488
 
                           Logit Regression Results                           
Dep. Variable:         hasMitigations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      192
Method:                           MLE   Df Model:                            4
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1702
Time:                        09:49:12   Log-Likelihood:                -85.774
converged:                       True   LL-Null:                       -103.37
Covariance Type:                  HC3   LLR p-value:                 4.247e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -3.1234      1.352     -2.310      0.021      -5.774      -0.473
population             1.605e-06   1.06e-06      1.508      0.131

##### b) Random Forest Classification

In [29]:
# Random-forest feature importances for hasMitigations on all ten regressors;
# the returned table is the cell's displayed output. No random seed is fixed
# for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasMitigations', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.293979
9,medianHouseholdIncome,0.169409
8,lessThanHighSchoolProp,0.117065
4,povertyProp,0.112786
3,unemploymentRate,0.089694
7,avgEmissionsPerCapita,0.072921
5,tempDiff,0.06688
6,numDisasters,0.056635
1,redCounty,0.011232
2,redState,0.009398


#### vii) hasAdaptations

##### a) Logistic Regression

In [30]:
# Full-specification logit of hasAdaptations on nine regressors; the summary
# is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasAdaptations', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1524
Time:                        09:49:13   Log-Likelihood:                -103.33
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 2.477e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -1.3016      2.036     -0.639      0.523      -5.291       2.688
population               1.29e-06   7.97e-07      1.618      0.106   -2.73e-07    2.85e-06
redC

In [31]:
# Forward selection by AIC for hasAdaptations over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasAdaptations', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 219.6236800370598
 
                           Logit Regression Results                           
Dep. Variable:         hasAdaptations   No. Observations:                  197
Model:                          Logit   Df Residuals:                      193
Method:                           MLE   Df Model:                            3
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1320
Time:                        09:49:13   Log-Likelihood:                -105.81
converged:                       True   LL-Null:                       -121.91
Covariance Type:                  HC3   LLR p-value:                 4.773e-07
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -3.1342      1.185     -2.644      0.008      -5.457      -0.811
population             1.246e-06   7.57e-07      1.645      0.100 

##### b) Random Forest Classification

In [32]:
# Random-forest feature importances for hasAdaptations on all ten regressors;
# the returned table is the cell's displayed output. No random seed is fixed
# for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasAdaptations', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.314073
9,medianHouseholdIncome,0.160861
4,povertyProp,0.105756
8,lessThanHighSchoolProp,0.104108
3,unemploymentRate,0.093852
7,avgEmissionsPerCapita,0.075036
5,tempDiff,0.067597
6,numDisasters,0.052212
2,redState,0.013577
1,redCounty,0.012929


#### viii) hasRiskAssessments

##### a) Logistic Regression

In [33]:
# Full-specification logit of hasRiskAssessments on nine regressors; the
# summary is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasRiskAssessments', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1377
Time:                        09:49:13   Log-Likelihood:                -117.48
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 2.128e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -5.0770      1.940     -2.617      0.009      -8.879      -1.275
population              8.596e-07   2.94e-07      2.927      0.003    2.84e-07    1.44e-06
redC

In [34]:
# Forward selection by AIC for hasRiskAssessments over all ten candidate
# regressors (redState included); prints the best AIC and the selected
# model's summary.
ForwardSelection(df = df_unfccc, y_var = 'hasRiskAssessments', vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

 
Best AIC: 245.06126170005405
 
                           Logit Regression Results                           
Dep. Variable:     hasRiskAssessments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      194
Method:                           MLE   Df Model:                            2
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1227
Time:                        09:49:13   Log-Likelihood:                -119.53
converged:                       True   LL-Null:                       -136.24
Covariance Type:                  HC3   LLR p-value:                 5.521e-08
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -2.7077      0.680     -3.984      0.000      -4.040      -1.376
population             8.857e-07   3.04e-07      2.914      0.004

##### b) Random Forest Classification

In [35]:
# Random-forest feature importances for hasRiskAssessments on all ten
# regressors; the returned table is the cell's displayed output. No random
# seed is fixed for this fit, so the importances can differ between runs.
FitRandomForest(df_unfccc, y_var = 'hasRiskAssessments', X_vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']) # ,'medianAge','whiteProp'

Unnamed: 0,Feature,Importance
0,population,0.27233
9,medianHouseholdIncome,0.174494
4,povertyProp,0.113269
8,lessThanHighSchoolProp,0.113111
3,unemploymentRate,0.100443
6,numDisasters,0.072884
5,tempDiff,0.072037
7,avgEmissionsPerCapita,0.06614
2,redState,0.008045
1,redCounty,0.007247


#### ix) hasClimateActionPlans

##### a) Logistic Regression

In [36]:
# Full-specification logit of hasClimateActionPlans on nine regressors; the
# summary is printed. redState is not among the regressors here, although the
# forward-selection and random-forest cells for this outcome include it.
FitLogistic(df_unfccc, y_var = 'hasClimateActionPlans', X_vars = ['population','redCounty','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome'], format = 'print')

                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      187
Method:                              MLE   Df Model:                            9
Date:                   Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1118
Time:                           09:49:14   Log-Likelihood:                -111.58
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 0.0009235
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3055      1.953     -1.180      0.238      -6.133       1.522
population              1.011e-06   5.04e-07      2.008      0.045     

In [37]:
# AIC-based forward selection of logistic-regression predictors for
# 'hasClimateActionPlans'; with format='print' the recorded output shows the
# best AIC followed by the summary of the selected model.
ForwardSelection(
    df=df_unfccc,
    y_var='hasClimateActionPlans',
    vars_to_check=[
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
    format='print',
)

 
Best AIC: 232.7526244018722
 
                             Logit Regression Results                            
Dep. Variable:     hasClimateActionPlans   No. Observations:                  197
Model:                             Logit   Df Residuals:                      193
Method:                              MLE   Df Model:                            3
Date:                   Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1054
Time:                           09:49:14   Log-Likelihood:                -112.38
converged:                          True   LL-Null:                       -125.62
Covariance Type:                     HC3   LLR p-value:                 7.517e-06
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.2797      0.846     -0.331      0.741      -1.938       1.378
population              1.039e-06   5.0

##### b) Random Forest Classification

In [38]:
# Rank predictors of 'hasClimateActionPlans' by random-forest feature importance
# (FitRandomForest's default format returns the importance table, highest first).
# 'medianAge' and 'whiteProp' are candidate predictors currently left out of X_vars.
FitRandomForest(
    df=df_unfccc,
    y_var='hasClimateActionPlans',
    X_vars=[
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
)

Unnamed: 0,Feature,Importance
0,population,0.312361
9,medianHouseholdIncome,0.129104
8,lessThanHighSchoolProp,0.110328
4,povertyProp,0.107527
3,unemploymentRate,0.086686
7,avgEmissionsPerCapita,0.079874
5,tempDiff,0.079063
6,numDisasters,0.067738
2,redState,0.01595
1,redCounty,0.011369


#### x) hasFinanceActions

##### a) Logistic Regression

In [39]:
# Full logistic regression of 'hasFinanceActions' on the predictor set;
# format='print' prints the statsmodels summary instead of returning it.
# NOTE(review): 'redState' is absent from this list although the forward-selection
# and random-forest cells for the same outcome include it -- confirm this is intentional.
FitLogistic(
    df=df_unfccc,
    y_var='hasFinanceActions',
    X_vars=[
        'population',
        'redCounty',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
    format='print',
)

                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                  0.1239
Time:                        09:49:14   Log-Likelihood:                -46.467
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                    0.1560
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4.6114      3.559     -1.296      0.195     -11.586       2.363
population              2.886e-07   1.95e-07      1.481      0.139   -9.34e-08    6.71e-07
redC

In [40]:
# AIC-based forward selection of logistic-regression predictors for
# 'hasFinanceActions'; with format='print' the recorded output shows the
# best AIC followed by the summary of the selected model.
ForwardSelection(
    df=df_unfccc,
    y_var='hasFinanceActions',
    vars_to_check=[
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
    format='print',
)

 
Best AIC: 104.05154001823144
 
                           Logit Regression Results                           
Dep. Variable:      hasFinanceActions   No. Observations:                  197
Model:                          Logit   Df Residuals:                      195
Method:                           MLE   Df Model:                            1
Date:                Wed, 17 Apr 2024   Pseudo R-squ.:                 0.05685
Time:                        09:49:14   Log-Likelihood:                -50.026
converged:                       True   LL-Null:                       -53.041
Covariance Type:                  HC3   LLR p-value:                   0.01406
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -2.7623      0.305     -9.061      0.000      -3.360      -2.165
population  2.215e-07   1.41e-07      1.567      0.117   -5.56e-08    4.99e-07


##### b) Random Forest Classification

In [41]:
# Rank predictors of 'hasFinanceActions' by random-forest feature importance
# (FitRandomForest's default format returns the importance table, highest first).
# 'medianAge' and 'whiteProp' are candidate predictors currently left out of X_vars.
FitRandomForest(
    df=df_unfccc,
    y_var='hasFinanceActions',
    X_vars=[
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
)

Unnamed: 0,Feature,Importance
0,population,0.289298
3,unemploymentRate,0.124743
9,medianHouseholdIncome,0.118085
5,tempDiff,0.113948
8,lessThanHighSchoolProp,0.108424
4,povertyProp,0.092659
6,numDisasters,0.088801
7,avgEmissionsPerCapita,0.045799
1,redCounty,0.009558
2,redState,0.008685
