# Econometric UNFCCC Green Cities Commitment Analysis: USA
## Econometric Analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.ensemble import RandomForestClassifier

  from .autonotebook import tqdm as notebook_tqdm


### 1. Load and Prepare Data

In [2]:
# Read the two input tables; both paths are relative to the notebook's working directory.
df_all = pd.read_csv('allcities.csv')
# NOTE(review): presumably the subset of cities with a UNFCCC record — confirm against the data source.
df_unfccc = pd.read_csv('UNFCCC.csv')

In [3]:
def BooltoZeroOne(df):
    """Recode every boolean column of ``df`` as 0/1 integers.

    The frame is modified in place and nothing is returned. Columns whose
    dtype is not ``bool`` are left untouched.
    """
    bool_columns = [name for name in df.columns if df[name].dtype == bool]
    for name in bool_columns:
        df[name] = df[name].astype(int)

In [4]:
# Recode boolean columns as 0/1 integers in both tables (the frames are modified in place).
BooltoZeroOne(df_all)
BooltoZeroOne(df_unfccc)

### 2. Define Model Fitting Functions

In [60]:
def FitLogistic(df, y_var, X_vars, format = 'return', constant = True):
    """Fit a logistic regression of ``y_var`` on ``X_vars`` with HC3 covariance.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Rows with a missing value in *any* column of ``df``
        (not only the modelled ones) are dropped before fitting.
    y_var : str
        Name of the outcome column.
    X_vars : list of str
        Names of the predictor columns.
    format : {'return', 'print', 'model'}, default 'return'
        ``'return'`` returns the summary table, ``'print'`` prints the
        summary table and returns None, ``'model'`` returns the fitted
        results object.
    constant : bool, default True
        Whether to add a constant (intercept) column with ``sm.add_constant``.

    Raises
    ------
    ValueError
        If ``format`` is not one of the supported values.
    """
    # Reject a mistyped format before paying for the fit, instead of
    # silently returning None afterwards.
    if format not in ('return', 'print', 'model'):
        raise ValueError(
            "format must be 'return', 'print' or 'model', got {!r}".format(format)
        )

    df = df.dropna()

    X = df[X_vars]
    if constant:
        X = sm.add_constant(X)

    y = df[y_var]

    model = sm.Logit(y, X).fit(cov_type = 'HC3', disp = False)

    if format == 'return':
        return model.summary()
    if format == 'print':
        print(model.summary())
        return None
    return model

In [75]:
def FitRandomForest(df, y_var, X_vars, format = 'importance', random_state = 0):
    """Fit a random forest classifier of ``y_var`` on ``X_vars``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data. Rows with a missing value in *any* column of ``df``
        (not only the modelled ones) are dropped before fitting.
    y_var : str
        Name of the outcome column.
    X_vars : list of str
        Names of the predictor columns.
    format : {'importance', 'model'}, default 'importance'
        ``'importance'`` returns a DataFrame with columns ``Feature`` and
        ``Importance`` sorted by decreasing importance; ``'model'`` returns
        the fitted classifier.
    random_state : int or None, default 0
        Seed passed to ``RandomForestClassifier`` so that repeated runs give
        the same forest. Pass ``None`` for an unseeded (non-reproducible) fit.

    Raises
    ------
    ValueError
        If ``format`` is not one of the supported values.
    """
    # Reject a mistyped format before paying for the fit, instead of
    # silently returning None afterwards.
    if format not in ('importance', 'model'):
        raise ValueError(
            "format must be 'importance' or 'model', got {!r}".format(format)
        )

    df = df.dropna()

    X = df[X_vars]

    y = df[y_var]

    # A fixed seed keeps the reported importances stable across re-runs.
    rf = RandomForestClassifier(random_state = random_state)
    model = rf.fit(X, y)

    if format == 'importance':
        return pd.DataFrame({
            'Feature':X_vars,
            'Importance':model.feature_importances_
        }).sort_values(by = 'Importance', ascending = False)

    return model

### 3. Analysing Determinants of Cities Signing Up to the UNFCCC

##### a) Logistic Regression

In [63]:
# All variables on their own versus UNFCCC: fit one logistic regression per
# candidate predictor (plus an intercept) and tabulate its coefficient,
# p-value and the model's AIC/BIC.
# NOTE: the names `vars` (which shadows the builtin) and `df` are kept as-is
# in case other cells refer to them.
vars = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']
result_columns = ['Variable','Coefficient','P-Value','AIC','BIC']
rows = []
for var_name in vars:
    lr = FitLogistic(df_all, y_var = 'UNFCCC', X_vars = [var_name], format = 'model')
    # Look the slope up by name rather than by position so it can never be
    # confused with the intercept.
    rows.append([var_name, lr.params[var_name], lr.pvalues[var_name], lr.aic, lr.bic])
# Build the table once: concatenating onto an initially empty frame inside
# the loop is quadratic and is the line the warning under this cell points at.
df = pd.DataFrame(rows, columns = result_columns)
df

  df = pd.concat([df, pd.DataFrame([[vars[i], lr.params.iloc[1], lr.pvalues.iloc[1], lr.aic, lr.bic]], columns =  ['Variable','Coefficient','P-Value','AIC','BIC']).dropna(axis = 1)], ignore_index=True)


Unnamed: 0,Variable,Coefficient,P-Value,AIC,BIC
0,population,5e-06,7.914043e-07,1633.0582,1648.228343
1,redCounty,-2.241665,1.4957209999999998e-19,1948.025817,1963.195959
2,redState,-0.692852,5.979156e-06,2068.451332,2083.621475
3,unemploymentRate,-19.445843,0.002839589,2082.614319,2097.784461
4,povertyProp,-1.460479,0.2475538,2089.316769,2104.486912
5,tempDiff,-0.136376,0.03557506,2086.179931,2101.350074
6,numDisasters,0.002648,0.00190895,2080.883574,2096.053717
7,avgEmissionsPerCapita,-0.081271,2.026106e-06,2046.253409,2061.423552
8,lessThanHighSchoolProp,-4.10023,0.007179528,2083.070827,2098.24097
9,medianHouseholdIncome,1.7e-05,3.400496e-11,2063.615413,2078.785556


In [67]:
# Joint specification: every candidate predictor entered together against UNFCCC.
FitLogistic(
    df_all,
    y_var = 'UNFCCC',
    X_vars = [
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
    format = 'print',
)

                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14535
Method:                           MLE   Df Model:                           10
Date:                Mon, 15 Apr 2024   Pseudo R-squ.:                  0.2865
Time:                        13:16:24   Log-Likelihood:                -744.24
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                5.273e-122
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.2981      1.065     -2.157      0.031      -4.386      -0.210
population              5.141e-06   1.11e-06      4.612      0.000    2.96e-06    7.33e-06
redC

In [77]:
# Forward selection on AIC: in each pass, try adding every remaining
# predictor to the current set and keep the one that gives the lowest AIC,
# provided it beats the best AIC seen so far. Stop when no candidate improves.
vars_to_check = ['population','redCounty','redState','unemploymentRate','povertyProp','tempDiff','numDisasters','avgEmissionsPerCapita','lessThanHighSchoolProp','medianHouseholdIncome']
vars_selected = []
best_model, best_aic = None, float('inf')

while len(vars_to_check) > 0:
    round_winner = None  # candidate that set a new best AIC during this pass
    for candidate in vars_to_check:
        trial = FitLogistic(df_all, y_var = 'UNFCCC', X_vars = vars_selected + [candidate], format = 'model')
        trial_aic = trial.aic
        if trial_aic < best_aic:
            best_aic, best_model, round_winner = trial_aic, trial, candidate

    # No remaining predictor lowered the AIC: the selection is finished.
    if round_winner is None:
        break

    vars_selected.append(round_winner)
    vars_to_check.remove(round_winner)

print(best_model.summary())


                           Logit Regression Results                           
Dep. Variable:                 UNFCCC   No. Observations:                14546
Model:                          Logit   Df Residuals:                    14537
Method:                           MLE   Df Model:                            8
Date:                Mon, 15 Apr 2024   Pseudo R-squ.:                  0.2865
Time:                        13:32:26   Log-Likelihood:                -744.29
converged:                       True   LL-Null:                       -1043.1
Covariance Type:                  HC3   LLR p-value:                7.418e-124
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -2.3051      0.473     -4.871      0.000      -3.233      -1.378
population              5.142e-06   1.12e-06      4.595      0.000    2.95e-06    7.34e-06
redC

##### b) Random Forest Classification

In [6]:
# Random-forest feature importances for UNFCCC on the full city table.
FitRandomForest(
    df_all,
    y_var = 'UNFCCC',
    X_vars = [
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
)

Unnamed: 0,Feature,Importance
0,population,0.626577
8,lessThanHighSchoolProp,0.077023
9,medianHouseholdIncome,0.072769
4,povertyProp,0.064666
3,unemploymentRate,0.053076
7,avgEmissionsPerCapita,0.033541
5,tempDiff,0.029442
6,numDisasters,0.028116
1,redCounty,0.009978
2,redState,0.004812


### 4. Analysing Determinants of Cities Undertaking Individual Actions Recorded by the UNFCCC

#### i) Has Commitments

##### a) Logistic Regression

In [74]:
# Logistic regression for hasCommitments on the UNFCCC table.
# Note that redState is not among the predictors in this specification.
FitLogistic(
    df_unfccc,
    y_var = 'hasCommitments',
    X_vars = [
        'population',
        'redCounty',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
    format = 'print',
)

                           Logit Regression Results                           
Dep. Variable:         hasCommitments   No. Observations:                  197
Model:                          Logit   Df Residuals:                      187
Method:                           MLE   Df Model:                            9
Date:                Mon, 15 Apr 2024   Pseudo R-squ.:                  0.1520
Time:                        13:30:05   Log-Likelihood:                -98.116
converged:                       True   LL-Null:                       -115.70
Covariance Type:                  HC3   LLR p-value:                 5.569e-05
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -0.1391      2.187     -0.064      0.949      -4.426       4.148
population              1.996e-06   5.72e-07      3.489      0.000    8.74e-07    3.12e-06
redC

##### b) Random Forest Classification

In [7]:
# Random-forest feature importances for hasCommitments on the UNFCCC table.
# medianAge and whiteProp are not part of this specification.
FitRandomForest(
    df_unfccc,
    y_var = 'hasCommitments',
    X_vars = [
        'population',
        'redCounty',
        'redState',
        'unemploymentRate',
        'povertyProp',
        'tempDiff',
        'numDisasters',
        'avgEmissionsPerCapita',
        'lessThanHighSchoolProp',
        'medianHouseholdIncome',
    ],
)

Unnamed: 0,Feature,Importance
0,population,0.284352
9,medianHouseholdIncome,0.147056
8,lessThanHighSchoolProp,0.123839
4,povertyProp,0.117844
3,unemploymentRate,0.106948
7,avgEmissionsPerCapita,0.079041
5,tempDiff,0.064139
6,numDisasters,0.057886
1,redCounty,0.009651
2,redState,0.009245


#### ii) Has Actions Undertaken

#### iii) hasEmissionInventory

#### iv) hasInitiativeParticipations
#### v) hasImpact
#### vi) hasMitigations
#### vii) hasAdaptations
#### viii) hasRiskAssessments
#### ix) hasClimateActionPlans
#### x) hasFinanceActions