# Regressions and Map Visualization

Date: 09/01/2022

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime as dt
%matplotlib inline

# Machine Learning
import scipy as sp
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate

# Map
from PIL import Image
from urllib.request import urlopen
import json
import plotly.express as px
import kaleido

### 1. Read in NPP, GPP, Yield, and Weather Data

In [None]:
def read_concat(file_name, crop, start_year, end_year):
    """
    Read a run of yearly CSV files and concatenate them into one dataframe.

    Files are looked up in the ``Data_original`` folder located in the parent
    of the current working directory and must be named
    ``<file_name><year>_<crop>.csv``.

    Parameters
    ----------
    file_name : str
        Prefix of the file names (the part before the year).
    crop : str
        Crop label used in the file names (the part after the underscore).
    start_year : int
        First year to read. This file is always read.
    end_year : int
        Last year to read (inclusive). If it is smaller than ``start_year``
        only the ``start_year`` file is read.

    Returns
    -------
    pandas.DataFrame
        The rows of all yearly files stacked in year order, re-indexed
        from 0.
    """
    # Join path components individually: a literal '\\' separator only
    # resolves to a sub-folder on Windows.
    data_dir = os.path.join(os.path.dirname(os.getcwd()), 'Data_original')
    years = [start_year, *range(start_year + 1, end_year + 1)]

    # Read everything first and concatenate once instead of growing the
    # dataframe inside the loop.
    frames = [
        pd.read_csv(os.path.join(data_dir, f'{file_name}{year}_{crop}.csv'),
                    header=0)
        for year in years
    ]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Build one county-by-year table (corn_df) holding yield, GPP, NPP and weather.
# Paths are joined component-wise so they also resolve on non-Windows systems.
cwd = os.path.dirname(os.getcwd())
data_dir = os.path.join(cwd, 'Data_original')

# Read in yield data by county.
location = os.path.join(data_dir, 'corn_yield_data.csv')
yield_df = pd.read_csv(location, header=0)
yield_df = yield_df[['year', 'state_ansi', 'county_ansi', 'Value']].copy()

# Read in GPP data by county.
GPP_acc = read_concat('GPPintegral', 'Corn', 2001, 2020)
GPP_acc = (GPP_acc.drop(columns=['system:index', '.geo', 'QC_sum'])
                  .dropna(subset=['GPP_sum'])
                  .reset_index(drop=True))
corn_df = GPP_acc.merge(yield_df,
                        left_on=['STATEFP', 'COUNTYFP', 'Year'],
                        right_on=['state_ansi', 'county_ansi', 'year'],
                        how='outer')

# Read in NPP data by county.
# Non-inplace calls avoid mutating a column slice of the original frame.
NPP_acc = read_concat('NPPintegral', 'Corn', 2001, 2019)
NPP_acc = (NPP_acc[['STATEFP', 'COUNTYFP', 'Year', 'annualNPP_sum']]
           .dropna(subset=['annualNPP_sum'])
           .reset_index(drop=True))
# NOTE(review): rows that came only from the yield file still have missing
# STATEFP/COUNTYFP/Year at this point, so NPP rows cannot match them here.
# Confirm this is intended.
corn_df = corn_df.merge(NPP_acc,
                        on=['STATEFP', 'COUNTYFP', 'Year'],
                        how='outer')

# Keep only useful variables
# Fill the GPP/NPP keys from the yield keys by plain assignment: an inplace
# fillna on `corn_df.Year` is a chained update that pandas deprecates and
# that does not modify corn_df under Copy-on-Write.
corn_df['Year'] = corn_df['Year'].fillna(corn_df['year'])
corn_df['COUNTYFP'] = corn_df['COUNTYFP'].fillna(corn_df['county_ansi'])
corn_df['STATEFP'] = corn_df['STATEFP'].fillna(corn_df['state_ansi'])
corn_df = corn_df.drop(columns=['year', 'state_ansi', 'county_ansi'])
# Positional rename: assumes the merged frame has exactly these 14 columns in
# this order -- TODO confirm against the CSV headers.
corn_df.columns = ['AFFGEOID', 'ALAND', 'AWATER', 'COUNTYFP', 'COUNTYNS', 'CropLand',
                   'GEOID', 'GPP_integral', 'LSAD', 'NAME', 'STATEFP', 'Year', 'Yield',
                   'NPP_integral']
corn_df = corn_df.drop(columns=['AFFGEOID', 'ALAND', 'AWATER', 'COUNTYNS', 'LSAD'])

# Read in weather data by county.
location = os.path.join(data_dir, 'corn_weather.csv')
weather_df_corn = pd.read_csv(location, header=0)
corn_df = corn_df.merge(weather_df_corn,
                        left_on=['STATEFP', 'COUNTYFP', 'Year'],
                        right_on=['statefp', 'countyfp', 'year'],
                        how='left')

### 2.  Random Forest Regression
#### 2.1. CUE
##### 2.1.1. Parameter tuning

In [None]:
# Hold-out share and seed for the train/test split
proportion_test = 0.4
seed_num = 159
# Hyper-parameter grid explored during tuning
max_depth = range(5, 31)
num_trees = [100, 500, 1000]

# Build the modelling table: rows with both GPP and NPP, plus the derived CUE
target = 'CUE'
var_list_rf = ['GPP_integral', 'NPP_integral', 'Yield',
               'Year', 'prcp', 'gdd', 'edd']
df_rf = corn_df[var_list_rf].dropna(
    subset=['GPP_integral', 'NPP_integral'], how='any').copy()
df_rf['CUE'] = df_rf['NPP_integral'] / df_rf['GPP_integral']

# Discard rows where the target or the weather variable gdd is missing
df_rf = df_rf.dropna(subset=[target, 'gdd'])

# Split the data into training and test sets
data_train, data_test = train_test_split(
    df_rf, test_size=proportion_test, random_state=seed_num)

In [None]:
# Use 10-fold cross validation on the training split to score every
# (max depth, number of trees) combination.
# The predictors are all columns except the target and the columns listed
# below. The DataFrame column order is kept: building the list from a set
# made the feature order vary between interpreter runs.
excluded_columns = {target, 'GPP_integral', 'NPP_integral', 'Yield'}
features = data_train.columns
features_select = [col for col in features if col not in excluded_columns]
X = data_train[features_select].values
y = data_train[target].values
result_test = {}
result_train = {}
for t in num_trees:
    for i in max_depth:
        # Seed the forest so the tuning results are reproducible.
        clf = RandomForestRegressor(n_estimators=t,
                                    max_depth=i,
                                    max_features="sqrt",
                                    random_state=seed_num)
        scores = cross_validate(clf, X, y,
                                cv=10,
                                return_train_score=True)
        # Keyed by (max depth, number of trees); values are fold averages.
        result_test[(i, t)] = scores['test_score'].mean()
        result_train[(i, t)] = scores['train_score'].mean()

In [None]:
# Turn the score dictionaries into one-column tables indexed by
# (max depth, number of trees).
result_test = pd.DataFrame(result_test, index=[0]).transpose()
result_train = pd.DataFrame(result_train, index=[0]).transpose()

# The best combination is the index label with the highest mean test score.
optimal_depth, optimal_num_trees = result_test.idxmax()[0]

# Flat copies used for plotting: r1 = test scores, r2 = train scores.
score_columns = ['max_depth', 'num_trees', 'Rsquared']
r1 = result_test.reset_index()
r1.columns = score_columns
r2 = result_train.reset_index()
r2.columns = score_columns

In [None]:
# Mean cross-validation R-squared against max depth: train folds in blue,
# test folds in red.
fig, ax = plt.subplots(1, figsize=(8,6))
for score_table, colour, tag in ((r2, 'blue', 'Train'), (r1, 'red', 'Test')):
    ax.plot(score_table["max_depth"], score_table["Rsquared"],
            'o', color=colour, label=tag)
ax.set_xlabel('Max depth')
ax.set_ylabel('Average R-squared')
ax.legend()
location = os.path.join(cwd, 'figures\\training_max_depth_corn_cue.png')
fig.savefig(location)

In [None]:
# Mean cross-validation R-squared against the number of trees: train folds in
# blue, test folds in red.
fig, ax = plt.subplots(1, figsize=(8,6))
for score_table, colour, tag in ((r2, 'blue', 'Train'), (r1, 'red', 'Test')):
    ax.plot(score_table["num_trees"], score_table["Rsquared"],
            'o', color=colour, label=tag)
ax.set_xlabel('num_trees')
ax.set_ylabel('Average R-squared')
ax.legend()
location = os.path.join(cwd, 'figures\\training_num_trees_corn_cue.png')
fig.savefig(location)

##### 2.1.2. Random forest model using tuned parameters

In [None]:
# Display the number of trees with the highest mean cross-validation test score.
optimal_num_trees

In [None]:
# Display the max depth with the highest mean cross-validation test score.
optimal_depth

In [None]:
# Using the tuned max depth and number of trees, fit a random forest on the
# training data set and score it on the held-out test set.
test_X = data_test[features_select].values
test_y = data_test[target].values
# Seed the forest so the reported scores and importances are reproducible.
clf = RandomForestRegressor(n_estimators=optimal_num_trees,
                            max_depth=optimal_depth,
                            max_features="sqrt",
                            random_state=seed_num)
clf.fit(X, y)

# R-squared on the training and test sets (printed below as "R squared")
train_score = clf.score(X, y)
hold_score = clf.score(test_X, test_y)
print('Training R squared: {:.2f}'.format(train_score))
print('Testing R squared: {:.2f}'.format(hold_score))

In [None]:
# Predictions of the fitted forest on both splits, paired with the observed
# values and labelled by split.
predict_test = clf.predict(test_X)
predict_train = clf.predict(X)
df_test = pd.DataFrame({'predicted_value': predict_test,
                        'true_value': test_y})
df_train = pd.DataFrame({'predicted_value': predict_train,
                         'true_value': y})
df_test['dataset'] = 'test'
df_train['dataset'] = 'train'
df_figure = pd.concat([df_test, df_train], ignore_index=True)

# Root-mean-square error per split, rounded to two decimals.
train_residual = df_train['true_value'] - df_train['predicted_value']
test_residual = df_test['true_value'] - df_test['predicted_value']
RMSE_train = round(np.sqrt((train_residual ** 2).mean()), 2)
RMSE_test = round(np.sqrt((test_residual ** 2).mean()), 2)

In [None]:
# Create the shared 2x2 figure; the CUE true-vs-predicted scatter goes in the
# bottom-left panel.
outputfig, axs = plt.subplots(2, 2, figsize=(20, 16))
cue_scatter_ax = axs[1, 0]
for frame, colour, tag in ((df_train, 'blue', 'training data'),
                           (df_test, 'red', 'testing data')):
    cue_scatter_ax.scatter(frame['true_value'],
                           frame['predicted_value'],
                           color=colour,
                           label=tag,
                           alpha=0.2)
cue_scatter_ax.set_xlabel('True Value', fontsize = 25)
cue_scatter_ax.set_ylabel('Predicted Value', fontsize = 25)
cue_scatter_ax.legend(loc='lower right', fontsize = 25)
cue_scatter_ax.set_title('(b-1) CUE True Values Vs. Predictions',
                         fontsize = 25)
# 1:1 reference line
cue_scatter_ax.plot([0, 1.5], [0, 1.5], color = 'black', linewidth = 2)
# Fit statistics written inside the panel
cue_stats = '\n'.join([
    'R-squared for training data: ' + str(round(train_score,4)),
    'RMSE for training data: ' + str(RMSE_train),
    'R-squared for testing data: ' + str(round(hold_score,4)),
    'RMSE for testing data: ' + str(RMSE_test),
])
cue_scatter_ax.text(-0.05, 1.12, cue_stats, fontsize=25)
# NOTE(review): these rc settings are applied after the axes above were
# created, so they presumably only affect figures created later -- confirm.
plt.rc('xtick', labelsize=25)
plt.rc('ytick', labelsize=25)

In [None]:
# Horizontal bar chart of the forest's feature importances, sorted ascending,
# drawn in the bottom-right panel of the shared figure.
importances = clf.feature_importances_
idx = np.argsort(importances)
cue_importance_ax = axs[1, 1]
bar_positions = range(len(idx))
bar_labels = [features_select[i] for i in idx]
cue_importance_ax.set_title('(b-2) CUE Feature importance',
                            fontsize = 25)
cue_importance_ax.barh(bar_positions, importances[idx], color='blue')
cue_importance_ax.set_yticks(bar_positions, bar_labels)
cue_importance_ax.set_xlabel('Mean Impurity Decrease within Each Tree', fontsize = 25)
cue_importance_ax.set_ylabel('Feature', fontsize = 25)
outputfig

#### 2.2. Yield
##### 2.2.1. Parameter tuning

In [None]:
# Hold-out share and seed for the train/test split
proportion_test = 0.4
seed_num = 1237
# Hyper-parameter grid explored during tuning
max_depth = range(5, 31)
num_trees = [100, 500, 1000]

# Build the modelling table: keep rows that have GPP, NPP, the target and gdd
target = 'Yield'
var_list_rf = ['GPP_integral', 'NPP_integral', 'Yield',
               'Year', 'prcp', 'gdd', 'edd']
required_columns = ['GPP_integral', 'NPP_integral', target, 'gdd']
df_rf = corn_df[var_list_rf].dropna(subset=required_columns, how='any').copy()

# Split the data into training and test sets
data_train, data_test = train_test_split(
    df_rf, test_size=proportion_test, random_state=seed_num)

In [None]:
# Use 10-fold cross validation on the training split to score every
# (max depth, number of trees) combination.
# The predictors are all columns except the target and the columns listed
# below. The DataFrame column order is kept: building the list from a set
# made the feature order vary between interpreter runs.
excluded_columns = {target, 'GPP_integral', 'NPP_integral'}
features = data_train.columns
features_select = [col for col in features if col not in excluded_columns]
X = data_train[features_select].values
y = data_train[target].values
result_test = {}
result_train = {}
for t in num_trees:
    for i in max_depth:
        # Seed the forest so the tuning results are reproducible.
        clf = RandomForestRegressor(n_estimators=t,
                                    max_depth=i,
                                    max_features="sqrt",
                                    random_state=seed_num)
        scores = cross_validate(clf, X, y,
                                cv=10,
                                return_train_score=True)
        # Keyed by (max depth, number of trees); values are fold averages.
        result_test[(i, t)] = scores['test_score'].mean()
        result_train[(i, t)] = scores['train_score'].mean()

In [None]:
# Turn the score dictionaries into one-column tables indexed by
# (max depth, number of trees).
result_test = pd.DataFrame(result_test, index=[0]).transpose()
result_train = pd.DataFrame(result_train, index=[0]).transpose()

# The best combination is the index label with the highest mean test score.
optimal_depth, optimal_num_trees = result_test.idxmax()[0]

# Flat copies used for plotting: r1 = test scores, r2 = train scores.
score_columns = ['max_depth', 'num_trees', 'Rsquared']
r1 = result_test.reset_index()
r1.columns = score_columns
r2 = result_train.reset_index()
r2.columns = score_columns

In [None]:
# Mean cross-validation R-squared against max depth: train folds in blue,
# test folds in red.
fig, ax = plt.subplots(1, figsize=(8,6))
for score_table, colour, tag in ((r2, 'blue', 'Train'), (r1, 'red', 'Test')):
    ax.plot(score_table["max_depth"], score_table["Rsquared"],
            'o', color=colour, label=tag)
ax.set_xlabel('Max depth')
ax.set_ylabel('Average R-squared')
ax.legend()
location = os.path.join(cwd, 'figures\\training_max_depth_corn_yield.png')
fig.savefig(location)

In [None]:
# Mean cross-validation R-squared against the number of trees: train folds in
# blue, test folds in red.
fig, ax = plt.subplots(1, figsize=(8,6))
for score_table, colour, tag in ((r2, 'blue', 'Train'), (r1, 'red', 'Test')):
    ax.plot(score_table["num_trees"], score_table["Rsquared"],
            'o', color=colour, label=tag)
ax.set_xlabel('num_trees')
ax.set_ylabel('Average R-squared')
ax.legend()
location = os.path.join(cwd, 'figures\\training_num_trees_corn_yield.png')
fig.savefig(location)

##### 2.2.2. Random forest model using tuned parameters

In [None]:
# Display the number of trees with the highest mean cross-validation test score.
optimal_num_trees

In [None]:
# Display the max depth with the highest mean cross-validation test score.
optimal_depth

In [None]:
# Using the tuned max depth and number of trees, fit a random forest on the
# training data set and score it on the held-out test set.
test_X = data_test[features_select].values
test_y = data_test[target].values
# Seed the forest so the reported scores and importances are reproducible.
clf = RandomForestRegressor(n_estimators=optimal_num_trees,
                            max_depth=optimal_depth,
                            max_features="sqrt",
                            random_state=seed_num)
clf.fit(X, y)

# R-squared on the training and test sets (printed below as "R squared")
train_score = clf.score(X, y)
hold_score = clf.score(test_X, test_y)
print('Training R squared: {:.2f}'.format(train_score))
print('Testing R squared: {:.2f}'.format(hold_score))

In [None]:
# Predictions of the fitted forest on both splits, paired with the observed
# values and labelled by split.
predict_test = clf.predict(test_X)
predict_train = clf.predict(X)
df_test = pd.DataFrame({'predicted_value': predict_test,
                        'true_value': test_y})
df_train = pd.DataFrame({'predicted_value': predict_train,
                         'true_value': y})
df_test['dataset'] = 'test'
df_train['dataset'] = 'train'
df_figure = pd.concat([df_test, df_train], ignore_index=True)

# Root-mean-square error per split, rounded to two decimals.
train_residual = df_train['true_value'] - df_train['predicted_value']
test_residual = df_test['true_value'] - df_test['predicted_value']
RMSE_train = round(np.sqrt((train_residual ** 2).mean()), 2)
RMSE_test = round(np.sqrt((test_residual ** 2).mean()), 2)

In [None]:
# Yield true-vs-predicted scatter in the top-left panel of the shared figure.
yield_scatter_ax = axs[0, 0]
for frame, colour, tag in ((df_train, 'blue', 'training data'),
                           (df_test, 'red', 'testing data')):
    yield_scatter_ax.scatter(frame['true_value'],
                             frame['predicted_value'],
                             color=colour,
                             label=tag,
                             alpha=0.2)
yield_scatter_ax.set_xlabel('True Value (Bushels/Acre)', fontsize = 25)
yield_scatter_ax.set_ylabel('Predicted Value (Bushels/Acre)', fontsize = 25)
yield_scatter_ax.legend(loc='lower right', fontsize = 25)
yield_scatter_ax.set_title('(a-1) Yield True Values Vs. Predictions',
                           fontsize = 25)
# 1:1 reference line
yield_scatter_ax.plot([0, 250], [0, 250], color = 'black', linewidth = 2)
# Fit statistics written inside the panel
yield_stats = '\n'.join([
    'R-squared for training data: ' + str(round(train_score,4)),
    'RMSE for training data: ' + str(RMSE_train),
    'R-squared for testing data: ' + str(round(hold_score,4)),
    'RMSE for testing data: ' + str(RMSE_test),
])
yield_scatter_ax.text(-8, 185, yield_stats, fontsize=25)
outputfig

In [None]:
# Horizontal bar chart of the forest's feature importances, sorted ascending,
# drawn in the top-right panel of the shared figure.
importances = clf.feature_importances_
idx = np.argsort(importances)
yield_importance_ax = axs[0, 1]
bar_positions = range(len(idx))
bar_labels = [features_select[i] for i in idx]
yield_importance_ax.set_title('(a-2) Yield Feature importance',
                              fontsize = 25)
yield_importance_ax.barh(bar_positions, importances[idx], color='blue')
yield_importance_ax.set_yticks(bar_positions, bar_labels)
yield_importance_ax.set_xlabel('Mean Impurity Decrease within Each Tree', fontsize = 25)
yield_importance_ax.set_ylabel('Feature', fontsize = 25)
outputfig

In [None]:
# Use the same tick-label size on all four panels, then adjust the spacing
# between them.
for panel_ax in axs.flat:
    panel_ax.tick_params(axis='both', labelsize=25)
outputfig.subplots_adjust(wspace=0.2, hspace=0.3)
outputfig

In [None]:
# Write the combined 2x2 random-forest figure to disk.
location = os.path.join(cwd, 'figures\\figure_3.png')
outputfig.savefig(location)

### 3. Map Visualization
#### 3.1. Map Preparation

In [None]:
# Download the county GeoJSON passed to the choropleth calls below
# (presumably keyed by 5-digit FIPS code, matching the `fips` columns -- confirm).
counties_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(counties_url) as response:
    counties = json.load(response)

#### 3.2. Data Preparation

In [None]:
# Yield table for the maps: rows with a yield value after 2000, restricted to
# counties that have more than 10 such rows.
var_list_yield = ['STATEFP', 'COUNTYFP', 'Yield',
                  'Year', 'prcp', 'gdd', 'edd']
corn_df_yield = corn_df.loc[:, var_list_yield].dropna(subset=['Yield'])
corn_df_yield = corn_df_yield.loc[corn_df_yield.Year>2000]

# Number of rows per county, attached to every row as `ct`
corn_counts = (corn_df_yield.groupby(['STATEFP', 'COUNTYFP'])
               .size()
               .reset_index(name='ct'))
corn_df_yield = corn_df_yield.merge(corn_counts,
                                    on=['STATEFP', 'COUNTYFP'],
                                    how='left')
# Copy the filtered rows so the column assignment below acts on an
# independent frame (avoids pandas' SettingWithCopyWarning).
corn_df_yield = corn_df_yield.loc[corn_df_yield.ct>10, :].copy()

# 5-character FIPS code: state code * 1000 + county code, zero-padded
fips_number = (corn_df_yield.COUNTYFP + corn_df_yield.STATEFP*1000).astype(int)
corn_df_yield['fips'] = fips_number.map('{0:05}'.format)

In [None]:
# CUE table for the maps: rows with both GPP and NPP, restricted to counties
# that have more than 10 such rows.
var_list_cue = ['STATEFP', 'COUNTYFP', 'GPP_integral',
                'NPP_integral', 'Year', 'prcp', 'gdd', 'edd']
corn_df_cue = corn_df.loc[:, var_list_cue].dropna(
    subset=['GPP_integral', 'NPP_integral'], how='any')

# Number of rows per county, attached to every row as `ct`
corn_counts = (corn_df_cue.groupby(['STATEFP', 'COUNTYFP'])
               .size()
               .reset_index(name='ct'))
corn_df_cue = corn_df_cue.merge(corn_counts,
                                on=['STATEFP', 'COUNTYFP'],
                                how='left')
# Copy the filtered rows so the column assignments below act on an
# independent frame (avoids pandas' SettingWithCopyWarning).
corn_df_cue = corn_df_cue.loc[corn_df_cue.ct>10].copy()
corn_df_cue['CUE'] = corn_df_cue['NPP_integral']/corn_df_cue['GPP_integral']

# 5-character FIPS code: state code * 1000 + county code, zero-padded
fips_number = (corn_df_cue.COUNTYFP + corn_df_cue.STATEFP*1000).astype(int)
corn_df_cue['fips'] = fips_number.map('{0:05}'.format)

#### 3.3. Corn CUE and yield visualization

In [None]:
# Average every column of the yield table per county (one row per fips code).
corn_yield_map = corn_df_yield.groupby('fips').mean().reset_index()

In [None]:
# Average every column of the CUE table per county (one row per fips code).
corn_cue_map = corn_df_cue.groupby('fips').mean().reset_index()

In [None]:
# Display the largest county-mean CUE.
corn_cue_map.CUE.max()

In [None]:
# County map of mean CUE (only counties with CUE > 0), shown and saved as PNG.
cue_positive = corn_cue_map.loc[corn_cue_map.CUE>0, :]
fig = px.choropleth(
    cue_positive,
    geojson=counties,
    locations='fips',
    color='CUE',
    color_continuous_scale="YlGn",
    range_color=(0.388, 0.683),
    scope="usa",
    labels={'CUE':'CUE'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\CUE.png')
fig.write_image(location)

In [None]:
# County map of mean yield (only counties with Yield > 0), shown and saved as PNG.
yield_positive = corn_yield_map.loc[corn_yield_map.Yield>0, :]
fig = px.choropleth(
    yield_positive,
    geojson=counties,
    locations='fips',
    color='Yield',
    color_continuous_scale="YlGn",
    range_color=(46, 205),
    scope="usa",
    labels={'Yield':'Yield'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\yield.png')
fig.write_image(location)

In [None]:
# Assemble four saved map images into one 2x2 figure.
# (panel title, image file) in row-major panel order
map_panels = [
    ('(a) CUE', 'figures\\CUE.png'),
    ('(b) GDD', 'figures\\GDD.png'),
    ('(c) Precipitation', 'figures\\prcp.png'),
    ('(d) EDD', 'figures\\EDD.png'),
]

# Open every image before creating the figure.
map_images = [Image.open(os.path.join(cwd, image_file))
              for _, image_file in map_panels]

fig, axs = plt.subplots(2, 2,
                        figsize=(20, 16))

for panel_ax, (panel_title, _), panel_image in zip(axs.flat, map_panels, map_images):
    panel_ax.set_title(panel_title, fontsize = 25)
    panel_ax.imshow(panel_image)
    panel_ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# Write the 2x2 map figure to disk.
location = os.path.join(cwd, 'figures\\figure_2.png')
fig.savefig(location)

#### 3.4. Corn CUE and yield responses to weather variables visualization

##### 3.4.1. Regressions

In [None]:
# Fit one OLS regression of Yield on edd, gdd and prcp per county and collect
# the slope estimates and their p-values.
corn_yield_coef = []

# A single groupby pass replaces re-filtering the whole table for every
# county; sort=False keeps the counties in order of first appearance.
yield_regression_columns = ['fips', 'Yield', 'edd', 'gdd', 'prcp']
for i, df in corn_df_yield[yield_regression_columns].groupby('fips', sort=False):
    mod = smf.ols(formula='Yield ~ edd + gdd + prcp',
                  data=df, missing='drop')
    res = mod.fit()
    corn_yield_coef.append({
        'fips': i,
        'edd': res.params['edd'],
        'gdd': res.params['gdd'],
        'prcp': res.params['prcp'],
        'edd_p': res.pvalues['edd'],
        'gdd_p': res.pvalues['gdd'],
        'prcp_p': res.pvalues['prcp'],
    })

# One row per county
corn_yield_coef = pd.DataFrame(corn_yield_coef)

In [None]:
# Fit one OLS regression of CUE on edd, gdd and prcp per county and collect
# the slope estimates and their p-values.
corn_cue_coef = []

# A single groupby pass replaces re-filtering the whole table for every
# county; sort=False keeps the counties in order of first appearance.
cue_regression_columns = ['fips', 'CUE', 'edd', 'gdd', 'prcp']
for i, df in corn_df_cue[cue_regression_columns].groupby('fips', sort=False):
    mod = smf.ols(formula='CUE ~ edd + gdd + prcp',
                  data=df, missing='drop')
    res = mod.fit()
    corn_cue_coef.append({
        'fips': i,
        'edd': res.params['edd'],
        'gdd': res.params['gdd'],
        'prcp': res.params['prcp'],
        'edd_p': res.pvalues['edd'],
        'gdd_p': res.pvalues['gdd'],
        'prcp_p': res.pvalues['prcp'],
    })

# One row per county
corn_cue_coef = pd.DataFrame(corn_cue_coef)

In [None]:
# Keep counties that have both a CUE and a yield regression; coefficient
# columns get the suffix `_CUE` or `_yield`.
Corn_CUE_Yield_coef = pd.merge(
    corn_cue_coef,
    corn_yield_coef,
    how='inner',
    on='fips',
    suffixes=('_CUE', '_yield'),
)

In [None]:
# Label each county by the signs of its GDD coefficients: the first character
# is the yield sign, the second the CUE sign. Computed column-wise, so it does
# not depend on the frame having a 0..n-1 index.
gdd_yield_pos = Corn_CUE_Yield_coef['gdd_yield'] > 0
gdd_yield_neg = Corn_CUE_Yield_coef['gdd_yield'] < 0
gdd_cue_pos = Corn_CUE_Yield_coef['gdd_CUE'] > 0
gdd_cue_neg = Corn_CUE_Yield_coef['gdd_CUE'] < 0
# Everything that matches none of the three conditions (both negative, an
# exact zero or a missing coefficient) is labelled '--', as before.
Corn_CUE_Yield_coef['gdd_category'] = np.select(
    [gdd_yield_pos & gdd_cue_pos,
     gdd_yield_pos & gdd_cue_neg,
     gdd_yield_neg & gdd_cue_pos],
    ['++', '+-', '-+'],
    default='--')

In [None]:
# County map of the GDD sign categories (yield sign / CUE sign), shown and
# saved as PNG.
sign_colours = {'++':'red', '--':'blue', '+-':'yellow', '-+':'#00CC96'}
fig = px.choropleth(
    Corn_CUE_Yield_coef,
    geojson=counties,
    locations='fips',
    color='gdd_category',
    color_discrete_map=sign_colours,
    scope="usa",
    labels={'gdd_category':'Yield/CUE'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\gdd_yield_cue.png')
fig.write_image(location)

In [None]:
# Label each county by the signs of its precipitation coefficients: the first
# character is the yield sign, the second the CUE sign. Computed column-wise,
# so it does not depend on the frame having a 0..n-1 index.
prcp_yield_pos = Corn_CUE_Yield_coef['prcp_yield'] > 0
prcp_yield_neg = Corn_CUE_Yield_coef['prcp_yield'] < 0
prcp_cue_pos = Corn_CUE_Yield_coef['prcp_CUE'] > 0
prcp_cue_neg = Corn_CUE_Yield_coef['prcp_CUE'] < 0
# Everything that matches none of the three conditions (both negative, an
# exact zero or a missing coefficient) is labelled '--', as before.
Corn_CUE_Yield_coef['prcp_category'] = np.select(
    [prcp_yield_pos & prcp_cue_pos,
     prcp_yield_pos & prcp_cue_neg,
     prcp_yield_neg & prcp_cue_pos],
    ['++', '+-', '-+'],
    default='--')

In [None]:
# County map of the precipitation sign categories (yield sign / CUE sign),
# shown and saved as PNG.
sign_colours = {'++':'red', '--':'blue', '+-':'yellow', '-+':'#00CC96'}
fig = px.choropleth(
    Corn_CUE_Yield_coef,
    geojson=counties,
    locations='fips',
    color='prcp_category',
    color_discrete_map=sign_colours,
    scope="usa",
    labels={'prcp_category':'Yield/CUE'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\prcp_yield_cue.png')
fig.write_image(location)

In [None]:
# Flag counties whose GDD coefficient has a p-value below 0.1 in the CUE
# regression, the yield regression, or both; all other counties stay NaN.
# Computed column-wise, so it does not depend on a 0..n-1 index. The abs()
# around the p-values was dropped because p-values are never negative.
gdd_cue_sig = Corn_CUE_Yield_coef['gdd_p_CUE'] < 0.1
gdd_yield_sig = Corn_CUE_Yield_coef['gdd_p_yield'] < 0.1
gdd_sig = pd.Series(np.nan, index=Corn_CUE_Yield_coef.index, dtype=object)
# Assign from lowest to highest priority so later labels overwrite earlier
# ones: 'both' beats 'CUE', which beats 'yield'.
gdd_sig[gdd_yield_sig] = 'yield'
gdd_sig[gdd_cue_sig] = 'CUE'
gdd_sig[gdd_cue_sig & gdd_yield_sig] = 'both'
Corn_CUE_Yield_coef['sig_category'] = gdd_sig

In [None]:
# County map of the GDD significance categories; counties without a category
# are left out. Shown and saved as PNG.
significance_colours = {'both':'purple',
                        'CUE':'blue',
                        'yield':'red'}
gdd_significant = Corn_CUE_Yield_coef.dropna(subset=['sig_category'])
fig = px.choropleth(
    gdd_significant,
    geojson=counties,
    locations='fips',
    color='sig_category',
    color_discrete_map=significance_colours,
    scope="usa",
    labels={'sig_category':'Significance'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\sig_gdd_yield_cue.png')
fig.write_image(location)

In [None]:
# Flag counties whose precipitation coefficient has a p-value below 0.2 in
# the CUE regression, the yield regression, or both; all other counties stay
# NaN. Computed column-wise, so it does not depend on a 0..n-1 index. The
# abs() around the p-values was dropped because p-values are never negative.
# NOTE(review): the threshold here is 0.2 while the GDD cell uses 0.1 --
# confirm the difference is intentional.
prcp_cue_sig = Corn_CUE_Yield_coef['prcp_p_CUE'] < 0.2
prcp_yield_sig = Corn_CUE_Yield_coef['prcp_p_yield'] < 0.2
prcp_sig = pd.Series(np.nan, index=Corn_CUE_Yield_coef.index, dtype=object)
# Assign from lowest to highest priority so later labels overwrite earlier
# ones: 'both' beats 'CUE', which beats 'yield'.
prcp_sig[prcp_yield_sig] = 'yield'
prcp_sig[prcp_cue_sig] = 'CUE'
prcp_sig[prcp_cue_sig & prcp_yield_sig] = 'both'
Corn_CUE_Yield_coef['sig_category_prcp'] = prcp_sig

In [None]:
# County map of the precipitation significance categories; counties without a
# category are left out. Shown and saved as PNG.
significance_colours = {'both':'purple',
                        'CUE':'blue',
                        'yield':'red'}
prcp_significant = Corn_CUE_Yield_coef.dropna(subset=['sig_category_prcp'])
fig = px.choropleth(
    prcp_significant,
    geojson=counties,
    locations='fips',
    color='sig_category_prcp',
    color_discrete_map=significance_colours,
    scope="usa",
    labels={'sig_category_prcp':'Significance'},
)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},
                  font=dict(size=20, color='black'))
fig.show()
location = os.path.join(cwd, 'figures\\sig_prcp_yield_cue.png')
fig.write_image(location)

In [None]:
# Assemble the four saved response maps into one 2x2 figure.
# (panel title, image file) in row-major panel order
response_panels = [
    ('(a) Yield and CUE vs. GDD', 'figures\\gdd_yield_cue.png'),
    ('(b) Yield and CUE vs precipitation', 'figures\\prcp_yield_cue.png'),
    ('(c) GDD significance level', 'figures\\sig_gdd_yield_cue.png'),
    ('(d) Precipitation significance level', 'figures\\sig_prcp_yield_cue.png'),
]

# Open every image before creating the figure.
response_images = [Image.open(os.path.join(cwd, image_file))
                   for _, image_file in response_panels]

fig, axs = plt.subplots(2, 2,
                        figsize=(20, 16))

for panel_ax, (panel_title, _), panel_image in zip(axs.flat, response_panels, response_images):
    panel_ax.set_title(panel_title, fontsize = 25)
    panel_ax.imshow(panel_image)
    panel_ax.axis('off')

fig.tight_layout()
plt.show()

In [None]:
# Write the 2x2 response-map figure to disk.
location = os.path.join(cwd, 'figures\\figure_4.png')
fig.savefig(location)