# Explore Results from Infrastructure Model for Use in Deployment

* Currently covers China only

## Import Libraries

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

## Input/output Files and Parameters

### Input Files

* 10km CS macro grid - with waterbody parameter
* 10km CS macro grid - without waterbody parameter
* Separated for cement and steel

In [None]:
# Paths to the 10km macro-grid model outputs (shapefiles), one per sector
# (cement / steel) and per model variant.  Going by the variable names and the
# list above, the "_correct" files are the runs with the waterbody parameter
# and the "_correct1" files are the runs without it.
macro_10km_cement_water_shp = "../../resources/nt-model/10km_CS_revised/macroloc_cement_CHN_10_correct.shp"
macro_10km_cement_nowater_shp = "../../resources/nt-model/10km_CS_revised/macroloc_cement_CHN_10_correct1.shp"
macro_10km_steel_water_shp = "../../resources/nt-model/10km_CS_revised/macroloc_steel_CHN_10_correct.shp"
macro_10km_steel_nowater_shp = "../../resources/nt-model/10km_CS_revised/macroloc_steel_CHN_10_correct1.shp"

* Cement and steel plants with exact and approximate locations

In [None]:
# Paths to the known-plant datasets (GeoJSON), split by sector and by whether
# the plant location is exact ("ext") or approximate ("appx").
cement_ext_gjson = "../../resources/nt-model/cement_exact_china_v4.1.geojson"
cement_appx_gjson = "../../resources/nt-model/cement_approximate_china_v4.1.geojson"
steel_ext_gjson = "../../resources/nt-model/steel_exact_china_v4.1.geojson"
steel_appx_gjson = "../../resources/nt-model/steel_approximate_china_v4.1.geojson"

### Parameters

In [None]:
# Projected CRS used for every area calculation in this notebook.
# EPSG:6933 (WGS 84 / NSIDC EASE-Grid 2.0 Global) is an equal-area projection
# in metres.  The previous value, EPSG:3395 (World Mercator), is NOT
# equal-area: it inflates areas increasingly with latitude, which overstates
# survey area, percent-of-country and chip counts when compared against the
# true-ground figures below.
calc_crs = "EPSG:6933"

# Reference area of China used for the "percent of country" figures.
china_area = 9326410.0  #sq km

# Landsat-8 chip geometry: chip edge length in pixels and ground sample distance.
l8_chip_size = 35  #pixels
l8_gsd = 0.03  #km

# Sentinel-2 chip geometry: chip edge length in pixels and ground sample distance.
s2_chip_size = 300  #pixels
s2_gsd = 0.01  #km

## Read in Results - Cement

#### With Waterbodies

In [None]:
macro_10km_cement_water_gdf = gpd.read_file(macro_10km_cement_water_shp)

In [None]:
macro_10km_cement_water_gdf.head(10)

In [None]:
print("CRS: ", macro_10km_cement_water_gdf.crs)
print("Number of grid cells in 10km CS Macro, Cement, with Waterbodies: ", len(macro_10km_cement_water_gdf))
print("Count of unique index: ", macro_10km_cement_water_gdf.index.nunique())

#### Without Waterbodies

In [None]:
macro_10km_cement_nowater_gdf = gpd.read_file(macro_10km_cement_nowater_shp)

In [None]:
macro_10km_cement_nowater_gdf.head(10)

In [None]:
print("CRS: ", macro_10km_cement_nowater_gdf.crs)
print("Number of grid cells in 10km CS Macro, Cement, without Waterbodies: ", len(macro_10km_cement_nowater_gdf))
print("Count of unique index: ", macro_10km_cement_nowater_gdf.index.nunique())

## Read in Results - Steel

#### With Waterbodies

In [None]:
macro_10km_steel_water_gdf = gpd.read_file(macro_10km_steel_water_shp)

In [None]:
macro_10km_steel_water_gdf.head(10)

In [None]:
print("CRS: ", macro_10km_steel_water_gdf.crs)
print("Number of grid cells in 10km CS Macro, Steel, with Waterbodies: ", len(macro_10km_steel_water_gdf))
print("Count of unique index: ", macro_10km_steel_water_gdf.index.nunique())

#### Without Waterbodies

In [None]:
macro_10km_steel_nowater_gdf = gpd.read_file(macro_10km_steel_nowater_shp)

In [None]:
macro_10km_steel_nowater_gdf.head(10)

In [None]:
print("CRS: ", macro_10km_steel_nowater_gdf.crs)
print("Number of grid cells in 10km CS Macro, Steel, without Waterbodies: ", len(macro_10km_steel_nowater_gdf))
print("Count of unique index: ", macro_10km_steel_nowater_gdf.index.nunique())

## Read in Cement and Steel Plant Data

### Cement Plants

In [None]:
# Cement plants: exact-location and approximate-location datasets.
cement_ext_gdf = gpd.read_file(filename=cement_ext_gjson)
cement_appx_gdf = gpd.read_file(filename=cement_appx_gjson)

### Steel Plants

In [None]:
# Steel plants: exact-location and approximate-location datasets.
steel_ext_gdf = gpd.read_file(filename=steel_ext_gjson)
steel_appx_gdf = gpd.read_file(filename=steel_appx_gjson)

## Examine Model Accuracy and Deployment Region Considerations

Questions to answer:
1. What fraction of plants with exact locations are within the grid cells? Approximates recall.
2. What fraction of plants with exact locations are not within the grid cells? Approximates false-negative rate.
3. How much total area would we have to survey?
4. How many Landsat-8 chips would need to be created (approximate only)?
5. How many Sentinel-2 chips would need to be created (approximate only)?

In [None]:
# Ground footprint of one square chip: (ground sample distance in km x chip
# edge length in pixels) squared.
l8_chip_area = (l8_gsd * l8_chip_size) ** 2  #sq km
s2_chip_area = (s2_gsd * s2_chip_size) ** 2  #sq km

### Macro 10km Grid - with Waterbodies

In [None]:
macro_10km_water_phys = macro_10km_cement_water_gdf.to_crs(calc_crs)
macro_10km_water_area = sum(macro_10km_water_phys.area / 1e6)

In [None]:
l8_10km_water_chip_cnt = macro_10km_water_area / l8_chip_area
s2_10km_water_chip_cnt = macro_10km_water_area / s2_chip_area

In [None]:
print("Deployment Area for 10km Grid with Waterbodies: ", macro_10km_water_area, " sq km")
print("Percent Area of China: ", macro_10km_water_area/china_area * 100, "%")
print("Number of Landsat-8 Chips Required: ", round(l8_10km_water_chip_cnt))
print("Number of Sentinel-2 Chips Required: ", round(s2_10km_water_chip_cnt))

### Macro 10km Grid - without Waterbodies

In [None]:
macro_10km_nowater_phys = macro_10km_cement_nowater_gdf.to_crs(calc_crs)
macro_10km_nowater_area = sum(macro_10km_nowater_phys.area / 1e6)

In [None]:
l8_10km_nowater_chip_cnt = macro_10km_nowater_area / l8_chip_area
s2_10km_nowater_chip_cnt = macro_10km_nowater_area / s2_chip_area

In [None]:
print("Deployment Area for 10km Grid without Waterbodies: ", macro_10km_nowater_area, " sq km")
print("Percent Area of China: ", macro_10km_nowater_area/china_area * 100, "%")
print("Number of Landsat-8 Chips Required: ", round(l8_10km_nowater_chip_cnt))
print("Number of Sentinel-2 Chips Required: ", round(s2_10km_nowater_chip_cnt))

### Cement

#### 10km Grid - with Waterbodies

In [None]:
cement_ext_water_intsct = gpd.sjoin(macro_10km_cement_water_gdf, cement_ext_gdf, how="inner", op="intersects")

In [None]:
cement_water_tpr = len(cement_ext_water_intsct) / (len(cement_ext_gdf)) * 100

In [None]:
print ("Cement Recall, 10km Grid, with Waterbodies: ", cement_water_tpr, "%")
print ("Cement Miss Rate, 10km Grid: ", 100 - cement_water_tpr, "%")

#### 10km Grid - without Waterbodies

In [None]:
cement_ext_nowater_intsct = gpd.sjoin(macro_10km_cement_nowater_gdf, cement_ext_gdf, how="inner", op="intersects")

In [None]:
cement_nowater_tpr = len(cement_ext_nowater_intsct) / (len(cement_ext_gdf)) * 100

In [None]:
print ("Cement Recall, 10km Grid, without Waterbodies: ", cement_nowater_tpr, "%")
print ("Cement Miss Rate, 10km Grid: ", 100 - cement_nowater_tpr, "%")

#### 10km Grid, with Waterbodies, with Thresholds

In [None]:
preds_thresh = np.arange(0, 0.3, 0.001)
area_thresh_cement_water = [sum(macro_10km_water_phys[macro_10km_water_phys.preds >= thresh].area / 1e6) \
               for thresh in preds_thresh]
cnt_thresh = [cement_ext_water_intsct[cement_ext_water_intsct.preds >= thresh].uid.nunique() \
              for thresh in preds_thresh]

In [None]:
cement_survey_water_df = pd.DataFrame({"thresh": preds_thresh,
                                 "area": area_thresh_cement_water,
                                 "cnt": cnt_thresh})
cement_survey_water_df['tpr'] = cement_survey_water_df.cnt / len(cement_ext_gdf) * 100
cement_survey_water_df['fnr'] = 100 - cement_survey_water_df.tpr
cement_survey_water_df['pct_area'] = cement_survey_water_df.area / china_area * 100
cement_survey_water_df['l8_chp_cnt'] = round(cement_survey_water_df.area / l8_chip_area)
cement_survey_water_df['s2_chp_cnt'] = round(cement_survey_water_df.area / s2_chip_area)

#### 10km Grid, without Waterbodies, with Thresholds

In [None]:
area_thresh_cement_nowater = [sum(macro_10km_nowater_phys[macro_10km_nowater_phys.preds >= thresh].area / 1e6) \
               for thresh in preds_thresh]
cnt_thresh = [cement_ext_nowater_intsct[cement_ext_nowater_intsct.preds >= thresh].uid.nunique() \
              for thresh in preds_thresh]

In [None]:
cement_survey_nowater_df = pd.DataFrame({"thresh": preds_thresh,
                                 "area": area_thresh_cement_nowater,
                                 "cnt": cnt_thresh})
cement_survey_nowater_df['tpr'] = cement_survey_nowater_df.cnt / len(cement_ext_gdf) * 100
cement_survey_nowater_df['fnr'] = 100 - cement_survey_nowater_df.tpr
cement_survey_nowater_df['pct_area'] = cement_survey_nowater_df.area / china_area * 100
cement_survey_nowater_df['l8_chp_cnt'] = round(cement_survey_nowater_df.area / l8_chip_area)
cement_survey_nowater_df['s2_chp_cnt'] = round(cement_survey_nowater_df.area / s2_chip_area)

In [None]:
# Recall against the minimum prediction threshold for both grid variants.
# Curves are drawn first (draw order sets legend order); the star marks row 0
# of the without-waterbodies table.
plt.plot(cement_survey_nowater_df["thresh"], cement_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(cement_survey_water_df["thresh"], cement_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.plot(cement_survey_nowater_df.loc[0, "thresh"], cement_survey_nowater_df.loc[0, "tpr"],
         "b*", label="thresh=0.000")
plt.title("Cement: Recall Versus Prediction Threshold")
plt.xlabel("Prediction Minimum Threshold")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against total survey area for both grid variants.
# Curves are drawn first (draw order sets legend order); the star marks row 0
# of the without-waterbodies table.
plt.plot(cement_survey_nowater_df["area"], cement_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(cement_survey_water_df["area"], cement_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.plot(cement_survey_nowater_df.loc[0, "area"], cement_survey_nowater_df.loc[0, "tpr"],
         "b*", label="thresh=0.000")
plt.title("Cement: Recall Versus Survey Area")
plt.xlabel("Survey Area (sq km)")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against the approximate number of Landsat-8 chips for both grid variants.
plt.plot(cement_survey_nowater_df["l8_chp_cnt"], cement_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(cement_survey_water_df["l8_chp_cnt"], cement_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.title("Cement: Recall Versus No. Landsat-8 Chips")
plt.xlabel("Count of Landsat-8 Chips")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against the approximate number of Sentinel-2 chips for both grid variants.
plt.plot(cement_survey_nowater_df["s2_chp_cnt"], cement_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(cement_survey_water_df["s2_chp_cnt"], cement_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.title("Cement: Recall Versus No. Sentinel-2 Chips")
plt.xlabel("Count of Sentinel-2 Chips")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

### Steel

#### 10km Grid - with Waterbodies

In [None]:
steel_ext_water_intsct = gpd.sjoin(macro_10km_steel_water_gdf, steel_ext_gdf, how="inner", op="intersects")

In [None]:
steel_water_tpr = len(steel_ext_water_intsct) / (len(steel_ext_gdf)) * 100

In [None]:
print ("Steel Recall, 10km Grid, with Waterbodies: ", steel_water_tpr, "%")
print ("Steel Miss Rate, 10km Grid: ", 100 - steel_water_tpr, "%")

#### 10km Grid - without Waterbodies

In [None]:
steel_ext_nowater_intsct = gpd.sjoin(macro_10km_steel_nowater_gdf, steel_ext_gdf, how="inner", op="intersects")

In [None]:
steel_nowater_tpr = len(steel_ext_nowater_intsct) / (len(steel_ext_gdf)) * 100

In [None]:
print ("Steel Recall, 10km Grid, without Waterbodies: ", steel_nowater_tpr, "%")
print ("Steel Miss Rate, 10km Grid: ", 100 - steel_nowater_tpr, "%")

#### 10km Grid, with Waterbodies, with Thresholds

In [None]:
cnt_thresh = [steel_ext_water_intsct[steel_ext_water_intsct.preds >= thresh].uid.nunique() \
              for thresh in preds_thresh]

In [None]:
steel_survey_water_df = pd.DataFrame({"thresh": preds_thresh,
                                 "area": area_thresh_cement_water,
                                 "cnt": cnt_thresh})
steel_survey_water_df['tpr'] = steel_survey_water_df.cnt / len(steel_ext_gdf) * 100
steel_survey_water_df['fnr'] = 100 - steel_survey_water_df.tpr
steel_survey_water_df['pct_area'] = steel_survey_water_df.area / china_area * 100
steel_survey_water_df['l8_chp_cnt'] = round(steel_survey_water_df.area / l8_chip_area)
steel_survey_water_df['s2_chp_cnt'] = round(steel_survey_water_df.area / s2_chip_area)

#### 10km Grid, without Waterbodies, with Thresholds

In [None]:
cnt_thresh = [steel_ext_nowater_intsct[steel_ext_nowater_intsct.preds >= thresh].uid.nunique() \
              for thresh in preds_thresh]

In [None]:
steel_survey_nowater_df = pd.DataFrame({"thresh": preds_thresh,
                                 "area": area_thresh_cement_nowater,
                                 "cnt": cnt_thresh})
steel_survey_nowater_df['tpr'] = steel_survey_nowater_df.cnt / len(steel_ext_gdf) * 100
steel_survey_nowater_df['fnr'] = 100 - steel_survey_nowater_df.tpr
steel_survey_nowater_df['pct_area'] = steel_survey_nowater_df.area / china_area * 100
steel_survey_nowater_df['l8_chp_cnt'] = round(steel_survey_nowater_df.area / l8_chip_area)
steel_survey_nowater_df['s2_chp_cnt'] = round(steel_survey_nowater_df.area / s2_chip_area)

In [None]:
# Recall against the minimum prediction threshold for both grid variants.
# Curves are drawn first (draw order sets legend order); the star marks row 0
# of the without-waterbodies table.
plt.plot(steel_survey_nowater_df["thresh"], steel_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(steel_survey_water_df["thresh"], steel_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.plot(steel_survey_nowater_df.loc[0, "thresh"], steel_survey_nowater_df.loc[0, "tpr"],
         "b*", label="thresh=0.000")
plt.title("Steel: Recall Versus Prediction Threshold")
plt.xlabel("Prediction Minimum Threshold")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against total survey area for both grid variants.
# Curves are drawn first (draw order sets legend order); the star marks row 0
# of the without-waterbodies table.
plt.plot(steel_survey_nowater_df["area"], steel_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(steel_survey_water_df["area"], steel_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.plot(steel_survey_nowater_df.loc[0, "area"], steel_survey_nowater_df.loc[0, "tpr"],
         "b*", label="thresh=0.000")
plt.title("Steel: Recall Versus Survey Area")
plt.xlabel("Survey Area (sq km)")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against the approximate number of Landsat-8 chips for both grid variants.
plt.plot(steel_survey_nowater_df["l8_chp_cnt"], steel_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(steel_survey_water_df["l8_chp_cnt"], steel_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.title("Steel: Recall Versus No. Landsat-8 Chips")
plt.xlabel("Count of Landsat-8 Chips")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()

In [None]:
# Recall against the approximate number of Sentinel-2 chips for both grid variants.
plt.plot(steel_survey_nowater_df["s2_chp_cnt"], steel_survey_nowater_df["tpr"],
         "b-", label="Without Waterbodies")
plt.plot(steel_survey_water_df["s2_chp_cnt"], steel_survey_water_df["tpr"],
         "g--", label="With Waterbodies")
plt.title("Steel: Recall Versus No. Sentinel-2 Chips")
plt.xlabel("Count of Sentinel-2 Chips")
plt.ylabel("Recall (True Positive Rate)")
plt.legend()
plt.show()