# Define Additional Deployment Regions Centered on Approximate Plant Locations

There are a number of plants with approximate locations. Define deployment regions centered around these plants.

## Import Libraries

In [None]:
import os
import pandas as pd
import geopandas as gpd

## Define input and output files

* Input: Cement and steel plants (v4.1) with approximate locations in China (csv)
* Output: Polygons centered on approximate locations for cement and steel plants for use in model deployment

In [None]:
# Input tables (CSV): v4.1 asset subsets for China holding plants with
# approximate locations, one file per sector.
steel_input_csv = (
    '../../resources/asset-subsets-v4p1/steel_approximate_china_v4.1.csv'
)
cement_input_csv = (
    '../../resources/asset-subsets-v4p1/cement_approximate_china_v4.1.csv'
)

In [None]:
# Output files (GeoJSON): one polygon layer per sector, written at the end of
# the corresponding section below.
steel_output_gjson = (
    "../../resources/macro-loc-model-deployment/steel_approximate_china_v4.1.geojson"
)
cement_output_gjson = (
    "../../resources/macro-loc-model-deployment/cement_approximate_china_v4.1.geojson"
)

## Define buffer sizes, and CRS for calculations

* `buffer_size`: half the side length (in m) of the square region centered on each steel or cement approximate location
* `calc_crs`: projected coordinate reference system, with units of m, used for the buffer calculations

In [None]:
# Distance passed to buffer() around each plant, in metres of `calc_crs`.
# The final square is the bounding box of that buffer, so its side length is
# twice this value.
buffer_size = 12_500

# CRS the points are reprojected into before buffering.
# NOTE(review): EPSG:3395 is World Mercator, where projected metres are
# stretched relative to ground distance away from the equator -- confirm that
# the 12.5 km half-width is intended in projected rather than ground units.
calc_crs = "EPSG:3395"

## Create cement plant geojson file

In [None]:
# Load the cement plant table. index_col=False stops pandas from using the
# first CSV column as the row index.
cement_df = pd.read_csv(
    filepath_or_buffer=cement_input_csv,
    index_col=False,
)

### Statistics and quality checks

In [None]:
# Total number of cement plants in the input table
tot_cnt = len(cement_df)
print("Count of cement plants: ", tot_cnt)

# Number of plants without a usable position. A point needs both coordinates,
# so a row is counted when either latitude or longitude is missing.
null_cnt = int(cement_df[['latitude', 'longitude']].isna().any(axis=1).sum())
print("Count of cement plants with null position values: ", null_cnt)

In [None]:
# Limit to plants that have an approximate lat/long. Both coordinates are
# needed to build a point, so drop rows where either one is missing.
cement_df = cement_df.dropna(subset=['latitude', 'longitude'])

### Create GeoDataFrame (define geometry and crs)

In [None]:
# One point per plant (x = longitude, y = latitude), declared as EPSG:4326.
# Only the plant `uid` is carried along as an attribute column.
cement_gdf = gpd.GeoDataFrame(
    data=cement_df['uid'],
    geometry=gpd.points_from_xy(x=cement_df['longitude'],
                                y=cement_df['latitude']),
    crs="EPSG:4326",
)

#### Convert to EPSG:3395 and create buffer

In [None]:
# Reproject the plant points into the calculation CRS so that the buffer
# distance below is expressed in that CRS's units.
cement_gdf = cement_gdf.to_crs(crs=calc_crs)

In [None]:
# Replace each plant point with a square region: buffer the point by
# `buffer_size`, then take the bounding box (envelope) of that buffer.
# The non-geometry columns (the plant `uid`) are kept so every output polygon
# can be traced back to its plant; building the frame from the geometry alone
# would silently drop them.
cement_gdf = gpd.GeoDataFrame(
    cement_gdf.drop(columns=cement_gdf.geometry.name),
    geometry=cement_gdf.buffer(buffer_size).envelope,
    crs=calc_crs,
)

### Write GeoJSON file

In [None]:
# Convert the square regions back to EPSG:4326 before writing the GeoJSON.
cement_gdf = cement_gdf.to_crs("EPSG:4326")

# to_file() does not create missing parent directories, so make sure the
# output folder exists (e.g. on a fresh checkout) before writing.
cement_output_dir = os.path.dirname(cement_output_gjson)
if cement_output_dir:
    os.makedirs(cement_output_dir, exist_ok=True)

cement_gdf.to_file(cement_output_gjson, driver='GeoJSON')

## Create steel plant geojson file

In [None]:
# Load the steel plant table. index_col=False stops pandas from using the
# first CSV column as the row index.
steel_df = pd.read_csv(
    filepath_or_buffer=steel_input_csv,
    index_col=False,
)

### Statistics and quality checks

In [None]:
# Total number of steel plants in the input table
tot_cnt = len(steel_df)
print("Count of steel plants: ", tot_cnt)

# Number of plants without a usable position. A point needs both coordinates,
# so a row is counted when either latitude or longitude is missing.
null_cnt = int(steel_df[['latitude', 'longitude']].isna().any(axis=1).sum())
print("Count of steel plants with null position values: ", null_cnt)

In [None]:
# Limit to plants that have an approximate lat/long. Both coordinates are
# needed to build a point, so drop rows where either one is missing.
steel_df = steel_df.dropna(subset=['latitude', 'longitude'])

### Create GeoDataFrame (define geometry and crs)

In [None]:
# One point per plant (x = longitude, y = latitude), declared as EPSG:4326.
# Only the plant `uid` is carried along as an attribute column.
steel_gdf = gpd.GeoDataFrame(
    data=steel_df['uid'],
    geometry=gpd.points_from_xy(x=steel_df['longitude'],
                                y=steel_df['latitude']),
    crs="EPSG:4326",
)

#### Convert to EPSG:3395 and create buffer

In [None]:
# Reproject the plant points into the calculation CRS so that the buffer
# distance below is expressed in that CRS's units.
steel_gdf = steel_gdf.to_crs(crs=calc_crs)

In [None]:
# Replace each plant point with a square region: buffer the point by
# `buffer_size`, then take the bounding box (envelope) of that buffer.
# The non-geometry columns (the plant `uid`) are kept so every output polygon
# can be traced back to its plant; building the frame from the geometry alone
# would silently drop them.
steel_gdf = gpd.GeoDataFrame(
    steel_gdf.drop(columns=steel_gdf.geometry.name),
    geometry=steel_gdf.buffer(buffer_size).envelope,
    crs=calc_crs,
)

### Write GeoJSON file

In [None]:
# Convert the square regions back to EPSG:4326 before writing the GeoJSON.
steel_gdf = steel_gdf.to_crs("EPSG:4326")

# to_file() does not create missing parent directories, so make sure the
# output folder exists (e.g. on a fresh checkout) before writing.
steel_output_dir = os.path.dirname(steel_output_gjson)
if steel_output_dir:
    os.makedirs(steel_output_dir, exist_ok=True)

steel_gdf.to_file(steel_output_gjson, driver='GeoJSON')