# Create Annotations for Sentinel-2 RGB Chips

This notebook creates a csv file with annotations for the cement and steel plant Sentinel-2 chips.

* Limited to plants with exact locations in China
* Output columns:
    * uid: unique plant identifier
    * filename: GeoTiff chip for the plant
    * long_min: minimum longitude at 1-km buffer around the plant
    * lat_min: minimum latitude at 1-km buffer around the plant
    * long_max: maximum longitude at 1-km buffer around the plant
    * lat_max: maximum latitude at 1-km buffer around the plant
    * class: plant type (steel = 1, cement = 2)

## Import libraries

In [None]:
import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
from shapely.geometry import Point,Polygon, LineString
import os, sys
import matplotlib.pyplot as plt
%matplotlib inline
import fiona
import numpy as np
import time
import folium

## Define buffer size

In [None]:
# Half-width, in metres, of the box drawn around each cement or steel plant
# when computing its min/max longitude and latitude.
buffer_size = 1_000

## Define input and output files

In [None]:
# Local folder holding the input data sets, and the Excel workbooks
# for the cement and steel plants inside it.
path = '../../resources/'
cemfile = 'cement_dataset_v4.xlsx'
steelfile = 'steel_dataset_v4.xlsx'

# Local destination of the annotation csv produced by this notebook.
csvout = (
    '../../resources/cement_steel_chip_annotations/'
    'cement_steel_S2_chip_annotations_v4_CHINA.csv'
)

## Define function to return min/max long/lat

In [None]:
def get_long_lat_bounds(path, filename, buffer_size=1000, cement=False):
    """Return long/lat bounding boxes around plants with exact locations in China.

    Reads the Excel data set at ``os.path.join(path, filename)``, keeps the
    rows with ``iso3 == 'CHN'`` and ``accuracy == 'Exact'`` (and, when
    ``cement`` is true, ``plant_type == 'Integrated'``), buffers every plant
    location by ``buffer_size`` in EPSG:3395 and returns the bounds of each
    buffer's envelope after converting it back to EPSG:4326.

    Parameters
    ----------
    path : str
        Directory containing the data set.
    filename : str
        Name of the Excel file inside ``path``. It must provide the columns
        ``uid``, ``iso3``, ``accuracy``, ``longitude`` and ``latitude``
        (plus ``plant_type`` when ``cement`` is true).
    buffer_size : float, optional
        Buffer distance in EPSG:3395 map units (metres). Default 1000.
    cement : bool, optional
        If true, keep only plants whose ``plant_type`` is ``'Integrated'``.

    Returns
    -------
    pandas.DataFrame
        One row per retained plant with the columns ``uid``, ``long_min``,
        ``lat_min``, ``long_max`` and ``lat_max``.
    """
    df = pd.read_excel(os.path.join(path, filename))

    # Limit to China, exact locations
    keep = (df.iso3 == 'CHN') & (df.accuracy == 'Exact')
    # If cement, limit to integrated
    if cement:
        keep &= df.plant_type == 'Integrated'
    df = df[keep]

    # Define the point geometry of every plant (longitude = x, latitude = y)
    geometry = [Point(xy) for xy in zip(df.longitude, df.latitude)]

    # Create a geodataframe in geographic coordinates
    geo_df = gpd.GeoDataFrame(df, crs='EPSG:4326', geometry=geometry)

    # Project to a metric CRS, buffer, take the axis-aligned envelope and
    # convert the result back to long/lat.
    # NOTE: EPSG:3395 is WGS 84 / World Mercator, not UTM. Mercator stretches
    # distances by about 1/cos(latitude), so the box on the ground is smaller
    # than ``buffer_size`` away from the equator. The projection is kept
    # unchanged so the annotations stay consistent with earlier outputs.
    envelopes = (
        geo_df.to_crs('EPSG:3395')
        .buffer(buffer_size)
        .envelope
        .to_crs('EPSG:4326')
    )

    # Shapely bounds are (minx, miny, maxx, maxy) = (long_min, lat_min,
    # long_max, lat_max).
    rows = [(uid, *geom.bounds) for uid, geom in zip(geo_df['uid'], envelopes)]

    # Return data frame (column was previously misspelled as 'lat_min,')
    return pd.DataFrame(
        rows,
        columns=['uid', 'long_min', 'lat_min', 'long_max', 'lat_max'],
    )

## Create annotations and write to csv

### Steel

In [None]:
# Get min/max long/lat around every steel plant.
# Use the notebook-level `buffer_size` setting instead of a hard-coded 1000,
# so changing the setting actually changes the annotations.
steel_df = get_long_lat_bounds(path, steelfile, buffer_size=buffer_size, cement=False)

# Define chip name
# (See 03-S2-RGB-steel-chip-creation.ipynb)
steel_df['filename'] = steel_df['uid'] + '_steel_v4_S2_RGB_20200601_20200731.tif'

# Define class (steel = 1)
steel_df['class'] = 1
steel_df

### Cement

In [None]:
# Get min/max long/lat around every integrated cement plant.
# Use the notebook-level `buffer_size` setting instead of a hard-coded 1000,
# so changing the setting actually changes the annotations.
cement_df = get_long_lat_bounds(path, cemfile, buffer_size=buffer_size, cement=True)

# Define chip name
# (See 01-S2-RGB-cement-chip-creation.ipynb)
cement_df['filename'] = cement_df['uid'] + '_cement_v4_S2_RGB_20200601_20200731.tif'

# Define class (cement = 2)
cement_df['class'] = 2
cement_df

### Concatenate and save

In [None]:
# Merge steel and cement (cement rows first, then steel rows).
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# As with append, each frame keeps its own index labels.
plant_df = pd.concat([cement_df, steel_df])

In [None]:
# Write to csv; create the output folder first so a fresh checkout
# without the annotations directory does not fail on the write.
out_dir = os.path.dirname(csvout)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
plant_df.to_csv(csvout)