# Create Geometries for Landcover Classes, for Sentinel-2 Chips

This notebook creates 3 km x 3 km landcover class polygons offset from the cement plants, excluding any that intersect a cement or steel chip, for use as negative examples in S2 model training. Note that these differ from the chips created for the TIR model: we use a larger offset from the cement plants because the S2 chips are 3 km x 3 km.

We create 3 km x 3 km geometries for cement and steel too.

## Import libraries

In [None]:
import geopandas as gpd
from geopandas import GeoDataFrame
import pandas as pd
from shapely.geometry import Point,Polygon, LineString
import os, sys
import matplotlib.pyplot as plt
%matplotlib inline
import fiona
import numpy as np
import time
import folium

## Define buffer and offset sizes

In [None]:
# Distance (m) from each cement plant at which the landcover chips are placed.
offset_size = 7_500

# Half the side length (m) of every chip, whether centred on steel, cement,
# or landcover: the chip side is 2 * buffer_size.
buffer_size = 1_500

## Define input and output files

In [None]:
# Input workbooks (stored locally): directory and file name for each plant type.
cempath, cemfile = r'../../resources/', r'cement_dataset_v4.xlsx'
steelpath, steelfile = r'../../resources/', 'steel_dataset_v4.xlsx'

# Output GeoJSON files (stored locally), one per chip class.
# These are relative names; the write cells join them onto cempath / steelpath.
landout = 'cement_steel_land_geoms/landcover_datasetv4_UTM_CHINA_exactlocPOLYS_3000m.geojson'
steelout = 'cement_steel_land_geoms/steel_datasetv4_UTM_CHINA_exactlocPOLYS_3000m.geojson'
cemout = 'cement_steel_land_geoms/cement_datasetv4_UTM_CHINA_exactlocPOLYS_3000m.geojson'

## Define function to create cement and steel polygons

In [None]:
def create_polys(path, filename, planttype, buffer_size=700):
    """Build square chips centred on the Chinese plants listed in a workbook.

    The workbook is read with pandas, rows without a latitude are dropped,
    and each remaining plant becomes a point (longitude, latitude) in
    EPSG:4326. The points are reprojected to EPSG:3395, buffered by
    ``buffer_size`` and replaced by the bounding box of that buffer, giving
    squares with a side of ``2 * buffer_size`` in projected units.

    Parameters
    ----------
    path : str
        Directory containing the workbook.
    filename : str
        Workbook file name.
    planttype : str
        ``'cement'`` additionally restricts the result to rows whose
        ``plant_type`` is ``'Integrated'``; any other value is treated as
        steel and gets no extra filter.
    buffer_size : int or float, optional
        Half the side length of each square, in EPSG:3395 units.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows with ``iso3 == 'CHN'`` and ``accuracy == 'Exact'``, carrying the
        square geometry and a ``plantID`` column.
    """
    # Load the plants and keep only those that have a latitude.
    plants = pd.read_excel(os.path.join(path, filename))
    plants = plants[plants['latitude'].notnull()]

    # One point per plant, x = longitude, y = latitude.
    plant_points = [
        Point(lon, lat) for lon, lat in zip(plants.longitude, plants.latitude)
    ]
    plants_wgs84 = gpd.GeoDataFrame(plants, crs='EPSG:4326', geometry=plant_points)

    # Reproject to a metric CRS so the buffer distance is not in degrees.
    # NOTE(review): EPSG:3395 is World Mercator rather than a UTM zone, so
    # projected distances grow with latitude - confirm that this is intended.
    plants_proj = plants_wgs84.to_crs('EPSG:3395')
    print("CRS = ", plants_proj.crs)

    # Swap every point for the bounding box of its circular buffer.
    chip_squares = plants_proj.buffer(buffer_size).envelope
    print("Buffer size = ", buffer_size, " m")
    plants_proj['geometry'] = chip_squares

    # plantID numbers all plants that survived the latitude check, i.e. it is
    # assigned before the country / accuracy / type filters below.
    plants_proj['plantID'] = np.arange(len(plants_proj))

    # Only include exactly located plants in China.
    in_china_exact = (plants_proj.iso3 == 'CHN') & (plants_proj.accuracy == 'Exact')
    selected = plants_proj[in_china_exact]

    if planttype != 'cement':
        print('steel plant')
        return selected

    print('cement plant')
    # For cement only: keep integrated plants.
    return selected[selected.plant_type == 'Integrated']

## Define function to create landcover polygons

In [None]:
def get_lc_centrepoints(path, filename, buffer_size=700, offset_size=3500):
    """Create square landcover chips at the corners of a box around each plant.

    The workbook is read with pandas and rows without a latitude are dropped.
    Plants with ``iso3 == 'CHN'``, ``accuracy == 'Exact'`` and
    ``plant_type == 'Integrated'`` are projected to EPSG:3395 and each one is
    surrounded by a square of half-width ``offset_size``. A chip of half-width
    ``buffer_size`` is then centred on each of the four corners of that
    square.

    Overlapping offset squares are NOT merged, so chips from neighbouring
    plants may overlap each other.

    Parameters
    ----------
    path : str
        Directory containing the workbook.
    filename : str
        Workbook file name.
    buffer_size : int or float, optional
        Half the side length of each landcover chip, in EPSG:3395 units.
    offset_size : int or float, optional
        Distance from the plant to the sides of the offset square, in
        EPSG:3395 units.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns ``id`` (1-based, zero-padded to four characters) and
        ``geometry`` (square polygons, EPSG:3395); one row per chip.
    """
    projected_crs = 'EPSG:3395'

    # Load the plants and keep only those that have a latitude.
    plants = pd.read_excel(os.path.join(path, filename))
    plants = plants[plants['latitude'].notnull()]

    # One point per plant, x = longitude, y = latitude.
    plant_points = [Point(xy) for xy in zip(plants.longitude, plants.latitude)]
    plants_wgs84 = gpd.GeoDataFrame(plants, crs='EPSG:4326', geometry=plant_points)

    # Reproject to a metric CRS so the buffer distances are not in degrees.
    # NOTE(review): EPSG:3395 is World Mercator rather than a UTM zone, so
    # projected distances grow with latitude - confirm that this is intended.
    plants_proj = plants_wgs84.to_crs(projected_crs)
    print('CRS: ', plants_proj.crs)

    # Limit to exactly located, integrated plants in China.
    selected = plants_proj[plants_proj.iso3 == 'CHN']
    selected = selected[selected.accuracy == 'Exact']
    selected = selected[selected.plant_type == 'Integrated']

    # Large square around every plant; its corners become the chip centres.
    offset_squares = selected.buffer(offset_size).envelope

    corner_points = []
    for square in offset_squares:
        # A polygon's exterior ring is closed, so its last coordinate repeats
        # the first one. Skip it, otherwise that corner yields a duplicate chip.
        ring = list(square.exterior.coords)[:-1]
        corner_points.extend(Point(xy) for xy in ring)

    # Square chip around every corner point.
    chip_squares = (
        gpd.GeoSeries(corner_points, crs=projected_crs)
        .buffer(buffer_size)
        .envelope
    )

    # Exactly one id per chip. Building the frame in a single constructor call
    # keeps ids and geometries the same length, so no row can be dropped by
    # index alignment.
    chip_ids = [str(i).zfill(4) for i in range(1, len(chip_squares) + 1)]
    gdf_polys = gpd.GeoDataFrame(
        {'id': chip_ids, 'geometry': chip_squares},
        crs=projected_crs,
    )
    print("Buffer size: ", buffer_size, " m")
    print("landcover")

    return gdf_polys

## Create polygons and write to geojson

### STEEL

In [None]:
# Build the square chips (side = 2 * buffer_size) around the steel plants
steel = create_polys(
    path=steelpath,
    filename=steelfile,
    planttype='steel',
    buffer_size=buffer_size,
)

In [None]:
# Write to GeoJson
steel_outfile = os.path.join(steelpath, steelout)
# steelout contains a subdirectory; create it so the write does not fail on
# a fresh checkout where that folder does not exist yet.
os.makedirs(os.path.dirname(steel_outfile), exist_ok=True)
steel.to_file(steel_outfile, driver='GeoJSON')

### Cement

In [None]:
# Build the square chips (side = 2 * buffer_size) around the cement plants
cem = create_polys(
    path=cempath,
    filename=cemfile,
    planttype='cement',
    buffer_size=buffer_size,
)

In [None]:
# Write to GeoJson
cem_outfile = os.path.join(cempath, cemout)
# cemout contains a subdirectory; create it so the write does not fail on
# a fresh checkout where that folder does not exist yet.
os.makedirs(os.path.dirname(cem_outfile), exist_ok=True)
cem.to_file(cem_outfile, driver='GeoJSON')

### Landcover

In [None]:
# Build the landcover chips, placed offset_size away from the cement plants
lc = get_lc_centrepoints(
    path=cempath,
    filename=cemfile,
    buffer_size=buffer_size,
    offset_size=offset_size,
)

In [None]:
# Exclude landcover polygons that intersect w/ cement or steel.
# The spatial predicate is deliberately left at sjoin's default, which is
# 'intersects': the keyword that selects it was renamed from `op` to
# `predicate` between geopandas releases, so omitting it keeps this cell
# working on both old and new versions.
lc_intrsct_cem = gpd.sjoin(lc, cem, how='inner')
lc = lc[~lc.id.isin(lc_intrsct_cem['id'])]

# Repeat against the steel chips, using the already reduced landcover set.
lc_intrsct_steel = gpd.sjoin(lc, steel, how='inner')
lc = lc[~lc.id.isin(lc_intrsct_steel['id'])]

In [None]:
# Write to GeoJson
land_outfile = os.path.join(cempath, landout)
# landout contains a subdirectory; create it so the write does not fail on
# a fresh checkout where that folder does not exist yet.
os.makedirs(os.path.dirname(land_outfile), exist_ok=True)
lc.to_file(land_outfile, driver='GeoJSON')

## Visualize polygons

In [None]:
# Each layer is reprojected to lon/lat (OGC:CRS84) before it is handed to
# folium, and gets its own fill/outline colour through a style function.

# Plot cement polygons (magenta); the map is centred on their mean centroid
style_function = lambda x: {'fillColor': '#f003fc', 'color': '#f003fc'}
cement_gpd = cem.geometry.to_crs('OGC:CRS84')
cement_polys = folium.features.GeoJson(cement_gpd.to_json(), style_function=style_function)
m = folium.Map(location=[cement_gpd.geometry.centroid.y.mean(),
                         cement_gpd.geometry.centroid.x.mean()],
               zoom_start=4)

# Plot steel polygons (blue)
style_function = lambda x: {'fillColor': '#3734eb', 'color': '#3734eb'}
steel_gpd = steel.geometry.to_crs('OGC:CRS84')
steel_polys = folium.features.GeoJson(steel_gpd.to_json(), style_function=style_function)

# Plot landcover polygons (green)
style_function = lambda x: {'fillColor': '#68eb34', 'color': '#68eb34'}
land_gpd = lc.geometry.to_crs('OGC:CRS84')
land_polys = folium.features.GeoJson(land_gpd.to_json(), style_function=style_function)

# add_child is the supported method; add_children was only a deprecated alias
# for it and is not available in recent releases.
m.add_child(cement_polys)
m.add_child(steel_polys)
m.add_child(land_polys)

# Last expression of the cell: renders the map in the notebook
m