In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
from google.colab import drive
import ee
import geemap.foliumap as geemap

In [2]:
# Authenticate with Earth Engine and mount Google Drive (Colab-specific setup)
ee.Authenticate()
ee.Initialize(project="215656163750")
drive.mount('/content/drive')

# `explore` is imported here rather than with the other imports because its
# folder is appended to sys.path only after Drive has been mounted
sys.path.append('/content/drive/MyDrive/Colab Notebooks/')
import explore

# Directory the region-boundary shapefiles are written to
path = "/content/drive/MyDrive/CAFO_data/Misc_global/shapefiles/"

Mounted at /content/drive


In [3]:
# TURKEY - region boundary coords defined by hand using Google Maps

turkey = {
    "Name": "Turkey",
    "Code": "TUR",
    "CRS": "EPSG:5636",
    # Boundary vertices as (lat, lon) pairs
    "Boundary latlon": [
        (38.675596, 30.745101), (38.693221, 30.607857),
        (38.788433, 30.603912), (38.810095, 30.636428),
        (38.821603, 30.691116), (38.722434, 30.699930),
    ],
}
# The polygon is built from (lon, lat) pairs, so swap each vertex
turkey["Boundary lonlat"] = [(lon, lat) for lat, lon in turkey["Boundary latlon"]]
turkey["Boundary"] = Polygon(turkey["Boundary lonlat"])

# Keep the boundary as a lon/lat GeoDataFrame and write it out as a shapefile
# for later use
gdf = gpd.GeoDataFrame(geometry=[turkey["Boundary"]], crs="EPSG:4326")
turkey["Boundary gdf"] = gdf
gdf.to_file(f"{path}Turkey.shp")

In [4]:
# INDIA (Tamil Nadu)

india = {
    "Name": "India",
    "Code": "IND",
    "CRS": "EPSG:7785",
    # Boundary vertices as (lat, lon) pairs
    "Boundary latlon": [
        (11.264466, 78.195514), (11.257338, 78.132436),
        (11.278384, 78.094542), (11.359775, 78.122547),
        (11.351684, 78.183257), (11.282473, 78.202641),
    ],
}
# The polygon is built from (lon, lat) pairs, so swap each vertex
india["Boundary lonlat"] = [(lon, lat) for lat, lon in india["Boundary latlon"]]
india["Boundary"] = Polygon(india["Boundary lonlat"])

# Keep the boundary as a lon/lat GeoDataFrame and write it out as a shapefile
# for later use
gdf = gpd.GeoDataFrame(geometry=[india["Boundary"]], crs="EPSG:4326")
india["Boundary gdf"] = gdf
gdf.to_file(f"{path}India.shp")

In [5]:
# THAILAND

thailand = {
    "Name": "Thailand",
    "Code": "THA",
    "CRS": "EPSG:24047",
    # Boundary vertices as (lat, lon) pairs
    "Boundary latlon": [
        (13.274320, 99.816416), (13.262629, 99.723449),
        (13.473833, 99.591679), (13.489666, 99.724536),
    ],
}
# The polygon is built from (lon, lat) pairs, so swap each vertex
thailand["Boundary lonlat"] = [(lon, lat) for lat, lon in thailand["Boundary latlon"]]
thailand["Boundary"] = Polygon(thailand["Boundary lonlat"])

# Keep the boundary as a lon/lat GeoDataFrame and write it out as a shapefile
# for later use
gdf = gpd.GeoDataFrame(geometry=[thailand["Boundary"]], crs="EPSG:4326")
thailand["Boundary gdf"] = gdf
gdf.to_file(f"{path}Thailand.shp")

In [6]:
# PERU

peru = {
    "Name": "Peru",
    "Code": "PER",
    "CRS": "EPSG:5387",  # UTM 18S
    # Boundary vertices as (lat, lon) pairs
    "Boundary latlon": [
        (-13.437699, -76.128270), (-13.463783, -76.130057),
        (-13.453825, -75.982573), (-13.107994, -76.077131),
        (-13.221282, -76.244480), (-13.322838, -76.241936),
        (-13.399845, -76.110958),
    ],
}
# The polygon is built from (lon, lat) pairs, so swap each vertex
peru["Boundary lonlat"] = [(lon, lat) for lat, lon in peru["Boundary latlon"]]
peru["Boundary"] = Polygon(peru["Boundary lonlat"])

# Keep the boundary as a lon/lat GeoDataFrame and write it out as a shapefile
# for later use
gdf = gpd.GeoDataFrame(geometry=[peru["Boundary"]], crs="EPSG:4326")
peru["Boundary gdf"] = gdf
gdf.to_file(f"{path}Peru.shp")

In [7]:
# Size threshold for candidate buildings; get_buildings keeps only features
# whose 'area_in_meters' property is greater than this value
min_building_size=800

# Region dict that the remaining cells operate on
where = peru

In [8]:
# Obtain a feature collection of buildings > min_building_size within the
# specified boundary

def get_buildings(area, min_building_size, code):
  """Fetch the large buildings that intersect a region's boundary.

  Args:
    area: region dict; only its "Boundary" entry (the region polygon, treated
      as EPSG:4326) is read here.
    min_building_size: exclusive lower bound applied to each feature's
      'area_in_meters' property.
    code: final path component of the VIDA_COMBINED asset to query (the
      notebook passes the region's "Code" entry).

  Returns:
    Tuple (buildings_fc, geom): the filtered ee.FeatureCollection, and the
    boundary converted to an Earth Engine object by geemap.
  """
  # Wrap the boundary polygon so geemap can convert it for Earth Engine
  boundary_gdf = gpd.GeoDataFrame(geometry=[area["Boundary"]], crs="EPSG:4326")
  geom = geemap.geopandas_to_ee(boundary_gdf[['geometry']])

  asset_id = f"projects/sat-io/open-datasets/VIDA_COMBINED/{code}"
  is_large = ee.Filter.gt('area_in_meters', min_building_size)
  buildings_fc = ee.FeatureCollection(asset_id).filter(is_large).filterBounds(geom)

  return buildings_fc, geom

In [9]:
# Combine closely-spaced buildings into a single polygon, and define a box
# around the centroid of each clump. The box should be roughly the same size as
# the Sentinel snippets I eventually use in model training and application;
# they don't get used directly but are useful for visualizing what we're
# working with

def merge_and_make_box(area, buildings_fc, merge_buffer=50, box_half_width=200):
  """Merge nearby buildings into clumps and draw a square box around each.

  Args:
    area: region dict; reads "CRS" (the CRS used for the buffering and
      centroid calculations) and "Boundary gdf" (the region boundary as a
      GeoDataFrame).
    buildings_fc: ee.FeatureCollection of building footprints.
    merge_buffer: distance each footprint is buffered by before dissolving,
      in the units of area["CRS"] (presumably metres for the CRSs used in this
      notebook). Buildings whose buffers overlap end up in the same clump.
    box_half_width: half the side length of the square box drawn around each
      clump centroid, in the same units.

  Returns:
    Tuple (merged, boxes): `merged` is a GeoDataFrame of clump polygons in
    area["CRS"] (geometry column named 0); `boxes` is a GeoDataFrame of the
    boxes, in EPSG:4326, that lie entirely within the region boundary.

  Raises:
    ValueError: if merging the buildings yields no geometry at all.
  """
  buildings = geemap.ee_to_gdf(buildings_fc)

  # Buffer in the region's own CRS, then dissolve overlapping buffers
  dissolved = buildings.to_crs(area["CRS"]).buffer(merge_buffer).union_all()

  # union_all() gives a multi-part geometry only when there are several
  # separate clumps; a lone clump comes back as a plain Polygon with no
  # `.geoms`, so fall back to treating it as a one-element list
  clumps = list(getattr(dissolved, "geoms", [dissolved]))
  if not clumps:
    raise ValueError("No buildings to merge: the feature collection is empty")
  merged = gpd.GeoDataFrame(clumps).set_geometry(0).set_crs(area["CRS"])

  # Square box = bounding envelope of a circle around each clump centroid
  boxes = gpd.GeoDataFrame(merged.centroid.buffer(box_half_width).envelope)
  boxes = boxes.rename(columns={0: "geometry"}).set_geometry("geometry").to_crs("EPSG:4326")

  # remove boxes that intersect with boundary edges, as they'd be discarded
  # when the Sentinel images are created, anyway

  boxes = boxes.sjoin(area["Boundary gdf"], how="inner", predicate="within")
  boxes = boxes.drop(columns=["index_right"]).reset_index(drop=True)

  print(f"Went from {len(buildings)} buildings to {len(boxes)} boxes")

  return merged, boxes

In [10]:
# Fetch the large buildings for the selected region, then merge nearby ones
# into clumps and build a box around each clump
buildings_fc, boundary = get_buildings(where, min_building_size, where["Code"])
merged, boxes = merge_and_make_box(where, buildings_fc)

Went from 1334 buildings to 245 boxes


In [11]:
# Visualize the entire area, including its boundary, all the large buildings,
# the merged building polygons, and the boxes

# Earth Engine copies of the local GeoDataFrames (both in lon/lat)
merged_fc = geemap.geopandas_to_ee(merged.to_crs("EPSG:4326"))
boxes_fc = geemap.geopandas_to_ee(boxes)

# Tile URL stored under the name later passed to add_basemap
# (presumably geemap resolves the basemap name from this env var - confirm)
os.environ["HYBRID"] = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'

# One style dict per layer; they differ only in outline colour
# (fillColor '00000000' is presumably a fully transparent fill)
boundary_viz, buildings_viz, merged_viz, boxes_viz = (
    {'color': outline, 'width': 2, 'fillColor': '00000000'}
    for outline in ('red', 'yellow', 'cyan', 'blue')
)

Map = geemap.Map()
Map.centerObject(buildings_fc.first().geometry(), 13)
Map.add_basemap("HYBRID")

# Layers are added in this order: boundary, buildings, merged clumps, boxes
Map.addLayer(boundary.style(**boundary_viz), {}, "Boundary")
Map.addLayer(buildings_fc.style(**buildings_viz), {}, "Buildings")
Map.addLayer(merged_fc.style(**merged_viz), {}, "Merged")
Map.addLayer(boxes_fc.style(**boxes_viz), {}, "Boxes")

Map

In [21]:
# Step through each of the boxes. Use the "reject" option to mark the CAFOs;
# the returned list is used to label them afterwards.
# NOTE(review): only the boxes from position 200 onward are reviewed here; the
# cell was presumably re-run with other slices to obtain cafos1..cafos4 - confirm

cafos5 = explore.loop_over_buildings(boxes[200:])

Working on feature 1 of 45


Unnamed: 0,geometry
200,"POLYGON ((-76.05867 -13.45472, -76.05497 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 2 of 45


Unnamed: 0,geometry
201,"POLYGON ((-76.04994 -13.45296, -76.04624 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 3 of 45


Unnamed: 0,geometry
202,"POLYGON ((-76.05484 -13.45241, -76.05115 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 4 of 45


Unnamed: 0,geometry
203,"POLYGON ((-76.05048 -13.44896, -76.04679 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 5 of 45


Unnamed: 0,geometry
204,"POLYGON ((-76.04423 -13.43703, -76.04053 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 6 of 45


Unnamed: 0,geometry
205,"POLYGON ((-76.06036 -13.42912, -76.05667 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 7 of 45


Unnamed: 0,geometry
206,"POLYGON ((-76.04449 -13.42682, -76.04079 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 8 of 45


Unnamed: 0,geometry
207,"POLYGON ((-76.05788 -13.41688, -76.05419 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 9 of 45


Unnamed: 0,geometry
208,"POLYGON ((-76.05648 -13.37587, -76.05278 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 10 of 45


Unnamed: 0,geometry
209,"POLYGON ((-76.05245 -13.37087, -76.04875 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 11 of 45


Unnamed: 0,geometry
210,"POLYGON ((-76.03219 -13.45344, -76.02849 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 12 of 45


Unnamed: 0,geometry
211,"POLYGON ((-76.04029 -13.4537, -76.0366 -13.453..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 13 of 45


Unnamed: 0,geometry
212,"POLYGON ((-76.04426 -13.45082, -76.04056 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 14 of 45


Unnamed: 0,geometry
213,"POLYGON ((-76.0382 -13.44885, -76.0345 -13.448..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 15 of 45


Unnamed: 0,geometry
214,"POLYGON ((-76.04205 -13.44754, -76.03835 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 16 of 45


Unnamed: 0,geometry
215,"POLYGON ((-76.03877 -13.44578, -76.03507 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 17 of 45


Unnamed: 0,geometry
216,"POLYGON ((-76.03257 -13.43378, -76.02887 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 18 of 45


Unnamed: 0,geometry
217,"POLYGON ((-76.04112 -13.43738, -76.03743 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 19 of 45


Unnamed: 0,geometry
218,"POLYGON ((-76.04024 -13.43381, -76.03654 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 20 of 45


Unnamed: 0,geometry
219,"POLYGON ((-76.0419 -13.43321, -76.0382 -13.433..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 21 of 45


Unnamed: 0,geometry
220,"POLYGON ((-76.04408 -13.42838, -76.04039 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 22 of 45


Unnamed: 0,geometry
221,"POLYGON ((-76.03923 -13.42619, -76.03554 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 23 of 45


Unnamed: 0,geometry
222,"POLYGON ((-76.02326 -13.456, -76.01957 -13.456..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 24 of 45


Unnamed: 0,geometry
223,"POLYGON ((-76.01197 -13.45442, -76.00827 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 25 of 45


Unnamed: 0,geometry
224,"POLYGON ((-76.02493 -13.45366, -76.02124 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 26 of 45


Unnamed: 0,geometry
225,"POLYGON ((-76.02617 -13.45207, -76.02248 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 27 of 45


Unnamed: 0,geometry
226,"POLYGON ((-76.02152 -13.45114, -76.01783 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 28 of 45


Unnamed: 0,geometry
227,"POLYGON ((-76.02744 -13.45065, -76.02375 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 29 of 45


Unnamed: 0,geometry
228,"POLYGON ((-76.0228 -13.44443, -76.01911 -13.44..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 30 of 45


Unnamed: 0,geometry
229,"POLYGON ((-76.02861 -13.44278, -76.02492 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 31 of 45


Unnamed: 0,geometry
230,"POLYGON ((-76.02237 -13.44271, -76.01868 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 32 of 45


Unnamed: 0,geometry
231,"POLYGON ((-76.01995 -13.43704, -76.01625 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 33 of 45


Unnamed: 0,geometry
232,"POLYGON ((-76.00431 -13.43387, -76.00062 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 34 of 45


Unnamed: 0,geometry
233,"POLYGON ((-76.02094 -13.43194, -76.01725 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 35 of 45


Unnamed: 0,geometry
234,"POLYGON ((-76.01697 -13.43202, -76.01327 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 36 of 45


Unnamed: 0,geometry
235,"POLYGON ((-76.01563 -13.42963, -76.01193 -13.4..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 37 of 45


Unnamed: 0,geometry
236,"POLYGON ((-76.02045 -13.4283, -76.01675 -13.42..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 38 of 45


Unnamed: 0,geometry
237,"POLYGON ((-76.01788 -13.39788, -76.01419 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 39 of 45


Unnamed: 0,geometry
238,"POLYGON ((-76.02126 -13.3977, -76.01756 -13.39..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 40 of 45


Unnamed: 0,geometry
239,"POLYGON ((-76.01699 -13.39399, -76.01329 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 41 of 45


Unnamed: 0,geometry
240,"POLYGON ((-76.02354 -13.39418, -76.01985 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 42 of 45


Unnamed: 0,geometry
241,"POLYGON ((-76.02162 -13.3936, -76.01792 -13.39..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 43 of 45


Unnamed: 0,geometry
242,"POLYGON ((-76.0115 -13.38947, -76.00781 -13.38..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 44 of 45


Unnamed: 0,geometry
243,"POLYGON ((-76.01143 -13.38608, -76.00774 -13.3..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 45 of 45


Unnamed: 0,geometry
244,"POLYGON ((-76.01052 -13.4035, -76.00683 -13.40..."


Enter reject to reject, exit to exit, or any key to continue  reject


In [23]:

# Index labels of the boxes marked as CAFOs, hard-coded in five batches
# (presumably copied from successive loop_over_buildings runs - confirm).
# NOTE: cafos5 here overwrites the value returned by the labelling cell.
cafos1 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
cafos2 = [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 76, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 89, 91, 92, 93, 94, 95, 96, 97, 98, 99]
cafos3 = [100, 101, 102, 103, 107, 109, 110, 111, 114, 115, 116, 117, 118, 119, 124, 126, 128, 129, 131, 132, 133, 134, 135, 136, 137, 138, 141, 142, 144, 146, 149]
cafos4 = [150, 151, 152, 155, 156, 157, 158, 159, 160, 161, 170, 172, 173, 174, 175, 177, 178, 179, 180, 181, 185, 186, 190, 191, 192, 193, 194, 199]
cafos5 = [200, 201, 202, 203, 204, 205, 206, 207, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 227, 228, 229, 230, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244]
# Single combined list of CAFO box indices for the whole region
cafos = cafos1 + cafos2 + cafos3 + cafos4 + cafos5

In [24]:
# Create a df where each row contains the centroid of one of the above boxes,
# is labelled as Unknown CAFO or Non-farm, and has the same columns as all the
# training datasets (Mexico, Iowa, etc.)

# This df doesn't identify any particular building as the main or central one
# in any of the boxes, or contain info about building sizes etc. It could be
# made to, but for now it does not.

def make_final_df(area, boxes, cafos,
                  out_dir="/content/drive/MyDrive/CAFO_data/forTraining/interim_files/"):
  """Label each box centroid as farm / non-farm and pickle the result.

  Args:
    area: region dict; reads "CRS" (used for the centroid calculation) and
      "Name" (stored in the "Dataset name" column and used in the file name).
    boxes: GeoDataFrame of boxes; its index labels are what `cafos` refers to.
    cafos: iterable of index labels of `boxes` to label "Unknown CAFO"; every
      other row is labelled "Non-farm".
    out_dir: directory the pickle is written to. Defaults to the location
      this function previously had hard-coded.

  Returns:
    None. Writes `<out_dir>/<area name>_bldgs.pkl` and prints the label counts.
  """
  candidates = boxes.copy()

  # Compute centroids in the region's own CRS, then convert back to lon/lat
  centroids = candidates.to_crs(area["CRS"]).geometry.centroid.to_crs("EPSG:4326")
  candidates.loc[:, "geometry"] = centroids

  # Labels that match no row would otherwise be dropped without any notice
  unmatched = sorted(set(cafos) - set(candidates.index))
  if unmatched:
    print(f"Warning: {len(unmatched)} CAFO labels not found in boxes: {unmatched}")

  is_cafo = candidates.index.isin(cafos)
  candidates.loc[is_cafo, 'Farm type'] = "Unknown CAFO"
  candidates.loc[~is_cafo, 'Farm type'] = "Non-farm"

  # Remaining columns are filled with a constant per column (name or blank)
  coldict = {"Dataset name": area["Name"], "Parent coords": None,
             "Number of animals": np.nan, "Area (sq m)": np.nan,
             "Length (m)": np.nan, "Aspect ratio": np.nan}
  for col, val in coldict.items():
    candidates.loc[:, col] = val

  candidates = explore.re_order(candidates)
  print(f'Saving {len(candidates[candidates["Farm type"] == "Unknown CAFO"])} farm coords')
  print(f'Saving {len(candidates[candidates["Farm type"] == "Non-farm"])} non-farm coords')

  # os.path.join copes with out_dir given with or without a trailing slash
  candidates.to_pickle(os.path.join(out_dir, f'{area["Name"]}_bldgs.pkl'))

In [25]:
# Label the boxes for the selected region and pickle the result
make_final_df(where, boxes, cafos)

Saving 194 farm coords
Saving 51 non-farm coords
