In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
from google.colab import drive
import ee
import geemap.foliumap as geemap

In [2]:
ee.Authenticate()
ee.Initialize(project="215656163750")
drive.mount('/content/drive')

sys.path.append('/content/drive/MyDrive/Colab Notebooks/')
import explore

path = "/content/drive/MyDrive/CAFO_data/Misc_global/shapefiles/"

Mounted at /content/drive


In [3]:
# TURKEY - region boundary coords defined by hand using Google Maps

turkey = {"Name": "Turkey", "Code": "TUR"}
turkey["CRS"] = "EPSG:5636"
turkey["Boundary latlon"] = [(38.675596, 30.745101), (38.693221, 30.607857),\
                             (38.788433, 30.603912), (38.810095, 30.636428),\
                             (38.821603, 30.691116), (38.722434, 30.699930)]
turkey["Boundary lonlat"] = [x[::-1] for x in turkey["Boundary latlon"]]
turkey["Boundary"]= Polygon(turkey["Boundary lonlat"])

# Save the region boundary for later use
gdf = gpd.GeoDataFrame(crs="EPSG:4326", geometry=[turkey["Boundary"]])
turkey["Boundary gdf"] = gdf
gdf.to_file(f"{path}Turkey.shp")

In [4]:
# INDIA (Tamil Nadu)

india = {"Name": "India", "Code": "IND"}
india["CRS"] = "EPSG:7785"

india["Boundary latlon"] = [(11.264466, 78.195514), (11.257338, 78.132436),\
                            (11.278384, 78.094542), (11.359775, 78.122547),\
                            (11.351684, 78.183257), (11.282473, 78.202641)]
india["Boundary lonlat"] = [x[::-1] for x in india["Boundary latlon"]]
india["Boundary"]= Polygon(india["Boundary lonlat"])

# Save the region boundary for later use
gdf = gpd.GeoDataFrame(crs="EPSG:4326", geometry=[india["Boundary"]])
india["Boundary gdf"] = gdf
gdf.to_file(f"{path}India.shp")

In [5]:
min_building_size=800

where = india

In [6]:
# Obtain a feature collection of buildings > min_building_size within the
# specified boundary

def get_buildings(area, min_building_size, code):
  gdf = gpd.GeoDataFrame(crs="EPSG:4326", geometry=[area["Boundary"]])
  geom = geemap.geopandas_to_ee(gdf[['geometry']])

  buildings_fc = (
      ee.FeatureCollection(f"projects/sat-io/open-datasets/VIDA_COMBINED/{code}")
      .filter(ee.Filter.gt('area_in_meters', min_building_size))
      .filterBounds(geom)
  )

  return buildings_fc, geom

In [7]:
# Combine closely-spaced buildings into a single polygon, and define a box
# around the centroid of each clump. The box should be roughly the same size as
# the Sentinel snippets I eventually use in model training and application;
# they don't get used directly but are useful for visualizing what we're
# working with

def merge_and_make_box(area, buildings_fc):
  buildings = geemap.ee_to_gdf(buildings_fc)

  merged = buildings.to_crs(area["CRS"]).buffer(50).union_all()
  merged = gpd.GeoDataFrame(merged.geoms).set_geometry(0).set_crs(area["CRS"])

  boxes = gpd.GeoDataFrame(merged.centroid.buffer(200).envelope)
  boxes = boxes.rename(columns={0: "geometry"}).set_geometry("geometry").to_crs("EPSG:4326")

  # remove boxes that intersect with boundary edges, as they'd be discarded
  # when the Sentinel images are created, anyway

  boxes = boxes.sjoin(area["Boundary gdf"], how="inner", predicate="within")
  boxes = boxes.drop(columns=["index_right"]).reset_index(drop=True)

  print(f"Went from {len(buildings)} buildings to {len(boxes)} boxes")

  return merged, boxes

In [8]:
buildings_fc, boundary = get_buildings(where, min_building_size, where["Code"])
merged, boxes = merge_and_make_box(where, buildings_fc)

Went from 568 buildings to 146 boxes


In [9]:
# Visualize the entire area, including its boundary, all the large buildings,
# the merged building polygons, and the boxes

boxes_fc = geemap.geopandas_to_ee(boxes)
merged_fc = geemap.geopandas_to_ee(merged.to_crs("EPSG:4326"))

os.environ["HYBRID"] = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'

boundary_viz = {
  'color': 'red',
  'width': 2,
  'fillColor': '00000000'
}

buildings_viz = {
  'color': 'yellow',
  'width': 2,
  'fillColor': '00000000'
}

merged_viz = {
  'color': 'cyan',
  'width': 2,
  'fillColor': '00000000'
}

boxes_viz = {
  'color': 'blue',
  'width': 2,
  'fillColor': '00000000'
}

Map = geemap.Map()
Map.centerObject(buildings_fc.first().geometry(), 13)
Map.add_basemap("HYBRID")
Map.addLayer(boundary.style(**boundary_viz), {}, "Boundary")
Map.addLayer(buildings_fc.style(**buildings_viz), {}, "Buildings")
Map.addLayer(merged_fc.style(**merged_viz), {}, "Merged")
Map.addLayer(boxes_fc.style(**boxes_viz), {}, "Boxes")

Map

In [10]:
# Step through each of the boxes. Use the "reject" option to mark the CAFOs,
# we'll use the returned list to label them afterwards

#cafos1 = explore.loop_over_buildings(boxes[:50])
#cafos2 = explore.loop_over_buildings(boxes[50:100])
cafos3 = explore.loop_over_buildings(boxes[100:])

Working on feature 1 of 46


Unnamed: 0,geometry
100,"POLYGON ((78.17054 11.31399, 78.17421 11.31399..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 2 of 46


Unnamed: 0,geometry
101,"POLYGON ((78.16748 11.31678, 78.17114 11.31678..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 3 of 46


Unnamed: 0,geometry
102,"POLYGON ((78.16908 11.3192, 78.17274 11.3192, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 4 of 46


Unnamed: 0,geometry
103,"POLYGON ((78.16671 11.32608, 78.17038 11.32608..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 5 of 46


Unnamed: 0,geometry
104,"POLYGON ((78.16378 11.3277, 78.16744 11.3277, ..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 6 of 46


Unnamed: 0,geometry
105,"POLYGON ((78.16013 11.332, 78.16379 11.33201, ..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 7 of 46


Unnamed: 0,geometry
106,"POLYGON ((78.16222 11.33242, 78.16589 11.33243..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 8 of 46


Unnamed: 0,geometry
107,"POLYGON ((78.16736 11.34224, 78.17103 11.34224..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 9 of 46


Unnamed: 0,geometry
108,"POLYGON ((78.17037 11.34403, 78.17403 11.34403..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 10 of 46


Unnamed: 0,geometry
109,"POLYGON ((78.16765 11.34443, 78.17131 11.34444..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 11 of 46


Unnamed: 0,geometry
110,"POLYGON ((78.16136 11.34754, 78.16502 11.34754..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 12 of 46


Unnamed: 0,geometry
111,"POLYGON ((78.17452 11.26725, 78.17818 11.26726..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 13 of 46


Unnamed: 0,geometry
112,"POLYGON ((78.17931 11.2758, 78.18298 11.2758, ..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 14 of 46


Unnamed: 0,geometry
113,"POLYGON ((78.17738 11.27923, 78.18104 11.27924..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 15 of 46


Unnamed: 0,geometry
114,"POLYGON ((78.17078 11.28115, 78.17444 11.28116..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 16 of 46


Unnamed: 0,geometry
115,"POLYGON ((78.17551 11.28123, 78.17918 11.28123..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 17 of 46


Unnamed: 0,geometry
116,"POLYGON ((78.17262 11.28424, 78.17629 11.28424..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 18 of 46


Unnamed: 0,geometry
117,"POLYGON ((78.17938 11.28465, 78.18304 11.28465..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 19 of 46


Unnamed: 0,geometry
118,"POLYGON ((78.17475 11.29323, 78.17841 11.29323..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 20 of 46


Unnamed: 0,geometry
119,"POLYGON ((78.17596 11.29497, 78.17962 11.29498..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 21 of 46


Unnamed: 0,geometry
120,"POLYGON ((78.17736 11.29681, 78.18102 11.29681..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 22 of 46


Unnamed: 0,geometry
121,"POLYGON ((78.17461 11.29978, 78.17827 11.29978..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 23 of 46


Unnamed: 0,geometry
122,"POLYGON ((78.17748 11.30701, 78.18114 11.30701..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 24 of 46


Unnamed: 0,geometry
123,"POLYGON ((78.17292 11.30716, 78.17659 11.30716..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 25 of 46


Unnamed: 0,geometry
124,"POLYGON ((78.17202 11.3116, 78.17568 11.3116, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 26 of 46


Unnamed: 0,geometry
125,"POLYGON ((78.17397 11.32377, 78.17763 11.32377..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 27 of 46


Unnamed: 0,geometry
126,"POLYGON ((78.17497 11.32655, 78.17864 11.32655..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 28 of 46


Unnamed: 0,geometry
127,"POLYGON ((78.17337 11.3389, 78.17703 11.3389, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 29 of 46


Unnamed: 0,geometry
128,"POLYGON ((78.17651 11.34257, 78.18017 11.34257..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 30 of 46


Unnamed: 0,geometry
129,"POLYGON ((78.17909 11.34784, 78.18276 11.34785..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 31 of 46


Unnamed: 0,geometry
130,"POLYGON ((78.18722 11.26531, 78.19088 11.26531..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 32 of 46


Unnamed: 0,geometry
131,"POLYGON ((78.19076 11.26577, 78.19442 11.26578..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 33 of 46


Unnamed: 0,geometry
132,"POLYGON ((78.1875 11.26692, 78.19117 11.26692,..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 34 of 46


Unnamed: 0,geometry
133,"POLYGON ((78.18071 11.27018, 78.18437 11.27018..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 35 of 46


Unnamed: 0,geometry
134,"POLYGON ((78.18386 11.27063, 78.18753 11.27064..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 36 of 46


Unnamed: 0,geometry
135,"POLYGON ((78.18428 11.27413, 78.18794 11.27414..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 37 of 46


Unnamed: 0,geometry
136,"POLYGON ((78.1854 11.2952, 78.18906 11.2952, 7..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 38 of 46


Unnamed: 0,geometry
137,"POLYGON ((78.18785 11.30202, 78.19151 11.30202..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 39 of 46


Unnamed: 0,geometry
138,"POLYGON ((78.19097 11.30268, 78.19463 11.30268..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 40 of 46


Unnamed: 0,geometry
139,"POLYGON ((78.18134 11.3051, 78.18501 11.3051, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 41 of 46


Unnamed: 0,geometry
140,"POLYGON ((78.18181 11.30789, 78.18547 11.30789..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 42 of 46


Unnamed: 0,geometry
141,"POLYGON ((78.18381 11.30991, 78.18747 11.30991..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 43 of 46


Unnamed: 0,geometry
142,"POLYGON ((78.18081 11.3156, 78.18447 11.3156, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 44 of 46


Unnamed: 0,geometry
143,"POLYGON ((78.18768 11.31608, 78.19135 11.31609..."


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 45 of 46


Unnamed: 0,geometry
144,"POLYGON ((78.18533 11.31733, 78.189 11.31733, ..."


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 46 of 46


Unnamed: 0,geometry
145,"POLYGON ((78.18141 11.34154, 78.18508 11.34154..."


Enter reject to reject, exit to exit, or any key to continue  


In [12]:
cafos = cafos1 + cafos2 + cafos3

In [14]:
# Create a df where each row contains the centroid of one of the above boxes,
# is labelled as Unknown CAFO or Non-farm, and has the same columns as all the
# training datasets (Mexico, Iowa, etc.)

# This df doesn't identify any particular building as the main or central one
# in any of the boxes, or contain info about building sizes etc. It could be
# made to, but for now it does not.

def make_final_df(area, boxes, cafos):
  candidates = boxes.copy()

  centroids = candidates.to_crs(area["CRS"]).geometry.centroid.to_crs("EPSG:4326")
  candidates.loc[:, "geometry"] = centroids

  candidates.loc[candidates.index.isin(cafos), 'Farm type'] = "Unknown CAFO"
  candidates.loc[~candidates.index.isin(cafos), 'Farm type'] = "Non-farm"
  coldict = {"Dataset name": area["Name"], "Parent coords": None,\
             "Number of animals": np.nan, "Area (sq m)": np.nan,\
             "Length (m)": np.nan, "Aspect ratio": np.nan}
  for col, val in coldict.items():
    candidates.loc[:, col] = val

  candidates = explore.re_order(candidates)
  print(f'Saving {len(candidates[candidates["Farm type"] == "Unknown CAFO"])} farm coords')
  print(f'Saving {len(candidates[candidates["Farm type"] == "Non-farm"])} non-farm coords')

  path = "/content/drive/MyDrive/CAFO_data/forTraining/interim_files/"
  candidates.to_pickle(f'{path}{area["Name"]}_bldgs.pkl')

In [15]:
make_final_df(where, boxes, cafos)

Saving 86 farm coords
Saving 60 non-farm coords
