In [1]:
%load_ext autoreload
%autoreload 2

import sys, os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
from google.colab import drive
import ee
import geemap.foliumap as geemap

In [2]:
# Authenticate with Earth Engine, initialize it against the project, and mount
# Google Drive so that the data files and the local `explore` module are reachable.
ee.Authenticate()
ee.Initialize(project="215656163750")
drive.mount('/content/drive')

# Make the directory holding the `explore` module importable. The membership
# check keeps sys.path free of duplicate entries when this cell is re-run.
module_dir = '/content/drive/MyDrive/Colab Notebooks/'
if module_dir not in sys.path:
  sys.path.append(module_dir)
import explore

Mounted at /content/drive


In [21]:
def get_relevant_boundaries(state_boundary, subareas, old_col, new_col, path, fname,
                            predicate="intersects"):
  """
  For application regions that aren't whole countries, get the boundaries of
  the lower-level admin units within the district of interest.

  The selected units are also written to disk as GeoJSON so that they can be
  used by getSentinel.

  Parameters
  ----------
  state_boundary : GeoDataFrame
    Boundary of the higher-level region of interest.
  subareas : GeoDataFrame
    Lower-level admin units, of which those matching `state_boundary` are kept.
  old_col : str
    Name of the column in `subareas` holding the admin-unit name.
  new_col : str
    Standardized name to give that column in the output.
  path : str
    Directory in which to save the boundary file.
  fname : str
    Name of the GeoJSON file to write inside `path`.
  predicate : str, optional
    Spatial predicate passed to `gpd.sjoin`. The default, "intersects", also
    matches units that only touch or straddle the boundary (e.g. units of a
    neighbouring state); pass e.g. "within" for a stricter selection.

  Returns
  -------
  GeoDataFrame
    Columns `new_col` and "geometry", with a fresh 0..n-1 index.

  Raises
  ------
  KeyError
    If neither `old_col` nor `new_col` is present after the spatial join.
  """

  # lsuffix=None is kept from the original call.
  # NOTE(review): presumably this keeps the `subareas` column names unsuffixed
  # when both inputs share a column name -- confirm with the installed geopandas.
  joined = gpd.sjoin(subareas, state_boundary, how="inner",
                     predicate=predicate, lsuffix=None)

  # Fail early with a readable message instead of an opaque column-selection error
  if old_col not in joined.columns and new_col not in joined.columns:
    raise KeyError(f"Column '{old_col}' not found after spatial join; "
                   f"available columns: {list(joined.columns)}")

  # Standardize the admin region name, drop extraneous columns
  gdf = joined.rename(columns={old_col: new_col})
  gdf = gdf[[new_col, "geometry"]].reset_index(drop=True)

  # Save boundary file for use by getSentinel. os.path.join gives the same
  # result as plain concatenation when `path` ends with a separator, and still
  # produces a valid path when it does not.
  out_file = os.path.join(path, fname)
  print(f"Saving boundary file to {out_file}")
  gdf.to_file(out_file, driver="GeoJSON")

  return gdf


In [14]:
# India - Gujarat

path = "/content/drive/MyDrive/CAFO_data/India/"

# Region descriptor: name, code and CRS string are set up front; the boundary
# table and the name of its region column are added below.
gujarat = {"Name": "Gujarat", "Code": "IND", "CRS": "EPSG:7761"}

# State outline: the ADM1 row(s) whose shapeISO is "IN-GJ"
india_adm1 = gpd.read_file(path + "geoBoundaries-IND-ADM1.geojson")
state_boundary = india_adm1.loc[india_adm1["shapeISO"] == "IN-GJ"]

# ADM3 units selected against the state outline. The helper also writes
# Gujarat_ADM3.geojson into `path`.
india_adm3 = gpd.read_file(path + "geoBoundaries-IND-ADM3.geojson")
gujarat.update({
  "Boundary gdf": get_relevant_boundaries(
    state_boundary, india_adm3, "shapeName", "ADM3", path, "Gujarat_ADM3.geojson"
  ),
  "Region col": "ADM3",
})

In [51]:
# Sudan - entire country

path = "/content/drive/MyDrive/CAFO_data/Misc_global/shapefiles/"

# Region descriptor: name, code and CRS string
sudan = {"Name": "Sudan", "Code": "SDN", "CRS": "EPSG:20135"}

# Whole-country case: read the ADM2 shapefile directly, rename its name column
# to the standard "ADM2", and keep only that column plus the geometry.
sudan["Boundary gdf"] = (
  gpd.read_file(path + "sdn_admbnda_adm2_cbs_nic_ssa_20200831.shp")
  .rename(columns={"ADM2_EN": "ADM2"})
)[["ADM2", "geometry"]]
sudan["Region col"] = "ADM2"

In [22]:
# South Africa - Western Cape

path = "/content/drive/MyDrive/CAFO_data/Misc_global/shapefiles/"

# Region descriptor: name, code and CRS string; the boundary table and the
# name of its region column are added below.
# NOTE(review): confirm that EPSG:2055 is the intended projected CRS for the
# Western Cape's longitude range.
w_cape = {"Name": "Western Cape", "Code": "ZAF", "CRS": "EPSG:2055"}

# Province outline: the ADM1 row(s) named "Western Cape"
sa_adm1 = gpd.read_file(path + "zaf_admbnda_adm1_sadb_ocha_20201109.shp")
state_boundary = sa_adm1.loc[sa_adm1["ADM1_EN"] == "Western Cape"]

# ADM3 units selected against the province outline. The helper also writes
# WesternCape_ADM3.geojson into `path`.
sa_adm3 = gpd.read_file(path + "zaf_admbnda_adm3_sadb_ocha_20201109.shp")
w_cape.update({
  "Boundary gdf": get_relevant_boundaries(
    state_boundary, sa_adm3, "ADM3_EN", "ADM3", path, "WesternCape_ADM3.geojson"
  ),
  "Region col": "ADM3",
})

Saving boundary file to /content/drive/MyDrive/CAFO_data/Misc_global/shapefiles/WesternCape_ADM3.geojson


In [18]:
# Size threshold handed to explore.get_buildings
# (presumably an area in square metres -- TODO confirm against explore)
min_building_size = 800

# Sentinel bands and year handed to explore.get_sentinel
sentinel_bands = ["B4", "B3", "B2"]
sentinel_year = 2023

# Region descriptor that the cells below operate on
where = w_cape

In [19]:

# Per-region results, keyed by admin region name
data_dict = {}

# The explore helpers read the boundary from `where`, so `where` is narrowed to
# one admin region at a time. Keep the full table so it can be put back.
original_boundaries = where["Boundary gdf"].copy()
region_col = where["Region col"]
regions = original_boundaries[region_col].unique()

try:
  # 1-based counter so that the progress reads (1/N) ... (N/N)
  for n, adm_region in enumerate(regions, start=1):
    print(f'Processing {adm_region} ({n}/{len(regions)})')

    # Get the large buildings for this region
    where["Boundary gdf"] = original_boundaries[original_boundaries[region_col] == adm_region]
    # NOTE(review): only the first geometry is used if several rows share a name
    where["Boundary"] = where["Boundary gdf"].geometry.iloc[0]
    buildings_fc, boundary = explore.get_buildings(where, min_building_size, where["Code"])

    # Some regions have no large buildings, so continue w/o them
    if buildings_fc.first().getInfo() is None:
      print(f" -- No large buildings in {adm_region}")
      continue

    # Merge buildings into "clusters", find largest building per cluster
    merged, largest = explore.merge_and_make_box(where, buildings_fc)

    # Get Sentinel data for visualization
    sentinel = explore.get_sentinel(where, boundary, sentinel_bands, sentinel_year)

    # Add output to data dict
    data_dict[adm_region] = {"Buildings": buildings_fc, "Boundary": boundary,
                             "Merged": merged, "Largest": largest,
                             "Sentinel": sentinel}
finally:
  # Restore original admin regions, even if a region fails or the cell is
  # interrupted; otherwise a re-run would only see the last region processed.
  where["Boundary gdf"] = original_boundaries


Processing Beaufort West (0/31)
Went from 131 buildings to 80 boxes
Processing Bergrivier (1/31)
Went from 407 buildings to 260 boxes
Processing Bitou (2/31)
Went from 172 buildings to 100 boxes
Processing Breede Valley (3/31)
Went from 1070 buildings to 424 boxes
Processing Cape Agulhas (4/31)
Went from 303 buildings to 145 boxes
Processing Cederberg (5/31)
Went from 488 buildings to 270 boxes
Processing City of Cape Town (6/31)




Went from 13213 buildings to 2890 boxes
Processing Dr Beyers Naude (7/31)
Went from 186 buildings to 117 boxes
Processing Drakenstein (8/31)




Went from 1872 buildings to 663 boxes
Processing George (9/31)
Went from 1177 buildings to 340 boxes
Processing Hantam (10/31)
Went from 131 buildings to 80 boxes
Processing Hessequa (11/31)
Went from 395 buildings to 223 boxes
Processing Kamiesberg (12/31)
Went from 28 buildings to 22 boxes
Processing Kannaland (13/31)
Went from 72 buildings to 59 boxes
Processing Karoo Hoogland (14/31)
Went from 42 buildings to 31 boxes
Processing Knysna (15/31)
Went from 215 buildings to 89 boxes
Processing Kou-Kamma (16/31)
Went from 232 buildings to 150 boxes
Processing Laingsburg (17/31)
Went from 30 buildings to 23 boxes
Processing Langeberg (18/31)
Went from 475 buildings to 255 boxes
Processing Matzikama (19/31)
Went from 546 buildings to 279 boxes
Processing Mossel Bay (20/31)
Went from 459 buildings to 244 boxes
Processing Oudtshoorn (21/31)
Went from 439 buildings to 227 boxes
Processing Overstrand (22/31)
Went from 383 buildings to 162 boxes
Processing Prince Albert (23/31)
Went from 40 bu

In [None]:
# Draw every region's merged polygons and largest buildings on a single map.
# This "works" in the sense that it doesn't crash, but it's very slow.

os.environ["HYBRID"] = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'

Map = geemap.Map()
Map.add_basemap("HYBRID")

# Display range and bands for the (currently disabled) Sentinel layer
sentinel_viz = dict(min=0.0, max=3000, bands=['B4', 'B3', 'B2'])

# Vector layers share the same width and fillColor; only the outline color
# differs per layer type.
boundary_viz = dict(color='red', width=2, fillColor='00000000')
buildings_viz = dict(color='yellow', width=2, fillColor='00000000')
merged_viz = dict(color='cyan', width=2, fillColor='00000000')
largest_viz = dict(color='blue', width=2, fillColor='00000000')

for n, adm_region in enumerate(data_dict):
  region_layers = data_dict[adm_region]

  buildings_fc = region_layers["Buildings"]
  # The GeoDataFrames are reprojected to EPSG:4326 before conversion to
  # Earth Engine feature collections
  largest_fc = geemap.geopandas_to_ee(region_layers["Largest"].to_crs("EPSG:4326"))
  merged_fc = geemap.geopandas_to_ee(region_layers["Merged"].to_crs("EPSG:4326"))

  # Center the map once, on the first building of the first region
  if n == 0:
    Map.centerObject(buildings_fc.first().geometry(), 8)

  # Optional layers, disabled for now
  #Map.addLayer(data_dict[adm_region]["Sentinel"], sentinel_viz, "Sentinel")
  #Map.addLayer(data_dict[adm_region]["Boundary"].style(**boundary_viz), {}, "Boundary")
  #Map.addLayer(buildings_fc.style(**buildings_viz), {}, "Buildings")
  Map.addLayer(merged_fc.style(**merged_viz), {}, "Merged")
  Map.addLayer(largest_fc.style(**largest_viz), {}, "Largest")

Map

In [20]:
# Create a file containing buildings to be used by getSentinel

# Stack the "Largest" table of every processed region into one frame
candidates = pd.concat([region_data["Largest"] for region_data in data_dict.values()])

# Nothing is labeled at this stage
candidates.loc[:, 'Farm type'] = "Unlabeled"
candidates = candidates.rename(columns={"area_in_meters": "Area (sq m)"})

# Remaining columns: the dataset name comes from the region descriptor, the
# rest are filled with placeholder values
coldict = {
  "Dataset name": where["Name"],
  "Parent coords": None,
  "Number of animals": np.nan,
  "Length (m)": np.nan,
  "Aspect ratio": np.nan,
}
for col in coldict:
  candidates.loc[:, col] = coldict[col]

candidates = explore.re_order(candidates)

# Pickle the result under the region's name
path = "/content/drive/MyDrive/CAFO_data/forTraining/interim_files/"
fname = path + f'{where["Name"]}_bldgs.pkl'
print(f'Saving {len(candidates)} large building images to {fname}')
candidates.to_pickle(fname)

Saving 9311 large building images to /content/drive/MyDrive/CAFO_data/forTraining/interim_files/Western Cape_bldgs.pkl
