In [34]:
%load_ext autoreload
%autoreload 2

import sys, os, math
import ee
import geemap.foliumap as geemap
import geemap.colormaps as cm
from google.colab import drive
from google.colab import auth
from google.auth import default
import gspread
import pandas as pd
import geopandas as gpd
import pyproj
import pyarrow
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from keras.models import load_model

# Have geopandas read/write vector files through pyogrio.
gpd.options.io_engine = "pyogrio"
# Environment switch asking pyogrio to use its Arrow-based reader
# (pyarrow is imported above).
os.environ["PYOGRIO_USE_ARROW"] = "1"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Authenticate this Colab session with Google. The credentials kept in `creds`
# are used further down to authorise gspread.
auth.authenticate_user()
creds, _ = default()

# Authenticate and initialise Earth Engine for the project used by this notebook.
ee.Authenticate()
ee.Initialize(project="215656163750")

# Mount Drive: the data files and the local helper modules live there.
drive.mount('/content/drive')

# Make the helper modules importable. Guarded so that re-running this cell
# does not append the same entry to sys.path again and again.
notebooks_dir = '/content/drive/MyDrive/Colab Notebooks/'
if notebooks_dir not in sys.path:
  sys.path.append(notebooks_dir)
import utils
import explore

data_path = '/content/drive/MyDrive/CAFO_data/'
# The shapefiles sit under data_path; deriving the path keeps the two in sync.
shp_path = f'{data_path}Misc_global/shapefiles/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
# The saved Keras model is looked up in a directory named after the model
# architecture and the training dataset.
model_name = "VGG16"
training_data = "data_combo_1"

model = load_model(
    os.path.join(
        '/content/drive/MyDrive/CAFO_models',
        f'{model_name}_{training_data}',
        'model_ft.keras',
    )
)

In [37]:
def bldgs_n_bounds(info_dict):
  """Load a region's buildings table and its outline for display on the map.

  Args:
    info_dict: dict describing the region, with these keys:
      'bf': path to the pickled buildings table (read with pd.read_pickle).
      'shp' (or 'sf'): path to a boundary file readable by geopandas.
      'shp_col', 'shp_name': optional, used together. Column of the boundary
        file and the value in it that selects the region's rows. If either
        is absent, every feature in the boundary file is used as the outline.

  Returns:
    (bldgs, outline_fc): the unpickled buildings table and the outline as an
    Earth Engine FeatureCollection.

  Raises:
    KeyError: if no boundary file path or no 'bf' entry is present.
    ValueError: if the 'shp_col' == 'shp_name' filter matches no rows.
  """
  # Region dicts in this notebook store the boundary file under 'shp' or 'sf'.
  shp_file = info_dict.get('shp', info_dict.get('sf'))
  if shp_file is None:
    raise KeyError("info_dict needs a boundary file path under 'shp' (or 'sf')")

  # get the edges of the whole area, to be displayed on the map (e.g. West Java)
  shp = gpd.read_file(shp_file)
  shp_col = info_dict.get('shp_col')
  shp_name = info_dict.get('shp_name')
  if shp_col is not None and shp_name is not None:
    outline = shp[shp[shp_col] == shp_name].copy()
    if outline.empty:
      raise ValueError(
          f"No feature with {shp_col} == {shp_name!r} found in {shp_file}")
  else:
    # No filter given: the file as a whole is the outline.
    outline = shp.copy()

  # Keep only the geometry (plus the original row index); other attribute
  # columns can cause problems when converting to a FeatureCollection.
  outline = outline["geometry"].reset_index()
  outline_fc = geemap.geopandas_to_ee(outline)

  # get all the buildings with their Sentinel snippets
  # NOTE: unpickling can execute code from the file; only load trusted pickles.
  bldgs = pd.read_pickle(info_dict['bf'])

  return bldgs, outline_fc

In [38]:
# Gujarat
#
# Gujarat has been handled in two parts so far. Model predictions are obtained
# for both parts and then concatenated for visualization.
#
# NOTE(review): both dicts store their boundary file under 'sf' and have no
# 'shp_col' / 'shp_name' entries, unlike the other regions in this notebook.

# 1st part
gujarat1 = {
    'Name': 'Gujarat1',
    'CRS': "EPSG:7761",
    'bf': f"{data_path}forTraining/guj1_iter0.pkl",
    'sf': f"{data_path}Misc_global/shapefiles/Gujarat1_ADM3.geojson",
    'min_prob': 0.95,
    'center_at': ee.Feature(ee.Geometry.Point(71.922185, 22.912330), {}),
}

# 2nd part. Only what is needed for getting the model predictions is defined;
# min_prob and center_at are used for display purposes and are not needed here.
gujarat2 = {
    'Name': 'Gujarat2',
    'bf': f"{data_path}forTraining/guj2_iter0.pkl",
    'sf': f"{data_path}Misc_global/shapefiles/Gujarat2_ADM3.geojson",
}

In [39]:
# Sudan
#
# Keys: 'bf' buildings pickle, 'shp' boundary file, 'shp_col'/'shp_name' the
# column and value selecting the outline, 'CRS' the CRS in which building
# centroids are computed, 'min_prob' the display threshold on prob_0,
# 'center_at' the initial map centre.

sudan = {
    'Name': 'Sudan',
    'CRS': "EPSG:20135",
    'bf': f"{data_path}forTraining/sdn_iter0.pkl",
    'shp': f"{shp_path}geoBoundaries-SDN-ADM0.geojson",
    'shp_col': "shapeName",
    'shp_name': 'Sudan',
    'min_prob': 0.95,
    'center_at': ee.Feature(ee.Geometry.Point(32.491341, 15.654058), {}),
}

In [40]:
# Western Cape
#
# The outline is the row of the ADM1 shapefile whose ADM1_EN is 'Western Cape'.

wcape = {
    'Name': 'Western Cape',
    'CRS': "EPSG:2055",
    'bf': f"{data_path}forTraining/wcape_iter0.pkl",
    'shp': f"{shp_path}zaf_admbnda_adm1_sadb_ocha_20201109.shp",
    'shp_col': "ADM1_EN",
    'shp_name': 'Western Cape',
    'min_prob': 0.95,
    'center_at': ee.Feature(ee.Geometry.Point(19.611816, -33.975383), {}),
}

In [41]:
# West Java/Jawa Barat
#
# Displayed as 'West Java'; the shapefile row is matched on the name
# 'Jawa Barat' in its ADM1_EN column.

wjava = {
    'Name': 'West Java',
    'CRS': "EPSG:23830",
    'bf': f"{data_path}forTraining/w_java_iter0.pkl",
    'shp': f"{shp_path}idn_admbnda_adm1_bps_20200401.shp",
    'shp_col': "ADM1_EN",
    'shp_name': 'Jawa Barat',
    'min_prob': 0.95,
    'center_at': ee.Feature(ee.Geometry.Point(106.755048, -6.837401), {}),
}

In [42]:
# Choose the region to work on. The cells below read their settings
# (name, CRS, probability threshold, map centre) from `where`.
where = wjava
# Load that region's buildings table and its outline (an EE FeatureCollection).
bldgs, bounds_fc = bldgs_n_bounds(where)
print(f"There are {len(bldgs)} buildings in the {where['Name']} dataset")

There are 26756 buildings in the West Java dataset


In [43]:
# For Gujarat only, combine both parts of the region into a single
# dataset and rename it to just "Gujarat"
if where is gujarat1:
  # bldgs_n_bounds takes a single region dict and returns the buildings table
  # together with the outline as an Earth Engine FeatureCollection.
  bldgs2, bounds2_fc = bldgs_n_bounds(gujarat2)
  print(f"There are {len(bldgs2)} buildings in the Gujarat2 dataset")
  bldgs = pd.concat([bldgs, bldgs2]).reset_index(drop=True)
  # The outlines are Earth Engine collections, not DataFrames, so they are
  # combined with FeatureCollection.merge. The result is stored in `bounds_fc`,
  # the name the map cell draws as 'Map bounds'.
  bounds_fc = bounds_fc.merge(bounds2_fc)
  where["Name"] = "Gujarat"

In [44]:
# For Western Cape only, get coords of known farms (N2P piggeries)

if where is wcape:
  gc = gspread.authorize(creds)
  url = 'https://docs.google.com/spreadsheets/d/1i842rkJKgXNLelQT6msNwTdeMscJZ6DTSGtzyzAui_k/edit?usp=sharing'
  gsheets = gc.open_by_url(url)
  # All cell values as text; the first row supplies the column names.
  sheets = gsheets.worksheet('Sheet1').get_all_values()
  df = pd.DataFrame(sheets[1:], columns=sheets[0])
  # .copy() gives an independent frame, so the column assignment below does
  # not write into a filtered selection (avoids SettingWithCopyWarning).
  df = df[df["Region"].str.contains("Western Cape")].copy()
  # One text column holds "lat, lon"; split it and store the parts as numbers
  # rather than leaving them as strings.
  df[['Latitude', 'Longitude']] = (
      df['Latitude, longitude'].str.split(',', expand=True).astype(float)
  )
  gdf = gpd.GeoDataFrame(
      df,
      geometry=gpd.points_from_xy(df.Longitude, df.Latitude),
      crs="EPSG:4326",
  )
  # Stored on the region dict; the map cell adds a layer when this key exists.
  where['N2P piggeries'] = gdf

In [45]:
# Shortcut for when predictions were already made and the results table was
# saved: read it back from file.

f = os.path.join(
    "/content/drive/MyDrive/CAFO_models_applied",
    f"results_{where['Name']}.pkl",
)
results = pd.read_pickle(f)

In [29]:
# Model inputs come straight from the "Sentinel" column. No true labels exist
# for these buildings, so every row gets the same placeholder label.
X = bldgs["Sentinel"].tolist()
labels = ["Unlabeled"] * len(bldgs)

# get_predictions is given a y array, so the placeholder labels are encoded
# the usual way (a single class here).
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
y = to_categorical(encoded_labels, num_classes=len(set(labels)))

class_mapping = dict(zip(encoded_labels, labels))
print("Class mapping:", class_mapping)

results = utils.get_predictions(model, X, y, model_name, bldgs)
# First entry of each probability vector; the visualization cell thresholds
# this column to pick out high-probability CAFOs.
results.loc[:, "prob_0"] = results["Model Probabilities"].str[0]
results.to_pickle(f"/content/drive/MyDrive/CAFO_models_applied/results_{where['Name']}.pkl")

Class mapping: {0: 'Unlabeled'}


  self._warn_if_super_not_called()


[1m164/164[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step


In [31]:
# Functions needed for visualization

def get_fc(df):
  """Turn a table with geometries into an Earth Engine FeatureCollection.

  The input is wrapped as a GeoDataFrame declared as EPSG:4326, the
  'Sentinel' and 'ADM2_REF' columns are removed when present, and the
  remainder is handed to geemap.geopandas_to_ee.
  """
  unwanted = ['Sentinel', 'ADM2_REF']
  trimmed = gpd.GeoDataFrame(df, crs="EPSG:4326").drop(
      columns=unwanted, errors='ignore')
  return geemap.geopandas_to_ee(trimmed)

def buffer_and_bound(feature, buffer_radius=240):
    """Return the bounding box of a buffer around the feature's centroid.

    Written to be mapped over a FeatureCollection. `buffer_radius` is passed
    as the first argument of `buffer`; the second argument is fixed at 2.
    """
    center = feature.centroid()
    buffered = center.buffer(buffer_radius, 2)
    return buffered.bounds()

def process_in_chunks(df, chunk_size=5000):
    """Convert `df` to FeatureCollections in consecutive row chunks.

    Each run of up to `chunk_size` rows is converted with get_fc and every
    feature is replaced by its buffered bounding box (buffer_and_bound).

    Returns:
      A list with one FeatureCollection per chunk; empty if `df` has no rows.
    """
    n_chunks = math.ceil(len(df) / chunk_size)
    starts = (k * chunk_size for k in range(n_chunks))
    return [
        get_fc(df[lo:lo + chunk_size]).map(buffer_and_bound)
        for lo in starts
    ]

def get_centroids(df, crs):
  """Return the centroids of the geometries in `df` as a FeatureCollection.

  The frame is reprojected to `crs`, each geometry is replaced by its
  centroid, and the result is converted back to EPSG:4326. Only the geometry
  column (plus the former index) is passed on to get_fc.
  """
  projected = df.set_geometry("geometry").to_crs(crs)
  as_points = projected.assign(geometry=projected.geometry.centroid)
  lonlat = as_points.to_crs("EPSG:4326")
  return get_fc(lonlat["geometry"].reset_index())

In [47]:
# Visualize.

# It is not clear that processing in chunks actually helps: it is not
# necessary for the CAFOs and seems unhelpful for the not-CAFOs.

max_bldgs = 30000

if where['min_prob'] is not None:
  # buildings/images classed as CAFOs with high probability
  high_prob = results.loc[results['prob_0'] >= where['min_prob']]
  print(f"Identified {len(high_prob)} high-probability CAFOs in {where['Name']}")
  high_prob_list = process_in_chunks(high_prob)

  # other buildings - centroids only to keep data manageable, and only when
  # there are fewer than max_bldgs of them
  other = results.loc[results['prob_0'] < where['min_prob']]
  if len(other) < max_bldgs:
    other_fc = get_centroids(other, where['CRS'])
else:
  # No threshold: convert every building, then buildings --> image extents
  all_fc = get_fc(results).map(buffer_and_bound)

# XYZ tile URL published under the name that Map.add_basemap("HYBRID") uses.
os.environ["HYBRID"] = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'

# Outline styles for the vector layers. All use the fill colour '00000000',
# so they differ only in line colour and width.
boundary_viz = dict(color='purple', width=2, fillColor='00000000')

high_prob_viz = dict(color='red', width=5, fillColor='00000000')

other_viz = dict(color='cyan', width=2, fillColor='00000000')

pig_viz = dict(color='pink', width=2, fillColor='00000000')

# Base map: centred on the region, hybrid imagery, region outline on top.
Map = geemap.Map()
Map.centerObject(where['center_at'].geometry(), 10)
Map.add_basemap("HYBRID")
Map.addLayer(bounds_fc.style(**boundary_viz), {}, 'Map bounds')

if where['min_prob'] is not None:
  # `other_fc` only exists when the centroid layer was small enough to build.
  if len(other) < max_bldgs:
    Map.addLayer(other_fc.style(**other_viz), {}, 'Other large buildings')

  # One layer per chunk. Layers that share a name can collide in the layer
  # control, so when there are several chunks each name gets a part suffix.
  # A single chunk keeps the plain name.
  cafo_layer = f"CAFO prob >= {where['min_prob']}"
  n_chunks = len(high_prob_list)
  for chunk_no, fc in enumerate(high_prob_list, start=1):
    if n_chunks == 1:
      layer_name = cafo_layer
    else:
      layer_name = f"{cafo_layer} (part {chunk_no} of {n_chunks})"
    Map.addLayer(fc.style(**high_prob_viz), {}, layer_name)

else:
  # No threshold: paint every image extent, coloured by its prob_0 value.
  empty = ee.Image().byte()
  probs = empty.paint(featureCollection=all_fc, color='prob_0', width=5)
  fillspalette = cm.get_palette(cmap_name="RdYlGn_r")
  Map.addLayer(probs, {'palette': fillspalette, 'min': 0, 'max': 1},
               'CAFO probability')

# Show the South African piggeries, if applicable
if 'N2P piggeries' in where:
  pig_fc = get_fc(where['N2P piggeries'])
  Map.addLayer(pig_fc.style(**pig_viz), {}, 'N2P piggeries')

Map

Identified 751 high-probability CAFOs in West Java


In [48]:
# Write the interactive map next to the saved results, one HTML file per region.
save_to = "/content/drive/MyDrive/CAFO_models_applied/"
Map.save(f"{save_to}interactive_map_{where['Name']}.html")