In [1]:
%load_ext autoreload
%autoreload 2

import sys, os, math
import ee
import geemap.foliumap as geemap
import geemap.colormaps as cm
from google.colab import drive
from google.colab import auth
from google.auth import default
import gspread
import pandas as pd
import geopandas as gpd
import pyproj
import pyarrow
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.utils import to_categorical
from keras.models import load_model

# Have geopandas do its vector file I/O through the pyogrio engine.
gpd.options.io_engine = "pyogrio"
# Environment switch read by pyogrio to enable its Arrow-based reader.
os.environ["PYOGRIO_USE_ARROW"] = "1"

In [2]:
# Authenticate this Colab session with Google. `creds` is reused further down
# to authorise gspread for reading a Google Sheet.
auth.authenticate_user()
creds, _ = default()
# Authenticate and initialise Earth Engine against the given Cloud project.
ee.Authenticate()
ee.Initialize(project="215656163750")
# Mount Google Drive; the data, the saved models and the helper modules
# imported below are all addressed through /content/drive.
drive.mount('/content/drive')
# The two imports below must stay after the mount and the sys.path update:
# presumably `utils` and `explore` live in this Drive folder -- confirm.
sys.path.append('/content/drive/MyDrive/Colab Notebooks/')
import utils
import explore

# Root folder of the input data; the region definitions build their file
# paths from it.
data_path = '/content/drive/MyDrive/CAFO_data/'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Architecture name; also passed to utils.get_predictions further down.
model_name = "VGG16"

# Tag of the training set; combined with model_name to locate the model folder.
training_data = "data_combo_1"

# Load the saved Keras model from Drive.
# NOTE(review): "model_ft" presumably denotes the fine-tuned checkpoint --
# confirm against the training notebook.
model = load_model(f'/content/drive/MyDrive/CAFO_models/{model_name}_{training_data}/model_ft.keras')

In [4]:
def bldgs_n_bounds(bldgs_file, shp_file):
  """Load a region's building table and its boundary geometries.

  Args:
    bldgs_file: Path to a pickled pandas object holding the buildings.
    shp_file: Path to a vector file that geopandas can read.

  Returns:
    A `(bldgs, boundary)` tuple: the unpickled buildings object and the
    "geometry" column of the boundary file.
  """
  # NOTE: unpickling can execute arbitrary code -- only use trusted files.
  buildings = pd.read_pickle(bldgs_file)
  return buildings, gpd.read_file(shp_file)["geometry"]

In [5]:
# Gujarat
#
# Gujarat has been handled in two parts so far. The model predictions are
# obtained for both parts and then concatenated for visualization.
#
# Keys: 'bf' = pickled buildings file, 'sf' = boundary file,
# 'min_prob' / 'center_at' = display settings.

# 1st part: carries both the prediction inputs and the display settings.
gujarat1 = {
  'Name': 'Gujarat1',
  'bf': f"{data_path}forTraining/guj1_iter0.pkl",
  'sf': f"{data_path}Misc_global/shapefiles/Gujarat1_ADM3.geojson",
  'min_prob': 0.95,
  'center_at': ee.Feature(ee.Geometry.Point(71.922185, 22.912330), {}),
}

# 2nd part: only what is needed to get the model predictions. 'min_prob' and
# 'center_at' are display-only and are taken from the 1st part.
gujarat2 = {
  'Name': 'Gujarat2',
  'bf': f"{data_path}forTraining/guj2_iter0.pkl",
  'sf': f"{data_path}Misc_global/shapefiles/Gujarat2_ADM3.geojson",
}


In [6]:
# Sudan
#
# Same layout as the other region definitions: buildings pickle ('bf'),
# boundary file ('sf') and the two display settings.

sudan = {
  'Name': 'Sudan',
  'bf': f"{data_path}forTraining/sdn_iter0.pkl",
  'sf': f"{data_path}Misc_global/shapefiles/geoBoundaries-SDN-ADM0.geojson",
  'min_prob': 0.95,
  'center_at': ee.Feature(ee.Geometry.Point(32.491341, 15.654058), {}),
}

In [7]:
# Western Cape

wcape = {
  'Name': 'Western Cape',
  'bf': f"{data_path}forTraining/wcape_iter0.pkl",
  'sf': f"{data_path}Misc_global/shapefiles/WesternCape_ADM3.geojson",
  'min_prob': 0.95,
  'center_at': ee.Feature(ee.Geometry.Point(19.611816, -33.975383), {}),
}

# For this location only, get coords of known farms (N2P piggeries) from a
# Google Sheet, using the credentials obtained in the authentication cell.
gc = gspread.authorize(creds)
url = 'https://docs.google.com/spreadsheets/d/1i842rkJKgXNLelQT6msNwTdeMscJZ6DTSGtzyzAui_k/edit?usp=sharing'
gsheets = gc.open_by_url(url)
sheets = gsheets.worksheet('Sheet1').get_all_values()

# The first row of the sheet is the header, the rest are records.
df = pd.DataFrame(sheets[1:], columns=sheets[0])
# .copy() gives an independent frame, so the column assignment below does not
# write onto a filtered slice (which triggers SettingWithCopyWarning).
df = df[df["Region"].str.contains("Western Cape")].copy()
# Split the "lat, lon" text into two columns and store them as floats rather
# than as whitespace-padded strings; a malformed value raises here.
df[['Latitude', 'Longitude']] = (
  df['Latitude, longitude'].str.split(',', expand=True).astype(float)
)
gdf = gpd.GeoDataFrame(
  df,
  geometry=gpd.points_from_xy(df.Longitude, df.Latitude),
  crs="EPSG:4326",
)
# The presence of this key is what makes the visualization cell draw the
# piggeries layer.
wcape['N2P piggeries'] = gdf

In [8]:
# Region to analyse: point `where` at one of the region dicts defined above.
where = gujarat1
bldgs, bounds = bldgs_n_bounds(where['bf'], where['sf'])
print(f"There are {len(bldgs)} buildings in the {where['Name']} dataset")

# If we're looking at Gujarat, combine both parts of the region into a single
# dataset and rename it to just "Gujarat"
if where is gujarat1:
  bldgs2, bounds2 = bldgs_n_bounds(gujarat2['bf'], gujarat2['sf'])
  print(f"There are {len(bldgs2)} buildings in the Gujarat2 dataset")
  # ignore_index gives the merged objects one unique 0..n-1 index instead of
  # carrying over the (possibly overlapping) labels of the two parts.
  bldgs = pd.concat([bldgs, bldgs2], ignore_index=True)
  bounds = pd.concat([bounds, bounds2], ignore_index=True)
  # Rename on a copy: `where` aliases `gujarat1`, so assigning to
  # where["Name"] would silently rename gujarat1 too and make re-running this
  # cell report the wrong dataset name.
  where = {**gujarat1, "Name": "Gujarat"}

There are 26340 buildings in the Gujarat1 dataset
There are 13084 buildings in the Gujarat2 dataset


In [9]:
# If we've already made predictions and created the results df, use this cell to
# read from file
#
# NOTE(review): the path depends on where['Name'], so the region-selection cell
# must run first. The prediction cell further down assigns `results` again, so
# running it after this one replaces the frame loaded here.

f = f"/content/drive/MyDrive/CAFO_models_applied/results_{where['Name']}.pkl"
results = pd.read_pickle(f)

Unnamed: 0,Label,Model Probabilities,Model Class,ADM3,Area (sq m),geometry,Dataset name,Farm type,Sentinel,prob_0
0,0,"[0.023146247, 0.9768538]",1,Daman,2071.9947,"POLYGON ((70.95507 20.71132, 70.95513 20.71115...",Gujarat1,Unlabeled,"[[[61.004030393899264, 46.18193696292695, 34.0...",0.023146
1,0,"[0.0012048928, 0.99879503]",1,Daman,1209.0439,"POLYGON ((70.9531 20.71319, 70.95318 20.71294,...",Gujarat1,Unlabeled,"[[[30.272804637263125, 29.50013913366287, 21.5...",0.001205
2,0,"[0.062060636, 0.93793935]",1,Daman,1134.50354,"POLYGON ((70.97004 20.71355, 70.9702 20.7135, ...",Gujarat1,Unlabeled,"[[[73.32666302167745, 58.54613015082054, 45.09...",0.062061
3,0,"[0.012182048, 0.9878179]",1,Daman,1394.324219,"POLYGON ((70.95829 20.71421, 70.95837 20.71385...",Gujarat1,Unlabeled,"[[[40.84501598500999, 39.62423185490386, 27.98...",0.012182
4,0,"[0.009591412, 0.99040854]",1,Daman,930.091003,"POLYGON ((70.95435 20.71453, 70.95437 20.7142,...",Gujarat1,Unlabeled,"[[[29.995524884886496, 34.371111666451526, 24....",0.009591


26340


In [None]:
# Run the model over every building of the selected region and save the
# predictions to Drive.

# Model inputs: the "Sentinel" entry of each building, in row order. Taken
# straight from the column instead of looping over rows with iterrows().
X = list(bldgs["Sentinel"])
# None of these buildings has a ground-truth class, so every row gets the same
# placeholder label.
labels = ["Unlabeled"] * len(bldgs)

label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
y = to_categorical(encoded_labels, num_classes=len(set(labels)))

class_mapping = dict(zip(encoded_labels, labels))
print("Class mapping:", class_mapping)

results = utils.get_predictions(model, X, y, model_name, bldgs)
# First entry of each probability vector, i.e. the probability of class 0;
# the visualization cell thresholds on this column.
results.loc[:, "prob_0"] = results["Model Probabilities"].str[0]
results.to_pickle(f"/content/drive/MyDrive/CAFO_models_applied/results_{where['Name']}.pkl")

Class mapping: {0: 'Unlabeled'}


  self._warn_if_super_not_called()


[1m 342/1232[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m31:27[0m 2s/step

In [10]:
# Functions needed for visualization

def get_fc(df):
  """Convert a geopandas-compatible object to an Earth Engine collection.

  The input is wrapped in a GeoDataFrame declared as EPSG:4326. A "Sentinel"
  column, when present, is dropped before the conversion (presumably because
  it holds per-building image arrays -- confirm).

  Args:
    df: Object accepted by the `gpd.GeoDataFrame` constructor.

  Returns:
    The result of `geemap.geopandas_to_ee` for the prepared frame.
  """
  frame = gpd.GeoDataFrame(df, crs="EPSG:4326")
  has_imagery = "Sentinel" in frame.columns
  exportable = frame.drop(columns=["Sentinel"]) if has_imagery else frame
  return geemap.geopandas_to_ee(exportable)

def buffer_and_bound(feature, buffer_radius=240):
    """Return the bounding box of a buffer around a feature's centroid.

    Args:
        feature: Object exposing `centroid()`; in this notebook, an Earth
            Engine feature handed in by `FeatureCollection.map`.
        buffer_radius: Distance passed to `buffer` (presumably metres --
            confirm against the Earth Engine docs).

    Returns:
        Whatever `bounds()` returns for the buffered centroid.
    """
    centre = feature.centroid()
    # The second positional argument (2) is forwarded to `buffer` unchanged.
    disc = centre.buffer(buffer_radius, 2)
    return disc.bounds()

def process_in_chunks(df, chunk_size=5000):
    """Convert a frame to Earth Engine collections, `chunk_size` rows at a time.

    Each chunk is converted with `get_fc` and every feature is then replaced
    by its buffered bounding box via `buffer_and_bound`.

    Args:
        df: Frame of buildings to convert.
        chunk_size: Maximum number of rows per collection; must be positive.

    Returns:
        A list with one mapped collection per chunk, in row order. The list
        is empty when `df` has no rows.

    Raises:
        ValueError: If `chunk_size` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    fc_list = []
    for start in range(0, len(df), chunk_size):
        # .iloc keeps the slice positional whatever the frame's index is.
        chunk = df.iloc[start:start + chunk_size]
        fc_list.append(get_fc(chunk).map(buffer_and_bound))
    return fc_list

In [11]:
# Visualize
#
# Two display modes, chosen by the region's optional 'min_prob' setting:
#   * with 'min_prob': buildings are split at that threshold on prob_0 and
#     drawn as two sets of outlines;
#   * without it: every building is painted by its prob_0 value.

boundary_fc = get_fc(bounds)

# .get() so that a region defined without 'min_prob' selects the continuous
# mode instead of raising KeyError.
min_prob = where.get('min_prob')

if min_prob is None:
  all_fc = get_fc(results)
  # buildings --> image extents
  all_fc = all_fc.map(buffer_and_bound)
else:
  # buildings classed as CAFOs with high probability
  high_prob = results[results['prob_0'] >= min_prob]
  print(f"Identified {len(high_prob)} high-probability CAFOs in {where['Name']}")
  high_prob_list = process_in_chunks(high_prob)
  # other buildings
  other = results[results['prob_0'] < min_prob]
  other_list = process_in_chunks(other)

os.environ["HYBRID"] = 'https://mt1.google.com/vt/lyrs=y&x={x}&y={y}&z={z}'

# Outline styles; '00000000' is a fully transparent fill.
boundary_viz = {
  'color': 'purple',
  'width': 1,
  'fillColor': '00000000'
}

high_prob_viz = {
  'color': 'red',
  'width': 5,
  'fillColor': '00000000'
}

other_viz = {
  'color': 'cyan',
  'width': 2,
  'fillColor': '00000000'
}

pig_viz = {
  'color': 'pink',
  'width': 2,
  'fillColor': '00000000'
}


def _add_chunk_layers(target_map, fc_list, viz, label):
  """Add one styled layer per chunk, giving each layer a distinct name.

  Layers that share a name end up as a single entry in the map's layer
  control, so chunks are numbered whenever there is more than one of them.
  """
  total = len(fc_list)
  for part, chunk_fc in enumerate(fc_list, start=1):
    name = label if total == 1 else f"{label} ({part}/{total})"
    target_map.addLayer(chunk_fc.style(**viz), {}, name)


Map = geemap.Map()
Map.centerObject(where['center_at'].geometry(), 8)
Map.add_basemap("HYBRID")
#Map.addLayer(boundary_fc.style(**boundary_viz), {}, 'Boundary')

if min_prob is not None:
  # Low-probability buildings first, so the high-probability ones sit on top.
  _add_chunk_layers(Map, other_list, other_viz, f"CAFO prob < {min_prob}")
  _add_chunk_layers(Map, high_prob_list, high_prob_viz,
                    f"CAFO prob >= {min_prob}")
else:
  # Paint each building extent with its prob_0 value and colour by palette.
  empty = ee.Image().byte()
  probs = empty.paint(**{
    'featureCollection': all_fc,
    'color': 'prob_0',
    'width': 5
  })
  fillspalette = cm.get_palette(cmap_name="RdYlGn_r")
  Map.addLayer(probs, {'palette': fillspalette, 'min': 0, 'max': 1},
               'CAFO probability')

# Show the South African piggeries, if applicable
if 'N2P piggeries' in where:
  pig_fc = get_fc(where['N2P piggeries'])
  Map.addLayer(pig_fc.style(**pig_viz), {}, 'N2P piggeries')

Map

Identified 255 high-probability CAFOs in Gujarat1


In [None]:
# Write the interactive map to the applied-models folder on Drive, one HTML
# file per region name.
save_to = "/content/drive/MyDrive/CAFO_models_applied/"
Map.save(save_to + f"interactive_map_{where['Name']}.html")