In [1]:
%load_ext autoreload
%autoreload 2

import sys
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
import ee
import geemap.foliumap as geemap
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from keras.applications.vgg16 import preprocess_input
from keras.models import load_model

In [2]:
ee.Authenticate()
ee.Initialize(project="215656163750")
drive.mount('/content/drive')
sys.path.append('/content/drive/MyDrive/Colab Notebooks/')
import utils
import explore

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# The fine-tuned model from initialModel.ipynb

model = load_model('/content/drive/MyDrive/CAFO_models/VGG16_step2.keras')

In [4]:
# Functions for data preparation

data_path = "/content/drive/MyDrive/CAFO_data/forTraining/"

def read_data(dataset):

  with open(f'{data_path}{dataset}_sentinel_images.pkl', 'rb') as f:
      image_dict = pickle.load(f)

  df = pd.read_pickle(f'{data_path}{dataset}_metadata_gdf.pkl')
  df["Sentinel"] = image_dict.values()

  return df


def prepare(df):

  images = []
  labels = []

  for idx, row in df.iterrows():
    if row['Farm type'] in ["Poultry", "Broiler", "Layer", "Pig", "Unknown CAFO"]:
      labels.append("CAFO")
      images.append(row["Sentinel"])
    elif row['Farm type'] == "Non-farm":
      labels.append("Non-farm")
      images.append(row["Sentinel"])
    else:
      print(f"Unexpected farm type, {row['Farm type']}")

  label_encoder = LabelEncoder()
  encoded_labels = label_encoder.fit_transform(labels)
  one_hot_labels = to_categorical(encoded_labels, num_classes=len(set(labels)))

  class_mapping = dict(zip(encoded_labels, labels))
  print("Class mapping:", class_mapping)

  return images, one_hot_labels

In [19]:
# Main function for finding images that need checking

def find_dodgy_images(dataset, min_prob=0.7):

  # Get the Sentinel images etc. for this country/location
  data = read_data(dataset)
  X, y = prepare(data)
  meta = data.drop(columns=["Sentinel"])

  # Apply the model to all images
  results = utils.get_predictions(model, X, y, meta)

  # Identify images labeled as not-farm but confidently predicted to be CAFOs
  results.loc[:, "CAFO prob"] = results.loc[:, "Model Probabilities"].str[0]
  to_check = results[(results["Label"] == 1) & (results["CAFO prob"] >= min_prob)]
  print(f"There are {len(to_check)} images to check")

  # Sort in descending probability order
  to_check.sort_values(by="CAFO prob", ascending=False, inplace=True)

  return data, to_check

## Mexico

In [20]:
mex, check_mex = find_dodgy_images("mex")

Class mapping: {0: 'CAFO', 1: 'Non-farm'}


  self._warn_if_super_not_called()


[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m426s[0m 2s/step
There are 213 images to check


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_check.sort_values(by="CAFO prob", ascending=False, inplace=True)


In [25]:
# Examine those images, flag ones that really are CAFOs

#mislabeled_1 = explore.loop_over_buildings(check_mex[:50])
#print(len(mislabeled_1))
#mislabeled_2 = explore.loop_over_buildings(check_mex[50:100])
#print(len(mislabeled_2))
#mislabeled_3 = explore.loop_over_buildings(check_mex[100:150])
#print(len(mislabeled_3))
mislabeled_4 = explore.loop_over_buildings(check_mex[150:])
print(len(mislabeled_4))

Working on feature 1 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3521,1,"[0.7647572, 0.23524278]",0,México,Durango,Mapimí,2281.897949,11.126208,Mexico,Non-farm,160.382219,,POINT (-104.0731 26.52756),0.764757


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 2 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
802,1,"[0.7638631, 0.23613687]",0,México,Jalisco,Atotonilco el Alto,2507.5826,1.218003,Mexico,Non-farm,55.160007,,POINT (-102.51417 20.63174),0.763863


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 3 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3485,1,"[0.7624252, 0.23757488]",0,México,Durango,Mapimí,2357.9784,10.461097,Mexico,Non-farm,158.662801,,POINT (-103.93854 25.81933),0.762425


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 4 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3637,1,"[0.7611344, 0.23886561]",0,México,Nuevo León,Montemorelos,2498.998291,7.630074,Mexico,Non-farm,139.785197,,POINT (-99.73631 25.01381),0.761134


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 5 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3238,1,"[0.76053566, 0.23946436]",0,México,Jalisco,Lagos de Moreno,1784.731201,8.403577,Mexico,Non-farm,121.846569,,POINT (-101.91571 21.49278),0.760536


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 6 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5376,1,"[0.76008415, 0.23991589]",0,México,Morelos,Tepalcingo,1603.000488,4.118199,Mexico,Non-farm,86.467321,,POINT (-98.84751 18.51855),0.760084


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 7 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3716,1,"[0.75949854, 0.24050146]",0,México,Zacatecas,Nochistlán de Mejía,1732.22168,6.527889,Mexico,Non-farm,106.155624,,POINT (-102.7586 21.44427),0.759499


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 8 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
1283,1,"[0.75912994, 0.24087007]",0,México,Sonora,Cajeme,4556.9259,1.428891,Mexico,Non-farm,93.114489,,POINT (-110.04726 27.35285),0.75913


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 9 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
6362,1,"[0.7574414, 0.2425586]",0,México,Morelos,Yautepec,1256.266724,8.750794,Mexico,Non-farm,104.47783,,POINT (-99.05296 18.87212),0.757441


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 10 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
914,1,"[0.75662696, 0.24337305]",0,México,Puebla,Cañada Morelos,2194.6954,6.968246,Mexico,Non-farm,123.690162,,POINT (-97.49771 18.76917),0.756627


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 11 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3636,1,"[0.7523612, 0.24763885]",0,México,Chihuahua,Namiquipa,1301.4793,1.260347,Mexico,Non-farm,40.476126,,POINT (-107.39966 29.1148),0.752361


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 12 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2247,1,"[0.7515557, 0.24844436]",0,México,Chihuahua,Guerrero,1599.479492,1.078118,Mexico,Non-farm,46.164466,,POINT (-107.34998 28.47996),0.751556


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 13 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4374,1,"[0.7492949, 0.25070512]",0,México,Tamaulipas,Reynosa,4070.89209,1.277459,Mexico,Non-farm,76.995143,,POINT (-98.2222 26.04131),0.749295


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 14 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
612,1,"[0.7485795, 0.2514205]",0,México,Puebla,Atlixco,1581.975342,12.519758,Mexico,Non-farm,140.568978,,POINT (-98.46492 18.88775),0.74858


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 15 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
1058,1,"[0.74741447, 0.25258547]",0,México,Puebla,Chalchicomula de Sesma,4862.6495,4.2176,Mexico,Non-farm,143.594222,,POINT (-97.45998 18.89566),0.747414


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 16 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3456,1,"[0.74693626, 0.25306374]",0,México,Veracruz de Ignacio de la Llave,Medellín,2890.0311,7.567383,Mexico,Non-farm,159.707877,,POINT (-96.17251 18.98333),0.746936


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 17 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3243,1,"[0.7447254, 0.25527453]",0,México,Jalisco,Lagos de Moreno,2048.4228,3.868735,Mexico,Non-farm,90.086015,,POINT (-101.92811 21.67307),0.744725


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 18 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5534,1,"[0.7443139, 0.25568613]",0,México,Jalisco,Teocaltiche,2608.922607,10.983015,Mexico,Non-farm,168.663099,,POINT (-102.41823 21.55458),0.744314


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 19 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4415,1,"[0.7429314, 0.25706863]",0,México,Yucatán,Samahil,2123.0869,9.117618,Mexico,Non-farm,140.639692,,POINT (-89.8976 20.89827),0.742931


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 20 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2915,1,"[0.7427982, 0.2572018]",0,México,Chiapas,Jiquipilas,1623.822754,5.601278,Mexico,Non-farm,95.42674,,POINT (-93.71472 16.48269),0.742798


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 21 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
801,1,"[0.74075395, 0.25924608]",0,México,Jalisco,Atotonilco el Alto,2888.740479,2.161141,Mexico,Non-farm,83.966111,,POINT (-102.55055 20.6153),0.740754


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 22 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3515,1,"[0.73962855, 0.26037148]",0,México,Durango,Mapimí,2017.644043,8.109957,Mexico,Non-farm,137.181414,,POINT (-104.04337 25.89578),0.739629


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 23 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4669,1,"[0.73724043, 0.2627595]",0,México,Guanajuato,San José Iturbide,2560.663574,10.80788,Mexico,Non-farm,165.412347,,POINT (-100.40097 21.08898),0.73724


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 24 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
166,1,"[0.7368904, 0.26310962]",0,México,Sinaloa,Ahome,2003.709229,12.300291,Mexico,Non-farm,175.244896,,POINT (-109.04411 25.66168),0.73689


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 25 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3406,1,"[0.7351947, 0.26480535]",0,México,Hidalgo,Metepec,1724.1524,1.20858,Mexico,Non-farm,49.189212,,POINT (-98.38704 20.24511),0.735195


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 26 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5570,1,"[0.73331517, 0.26668486]",0,México,Puebla,Tepanco de López,1351.67688,3.974883,Mexico,Non-farm,73.229089,,POINT (-97.49959 18.46474),0.733315


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 27 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
6644,1,"[0.73296136, 0.2670386]",0,México,Jalisco,Zapopan,1888.824097,2.581726,Mexico,Non-farm,70.273928,,POINT (-103.5077 20.74736),0.732961


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 28 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2779,1,"[0.7329475, 0.2670524]",0,México,Sonora,Hermosillo,1858.803101,1.374239,Mexico,Non-farm,65.25539,,POINT (-111.1344 28.99802),0.732948


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 29 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2945,1,"[0.73069894, 0.2693011]",0,México,Jalisco,Juanacatlán,942.377991,3.164608,Mexico,Non-farm,54.358793,,POINT (-103.18359 20.46376),0.730699


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 30 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
414,1,"[0.73049927, 0.26950076]",0,México,Aguascalientes,Aguascalientes,3948.4472,2.130551,Mexico,Non-farm,93.258012,,POINT (-102.195 22.00059),0.730499


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 31 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3747,1,"[0.72882354, 0.2711765]",0,México,Sonora,Navojoa,2269.1894,11.354199,Mexico,Non-farm,160.157429,,POINT (-109.5204 27.04329),0.728824


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 32 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2029,1,"[0.72795284, 0.27204722]",0,México,Jalisco,Encarnación de Díaz,1271.3361,1.474426,Mexico,Non-farm,53.575219,,POINT (-102.24485 21.42693),0.727953


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 33 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5693,1,"[0.7273166, 0.27268335]",0,México,Veracruz de Ignacio de la Llave,Tlalixcoyan,1514.282837,1.05556,Mexico,Non-farm,42.16662,,POINT (-96.18787 18.77421),0.727317


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 34 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3926,1,"[0.72685957, 0.27314043]",0,México,Durango,Pánuco de Coronado,1100.622559,2.365076,Mexico,Non-farm,50.831848,,POINT (-104.309 24.52441),0.72686


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 35 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
6236,1,"[0.7259399, 0.2740601]",0,México,Jalisco,Unión de San Antonio,1547.919434,2.346574,Mexico,Non-farm,60.235975,,POINT (-101.9317 21.17064),0.72594


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 36 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4690,1,"[0.7255843, 0.2744157]",0,México,Jalisco,San Ignacio Cerro Gordo,1239.0129,4.938717,Mexico,Non-farm,79.93305,,POINT (-102.45623 20.73586),0.725584


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 37 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5586,1,"[0.7250477, 0.27495226]",0,México,Puebla,Tepanco de López,3118.3659,8.390829,Mexico,Non-farm,161.525904,,POINT (-97.54496 18.59966),0.725048


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 38 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3173,1,"[0.72412306, 0.27587694]",0,México,Jalisco,Lagos de Moreno,1434.144775,7.199603,Mexico,Non-farm,101.245426,,POINT (-102.12619 21.27799),0.724123


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 39 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2176,1,"[0.72367996, 0.27632004]",0,México,Baja California,Ensenada,1600.436523,1.064165,Mexico,Non-farm,42.407841,,POINT (-116.62131 31.69396),0.72368


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 40 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2028,1,"[0.72338295, 0.276617]",0,México,Jalisco,Encarnación de Díaz,1231.0512,2.162446,Mexico,Non-farm,54.045953,,POINT (-102.25959 21.42504),0.723383


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 41 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2578,1,"[0.7232287, 0.27677137]",0,México,Guanajuato,Irapuato,1407.2934,4.14404,Mexico,Non-farm,98.691455,,POINT (-101.40286 20.64001),0.723229


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 42 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
1530,1,"[0.72193617, 0.2780638]",0,México,Veracruz de Ignacio de la Llave,Cotaxtla,1902.8504,8.730274,Mexico,Non-farm,128.517126,,POINT (-96.40158 18.90177),0.721936


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 43 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3169,1,"[0.7213997, 0.27860028]",0,México,Jalisco,Lagos de Moreno,2603.280273,7.586797,Mexico,Non-farm,140.195247,,POINT (-102.0784 21.25832),0.7214


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 44 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
63,1,"[0.7197606, 0.28023937]",0,México,Hidalgo,Acaxochitlán,2423.6559,2.041242,Mexico,Non-farm,76.841183,,POINT (-98.13542 20.10749),0.719761


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 45 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
590,1,"[0.7188436, 0.28115636]",0,México,Aguascalientes,Asientos,1861.514771,8.491559,Mexico,Non-farm,125.046976,,POINT (-102.00464 22.11432),0.718844


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 46 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3525,1,"[0.7154698, 0.2845302]",0,México,Durango,Mapimí,2350.3021,2.681801,Mexico,Non-farm,94.062752,,POINT (-104.20093 26.67633),0.71547


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 47 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5575,1,"[0.7146526, 0.28534734]",0,México,Puebla,Tepanco de López,2789.6474,2.979967,Mexico,Non-farm,151.123187,,POINT (-97.57164 18.53948),0.714653


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 48 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
1489,1,"[0.71373326, 0.28626674]",0,México,Chiapas,Comitán de Domínguez,1855.992554,2.024214,Mexico,Non-farm,61.800873,,POINT (-92.03011 16.1664),0.713733


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 49 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3357,1,"[0.7129566, 0.2870434]",0,México,Michoacán de Ocampo,Maravatío,2129.4044,2.527399,Mexico,Non-farm,73.166344,,POINT (-100.50293 19.94911),0.712957


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 50 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
722,1,"[0.7127401, 0.28725988]",0,México,Yucatán,Baca,1501.89917,9.821425,Mexico,Non-farm,120.808915,,POINT (-89.39868 21.08413),0.71274


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 51 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
72,1,"[0.71243894, 0.28756112]",0,México,Yucatán,Acanceh,2495.773926,11.087735,Mexico,Non-farm,165.42892,,POINT (-89.43075 20.79119),0.712439


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 52 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4863,1,"[0.7108853, 0.2891147]",0,México,Guanajuato,San Miguel de Allende,2488.1721,11.133135,Mexico,Non-farm,165.611367,,POINT (-100.78824 21.04254),0.710885


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 53 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
367,1,"[0.7108005, 0.28919947]",0,México,Aguascalientes,Aguascalientes,1608.2696,13.081921,Mexico,Non-farm,165.855897,,POINT (-102.33253 21.80503),0.710801


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 54 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4247,1,"[0.7106235, 0.28937647]",0,México,Michoacán de Ocampo,Queréndaro,1090.57605,3.133084,Mexico,Non-farm,62.70704,,POINT (-100.88606 19.81239),0.710624


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 55 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4916,1,"[0.71023536, 0.28976464]",0,México,Hidalgo,Santiago de Anaya,1910.493042,5.096828,Mexico,Non-farm,111.282745,,POINT (-98.92702 20.35856),0.710235


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 56 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
5691,1,"[0.7088419, 0.29115814]",0,México,Veracruz de Ignacio de la Llave,Tlalixcoyan,2796.6229,8.171071,Mexico,Non-farm,166.509897,,POINT (-96.32334 18.75696),0.708842


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 57 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
4396,1,"[0.70857376, 0.29142624]",0,México,Tamaulipas,Río Bravo,1038.969116,1.172858,Mexico,Non-farm,36.933165,,POINT (-98.011 25.88338),0.708574


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 58 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
721,1,"[0.7081789, 0.29182112]",0,México,Yucatán,Baca,1704.3338,8.717924,Mexico,Non-farm,121.219862,,POINT (-89.39634 21.08368),0.708179


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 59 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2084,1,"[0.70572954, 0.29427043]",0,México,Sonora,Etchojoa,1473.133057,3.442351,Mexico,Non-farm,81.174888,,POINT (-109.70851 27.00906),0.70573


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 60 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3516,1,"[0.705041, 0.29495898]",0,México,Durango,Mapimí,2056.158447,10.961406,Mexico,Non-farm,149.318667,,POINT (-104.00941 25.90494),0.705041


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 61 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
3829,1,"[0.7048127, 0.2951873]",0,México,Sonora,Nogales,2455.3851,15.063903,Mexico,Non-farm,212.542379,,POINT (-110.97711 31.32978),0.704813


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 62 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
2425,1,"[0.70409685, 0.29590312]",0,México,Hidalgo,Huichapan,1056.0545,6.973522,Mexico,Non-farm,85.52883,,POINT (-99.64378 20.36737),0.704097


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 63 of 63


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),Parent coords,geometry,CAFO prob
12,1,"[0.70156425, 0.29843575]",0,México,México,Acambay de Ruíz Castañeda,846.608582,1.137633,Mexico,Non-farm,38.197306,,POINT (-99.85821 19.89979),0.701564


Enter reject to reject, exit to exit, or any key to continue  
28


In [26]:
mex_mislabeled = mislabeled_1 + mislabeled_2 + mislabeled_3 + mislabeled_4
print(mex_mislabeled)

print(f"Rejected {len(mex_mislabeled)} / {len(check_mex)} images")

# Change the labels of the "bad" images in the original df, and pickle the
# "cleaned" df with a new name

df = mex.copy()
df.loc[df.index.isin(mex_mislabeled), ['Farm type']] = 'Unknown CAFO'

df.to_pickle(f'{data_path}mex_final.pkl')

[6, 1980, 1259, 4285, 5523, 2677, 4484, 4182, 2016, 4952, 3494, 4975, 4183, 4866, 6206, 3945, 6702, 1252, 4642, 3083, 210, 5062, 6372, 6222, 74, 6220, 1304, 3496, 2756, 2916, 3997, 148, 4079, 4867, 5654, 3998, 4298, 2357, 5663, 5972, 4234, 4078, 4868, 6218, 4604, 3174, 6377, 4488, 3365, 2464, 5107, 5232, 4937, 1138, 4621, 2678, 3675, 3082, 3523, 4862, 4132, 2100, 2353, 3748, 5229, 4546, 1261, 3999, 1358, 3239, 878, 71, 5658, 4861, 6240, 5574, 3524, 645, 5584, 3521, 3485, 3238, 3716, 6362, 914, 3456, 3243, 5534, 4415, 2915, 3515, 166, 5570, 6236, 5586, 3173, 2028, 3169, 590, 5575, 722, 72, 4863, 4916, 5691, 721, 3516]
Rejected 107 / 213 images


## Chile

In [27]:
chl, check_chl = find_dodgy_images("chl")

Class mapping: {1: 'Non-farm', 0: 'CAFO'}


  self._warn_if_super_not_called()


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 2s/step
There are 18 images to check


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_check.sort_values(by="CAFO prob", ascending=False, inplace=True)


In [28]:
chl_mislabeled = explore.loop_over_buildings(check_chl)

Working on feature 1 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
603,1,"[0.92804223, 0.07195783]",0,Chile,Región de Ñuble,Diguillín,Chillán,1471.270142,1.35173,Chile,Non-farm,47.579889,POINT (-72.19953 -36.61672),,,0.928042


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 2 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
473,1,"[0.8821685, 0.11783156]",0,Chile,Región Metropolitana de Santiago,Maipo,Paine,2089.567139,1.351037,Chile,Non-farm,58.102094,POINT (-70.70312 -33.84452),,,0.882168


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 3 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
588,1,"[0.87119293, 0.12880707]",0,Chile,Región de Ñuble,Diguillín,Chillán Viejo,1618.207031,1.274267,Chile,Non-farm,46.230223,POINT (-72.22073 -36.6355),,,0.871193


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 4 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
157,1,"[0.8195661, 0.18043393]",0,Chile,Región del Libertador Bernardo O'Higgins,Cachapoal,Pichidegua,1244.462402,2.433129,Chile,Non-farm,59.721219,POINT (-71.39653 -34.32836),,,0.819566


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 5 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
88,1,"[0.81724143, 0.18275861]",0,Chile,Región de Valparaíso,Valparaíso,Casablanca,1966.5788,1.929675,Chile,Non-farm,63.316017,POINT (-71.46812 -33.28954),,,0.817241


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 6 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
448,1,"[0.8075675, 0.19243255]",0,Chile,Región Metropolitana de Santiago,Maipo,Buin,1759.377319,1.405564,Chile,Non-farm,53.151939,POINT (-70.75809 -33.7122),,,0.807567


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 7 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
90,1,"[0.8050341, 0.19496593]",0,Chile,Región de Valparaíso,San Antonio,Cartagena,922.72113,4.330985,Chile,Non-farm,63.414851,POINT (-71.60512 -33.55859),,,0.805034


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 8 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
381,1,"[0.7944234, 0.20557663]",0,Chile,Región Metropolitana de Santiago,Santiago,Pudahuel,1193.650879,2.736676,Chile,Non-farm,58.70455,POINT (-70.81702 -33.44872),,,0.794423


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 9 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
409,1,"[0.76963997, 0.23036005]",0,Chile,Región Metropolitana de Santiago,Maipo,San Bernardo,868.248413,5.633691,Chile,Non-farm,71.981826,POINT (-70.72459 -33.60898),,,0.76964


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 10 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
400,1,"[0.7678392, 0.23216084]",0,Chile,Región Metropolitana de Santiago,Talagante,Isla de Maipo,3540.4601,4.817206,Chile,Non-farm,132.024107,POINT (-70.90506 -33.72091),,,0.767839


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 11 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
280,1,"[0.7640001, 0.23599987]",0,Chile,Región del Maule,Talca,Maule,1487.957397,1.733594,Chile,Non-farm,51.624762,POINT (-71.69064 -35.47811),,,0.764


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 12 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
477,1,"[0.74410176, 0.25589827]",0,Chile,Región Metropolitana de Santiago,Maipo,Calera de Tango,1685.864868,2.53289,Chile,Non-farm,71.39571,POINT (-70.74597 -33.66163),,,0.744102


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 13 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
415,1,"[0.7438705, 0.25612953]",0,Chile,Región Metropolitana de Santiago,Maipo,San Bernardo,1230.481201,1.044236,Chile,Non-farm,38.011291,POINT (-70.72593 -33.57323),,,0.74387


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 14 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
403,1,"[0.7400118, 0.25998816]",0,Chile,Región Metropolitana de Santiago,Maipo,San Bernardo,2827.8146,3.641175,Chile,Non-farm,114.775327,POINT (-70.64464 -33.6406),,,0.740012


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 15 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
218,1,"[0.7307242, 0.2692758]",0,Chile,Región del Libertador Bernardo O'Higgins,Cachapoal,Rancagua,1064.588257,1.370884,Chile,Non-farm,41.707844,POINT (-70.76537 -34.14905),,,0.730724


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 16 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
93,1,"[0.7166259, 0.28337413]",0,Chile,Región de Valparaíso,Quillota,Hijuelas,1215.776123,1.907157,Chile,Non-farm,50.351988,POINT (-71.07852 -32.83059),,,0.716626


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 17 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
319,1,"[0.70420784, 0.29579216]",0,Chile,Región del Maule,Talca,Talca,1539.762451,8.092824,Chile,Non-farm,112.193256,POINT (-71.59308 -35.46068),,,0.704208


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 18 of 18


Unnamed: 0,Label,Model Probabilities,Model Class,ADM0,ADM1,ADM2,ADM3,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Number of animals,Parent coords,CAFO prob
444,1,"[0.7031826, 0.29681745]",0,Chile,Región Metropolitana de Santiago,Maipo,Buin,1871.7824,4.781655,Chile,Non-farm,101.255134,POINT (-70.71722 -33.73987),,,0.703183


Enter reject to reject, exit to exit, or any key to continue  


In [29]:
print(f"Rejected {len(chl_mislabeled)} / {len(check_chl)} images")

df = chl.copy()
df.loc[df.index.isin(chl_mislabeled), ['Farm type']] = 'Unknown CAFO'

df.to_pickle(f'{data_path}chl_final.pkl')

Rejected 2 / 18 images


## Iowa

In [30]:
iowa, check_iowa = find_dodgy_images("iowa")

Class mapping: {0: 'CAFO'}


  self._warn_if_super_not_called()


[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 2s/step
There are 0 images to check


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_check.sort_values(by="CAFO prob", ascending=False, inplace=True)


In [31]:
# There is nothing to check here, so we'll just write out the data df with a
# consistent name and format

iowa.to_pickle(f'{data_path}iowa_final.pkl')

## Romania


In [32]:
rou, check_rou = find_dodgy_images("rou")

Class mapping: {1: 'Non-farm', 0: 'CAFO'}


  self._warn_if_super_not_called()


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 2s/step
There are 19 images to check


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  to_check.sort_values(by="CAFO prob", ascending=False, inplace=True)


In [33]:
rou_mislabeled = explore.loop_over_buildings(check_rou)

Working on feature 1 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
234,1,"[0.9977755, 0.002224435]",0,GALATI,1813.208955,1.477078,Romania,Non-farm,55.760327,POINT (28.12127 45.44932),,0.997775


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 2 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
689,1,"[0.9952657, 0.00473425]",0,IZVORU BARZII,2181.708451,6.311394,Romania,Non-farm,118.130982,POINT (22.69409 44.66024),,0.995266


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 3 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
86,1,"[0.94411427, 0.055885755]",0,LUNCA CORBULUI,1700.596681,5.58252,Romania,Non-farm,97.536725,POINT (24.79112 44.65521),,0.944114


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 4 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
521,1,"[0.90515196, 0.094848014]",0,GHIDIGENI,2326.531969,3.321707,Romania,Non-farm,151.777874,POINT (27.4822 46.06709),,0.905152


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 5 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
237,1,"[0.9033034, 0.09669667]",0,LIEBLING,2503.403597,4.310781,Romania,Non-farm,114.628562,POINT (21.32021 45.56592),,0.903303


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 6 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
353,1,"[0.85641974, 0.14358027]",0,SATCHINEZ,2634.793798,2.98894,Romania,Non-farm,93.476312,POINT (21.07201 45.90818),,0.85642


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 7 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
561,1,"[0.85010654, 0.14989345]",0,COJOCNA,2617.408812,1.64174,Romania,Non-farm,78.75434,POINT (23.89721 46.73175),,0.850107


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 8 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
296,1,"[0.8374131, 0.16258688]",0,AVRIG,1669.541623,6.079828,Romania,Non-farm,107.118133,POINT (24.37799 45.76283),,0.837413


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 9 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
281,1,"[0.8352076, 0.16479237]",0,BOD,3227.379087,6.407257,Romania,Non-farm,146.81952,POINT (25.59889 45.75942),,0.835208


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 10 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
560,1,"[0.8333063, 0.16669367]",0,APAHIDA,1887.208935,3.806301,Romania,Non-farm,85.545041,POINT (23.76354 46.79751),,0.833306


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 11 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
97,1,"[0.8242057, 0.1757943]",0,POBORU,1344.596927,2.858627,Romania,Non-farm,63.680405,POINT (24.47968 44.64605),,0.824206


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 12 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
756,1,"[0.7959703, 0.20402968]",0,HARAU,3003.436479,1.914072,Romania,Non-farm,108.85575,POINT (22.98838 45.88922),,0.79597


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 13 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
677,1,"[0.78080636, 0.21919365]",0,AFUMATI,3089.760344,2.730454,Romania,Non-farm,111.940434,POINT (26.21899 44.49414),,0.780806


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 14 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
406,1,"[0.7552206, 0.24477941]",0,GORNESTI,1752.317099,3.596883,Romania,Non-farm,88.990074,POINT (24.6641 46.68872),,0.755221


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 15 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
11,1,"[0.739491, 0.26050904]",0,MAVRODIN,924.489549,3.604423,Romania,Non-farm,74.279131,POINT (25.22932 44.03923),,0.739491


Enter reject to reject, exit to exit, or any key to continue  reject
Working on feature 16 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
87,1,"[0.73875326, 0.2612468]",0,LUNCA CORBULUI,1060.716083,11.061496,Romania,Non-farm,109.292988,POINT (24.75393 44.68845),,0.738753


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 17 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
723,1,"[0.7164633, 0.28353667]",0,SOMOVA,1628.133705,1.980533,Romania,Non-farm,66.366101,POINT (28.72511 45.16671),,0.716463


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 18 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
375,1,"[0.71094984, 0.28905022]",0,PECICA,1751.062537,3.683799,Romania,Non-farm,87.050315,POINT (21.01784 46.18279),,0.71095


Enter reject to reject, exit to exit, or any key to continue  
Working on feature 19 of 19


Unnamed: 0,Label,Model Probabilities,Model Class,ADM2,Area (sq m),Aspect ratio,Dataset name,Farm type,Length (m),geometry,Parent coords,CAFO prob
62,1,"[0.710131, 0.28986895]",0,SUDITI,1247.666173,3.022514,Romania,Non-farm,63.020964,POINT (27.56 44.57077),,0.710131


Enter reject to reject, exit to exit, or any key to continue  


In [34]:
# Relabel the probably-CAFOs and write file

print(f"Rejected {len(rou_mislabeled)} / {len(check_rou)} images")

df = rou.copy()
df.loc[df.index.isin(rou_mislabeled), ['Farm type']] = 'Unknown CAFO'

df.to_pickle(f'{data_path}rou_final.pkl')

Rejected 7 / 19 images
