# Prepare Google Earth Engine Dataset

Author: Ivan Zvonkov

Last modified: Feb 14, 2024

Description: Takes the processed CSV files from data/datasets and converts them to simple CSV files that can be uploaded as GEE table assets.



In [2]:
import pandas as pd
from pathlib import Path

In [3]:
# Locate the processed per-year CSV files (paths are relative to this notebook).
csv_path_2022 = Path("../../data/datasets/SudanGedarefDarfurAlJazirah2022.csv")
csv_path_2023 = Path("../../data/datasets/SudanGedarefDarfurAlJazirah2023.csv")

# Read each year into its own DataFrame.
df_2022 = pd.read_csv(csv_path_2022)
df_2023 = pd.read_csv(csv_path_2023)

In [4]:
# Inspect the column names of the 2022 dataset.
df_2022.columns

Index(['lon', 'lat', 'class_probability', 'num_labelers', 'plotid', 'sampleid',
       'email', 'start_date', 'end_date', 'subset', 'eo_status', 'eo_data',
       'eo_lat', 'eo_lon', 'eo_file'],
      dtype='object')

In [5]:
# Inspect the column names of the 2023 dataset (for comparison with 2022).
df_2023.columns

Index(['lon', 'lat', 'class_probability', 'num_labelers', 'plotid', 'sampleid',
       'email', 'start_date', 'end_date', 'subset', 'eo_status', 'eo_data',
       'eo_lat', 'eo_lon', 'eo_file'],
      dtype='object')

In [7]:
# Source column name -> column name used in the exported table.
renames = {
    "class_probability": "label",
    "lat": "latitude",
    "lon": "longitude",
}

# Apply the same mapping to both years; columns not listed keep their names.
df_2022 = df_2022.rename(renames, axis="columns")
df_2023 = df_2023.rename(renames, axis="columns")

In [8]:
# Row filters: the label (renamed from class_probability) must not be exactly
# 0.5, and the point must have more than one labeler.
# NOTE(review): presumably 0.5 marks an even split between labelers — confirm.
is_usable_2022 = df_2022["label"].ne(0.5) & df_2022["num_labelers"].gt(1)
is_usable_2023 = df_2023["label"].ne(0.5) & df_2023["num_labelers"].gt(1)

clean_df_2022 = df_2022.loc[is_usable_2022]
clean_df_2023 = df_2023.loc[is_usable_2023]

In [9]:
# Count how many filtered 2022 rows carry each label value.
clean_df_2022["label"].value_counts()

0.0    637
1.0    366
Name: label, dtype: int64

In [10]:
# Count how many filtered 2023 rows carry each label value.
clean_df_2023["label"].value_counts()

0.0    591
1.0     70
Name: label, dtype: int64

In [13]:
# Columns written to the simplified CSV files; everything else is dropped.
keep_columns = ["latitude", "longitude", "subset", "label"]

gee_table_2022 = clean_df_2022.loc[:, keep_columns]
gee_table_2023 = clean_df_2023.loc[:, keep_columns]

# Write one file per year into the current working directory, without the
# DataFrame index column.
gee_table_2022.to_csv("GedarefDarfurAlJazirah2022_gee.csv", index=False)
gee_table_2023.to_csv("GedarefDarfurAlJazirah2023_gee.csv", index=False)

Once the CSV files are saved, they are uploaded to GEE manually. The API is not used because it requires an additional upload to Google Cloud.