# Approved Crop KMZ to CSV

**Author**: Ivan Zvonkov

**Last Modified**: July 7, 2024

**Description**: Converts approved KMZ files to CSV and merges the CSVs into a single file (for upload to Google Earth Engine)


In [None]:
# Install simplekml (imported in the following cell); -q keeps pip's output quiet.
!pip install simplekml -q

In [None]:
from bs4 import BeautifulSoup
from google.colab import drive
from pathlib import Path

import json
import pandas as pd
import simplekml
import xml.etree.ElementTree as ET
import zipfile


# Mount Google Drive so that paths under /content/drive become readable and writable.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

## 1. Convert each KMZ into a CSV file

In [None]:
# List the contents of the source folder (the same path assigned to src_folder below).
!ls "/content/drive/MyDrive/SatLabel Squad/Diana Reviewed Points"

In [None]:
# Specify file
src_folder = "/content/drive/MyDrive/SatLabel Squad/Diana Reviewed Points"
src_file = "Kenya_ADM1_51331_Rift_Valley_ADM2_68770_Nandi_South_batch2_95_background_300_400.kmz"

dest_folder = "/content/drive/MyDrive/SatLabel Squad/Helmets Approved Points (CSV)"
# Swap only the trailing extension; str.replace would also rewrite any earlier
# ".kmz" occurrence inside the file name.
dest_file = str(Path(src_file).with_suffix(".csv"))

kmz_file_path = f"{src_folder}/{src_file}"

# Unzip KMZ file: open it as a zip archive and read the first entry ending in .kml
with zipfile.ZipFile(kmz_file_path, 'r') as kmz:
    kml_filename = next(
        (name for name in kmz.namelist() if name.endswith('.kml')),
        None,
    )
    if kml_filename is None:
        # Fail with a clear message instead of an opaque IndexError.
        raise ValueError(f"No .kml file found inside {kmz_file_path}")
    kml_data = kmz.read(kml_filename)

# Convert KML document to KMZ points list (one dict per Placemark)
KML_NAMESPACE_URI = 'http://www.opengis.net/kml/2.2'
namespace = {'kml': KML_NAMESPACE_URI}
# ElementTree reports tags as "{namespace-uri}tag"; this prefix is stripped from keys.
namespace_prefix = '{' + KML_NAMESPACE_URI + '}'
kmz_points = []

root = ET.fromstring(kml_data)
for placemark in root.findall('.//kml:Placemark', namespace):
    # Flatten the placemark and all of its descendants into {tag: text}.
    # If a tag occurs more than once, the last occurrence wins.
    kmz_element = {}
    for child in placemark.iter():
        key = child.tag.replace(namespace_prefix, '')
        kmz_element[key] = child.text

    kmz_points.append(kmz_element)

In [None]:
# Convert KMZ points list to crop/non-crop point list


def _value_after_label(tag):
    """Return the text that follows the first ": " in a "Label: value" element."""
    # maxsplit=1 keeps values that themselves contain ": " intact.
    return tag.text.split(": ", 1)[1]


def _paragraphs_after(heading):
    """Return the <p> siblings that come after `heading`, in document order."""
    return [item for item in heading.next_siblings if item.name == 'p']


def _parse_lat_lon(tag):
    """Parse a "Label: (a, b)" element into the two-item list [a, b]."""
    return json.loads(_value_after_label(tag).replace('(', '[').replace(')', ']'))


points = []

for kmz_point in kmz_points:
    soup = BeautifulSoup(kmz_point["description"], 'html.parser')

    # Look the headings up once per point: [0] capture info, [1] precedes the
    # location paragraphs, [2] precedes the driving-direction paragraphs.
    headings = soup.find_all('h2')
    direction_data = _paragraphs_after(headings[2])
    location_data = _paragraphs_after(headings[1])

    # Fields shared by the crop and the non-crop point derived from this placemark.
    # Key order determines the column order of the resulting DataFrame.
    row = {
        "capture_info": headings[0].text,
        "capture_time": _value_after_label(soup.find_all('p')[0]),
        "image_url": soup.find('a')['href'],
        # Driving direction details
        "driving_northing": float(_value_after_label(direction_data[0])),
        "driving_easting": float(_value_after_label(direction_data[1])),
        "is_right_hand_drive": _value_after_label(direction_data[2]) == 'True',
        # Location details
        "adm1": _value_after_label(location_data[0]),
        "adm2": _value_after_label(location_data[1]),
    }

    road_lat_lon = _parse_lat_lon(location_data[2])
    field_lat_lon = _parse_lat_lon(location_data[3])

    # Each placemark yields two rows: the field location labelled as crop
    # (crop type taken from the placemark name) ...
    crop_point = {
        "latitude": field_lat_lon[0],
        "longitude": field_lat_lon[1],
        "is_crop": 1,
        "crop_type": kmz_point["name"],
        **row,
    }

    # ... and the road location labelled as non-crop.
    non_crop_point = {
        "latitude": road_lat_lon[0],
        "longitude": road_lat_lon[1],
        "is_crop": 0,
        "crop_type": "",
        **row,
    }

    points.append(crop_point)
    points.append(non_crop_point)

df = pd.DataFrame(points)
# Capture the three path segments that follow "street2sat-uploaded/" in the image URL.
df["gcloud_folder"] = df["image_url"].str.extract(r'street2sat-uploaded/([^/]+/[^/]+/[^/]+)')
# Alternative pattern that captures only two path segments:
#df["gcloud_folder"] = df["image_url"].str.extract(r'street2sat-uploaded/([^/]+/[^/]+)')
df["gcloud_folder"].value_counts()

In [None]:
# Save the DataFrame to <dest_folder>/<dest_file> without the index column;
# the trailing expression displays the file name as the cell output.
output_csv_path = "/".join([dest_folder, dest_file])
df.to_csv(output_csv_path, index=False)
dest_file

## 2. Merge CSV files into single file

In [None]:
# Collect every CSV in the destination folder. Directory listing order is
# arbitrary, so sort the paths to keep the merged row order reproducible.
csvs = sorted(Path(dest_folder).glob("*.csv"))
len(csvs)

In [None]:
# Load each CSV into its own DataFrame, then stack them under a fresh 0..n-1 index.
frames = []
for csv_path in csvs:
    frames.append(pd.read_csv(csv_path))
df = pd.concat(frames, ignore_index=True)

In [None]:
# Show how many merged rows fall under each gcloud_folder value.
folder_counts = df["gcloud_folder"].value_counts()
folder_counts

In [None]:
# Write the merged table to Drive without the index column.
# The path has no placeholders, so it is a plain string rather than an f-string.
df.to_csv("/content/drive/MyDrive/SatLabel Squad/Kenya_2021_batch202404.csv", index=False)