# Kenya, Rift Valley, Uasin Gishu Field Point Preparation

**Analyzer:** Ivan Zvonkov

**Last modified:** May 13, 2024

**Description**: Generates field offset points for the region and creates GeoJSON files for analysis.

## 1. Load data for region

In [1]:
import numpy as np
import geopandas as gpd
import math
import pandas as pd
import simplekml
import utm

from pathlib import Path
from tqdm import tqdm
from datetime import datetime, timedelta


In [2]:
# Read the full street2sat table (takes roughly 5 seconds).
# NOTE(review): the saved output under this cell looks like the tail of a pandas
# warning echoing this line - confirm, and consider passing explicit dtypes.
df_all = pd.read_csv("../../data/street2sat-v2.csv")

# Split each image path on "/" once; elements 3 and 4 of the split become the
# country folder and country subfolder columns.
input_img_str_split = df_all["input_img"].str.split("/")
df_all["country_folder"] = input_img_str_split.str.get(3)
df_all["country_subfolder"] = input_img_str_split.str.get(4)

  df_all = pd.read_csv("../../data/street2sat-v2.csv")


In [3]:
# Keep only the rows whose country folder is KENYA_v2.
df_country = df_all.loc[df_all["country_folder"] == "KENYA_v2"]

In [15]:
# Load the Kenya crop records; later cells filter them on ADM1_CODE / ADM2_CODE.
df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")

  df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")


In [16]:
# Number of records per (ADM2_CODE, ADM2 Label) pair within ADM1 code 51331.
df.loc[df["ADM1_CODE"] == 51331, ["ADM2_CODE", "ADM2 Label"]].value_counts()

ADM2_CODE  ADM2 Label       
51386.0    51386 Nakuru         30600
51388.0    51388 Narok          10468
51391.0    51391 Trans Nzoia     9547
51382.0    51382 Kericho         2844
68770.0    68770 Nandi South     1801
68769.0    68769 Nandi North     1175
51393.0    51393 Uasin Gishu      818
51378.0    51378 Bomet            339
51379.0    51379 Buret             96
51392.0    51392 Turkana           14
Name: count, dtype: int64


## Admin Zone selection 

Nandi South and Nandi North are complete.

Skipping Kericho for now because it lies along a UTM zone boundary; that issue needs to be resolved first.

This notebook processes Uasin Gishu.

In [17]:
# Restrict the records to ADM1 code 51331 and ADM2 code 51393.
df = df.loc[(df["ADM1_CODE"] == 51331) & (df["ADM2_CODE"] == 51393)]

In [18]:
# Report how many records remain after the admin-zone filter.
print(f"Total points: {df.shape[0]}")

Total points: 818


In [20]:
# Path prefix shared by every output of this notebook: the output folder itself,
# and the base name for the CSV, KMZ and GeoJSON files written further down.
PREFIX = "../Kenya_ADM1_51331_Rift_Valley_ADM2_51393_Uasin_Gishu"

## 2. Compute Field Points using Offset Method

In [21]:
# Passed to generate_offset_point_wgs84 and stored with every field point; it
# selects which side of the driving direction the offset point is placed on.
is_right_hand_drive = False

In [22]:
# Copied and pasted from field_coord_distance_offset.ipynb

def floor10(x):
    """Round ``x`` down to the nearest multiple of 10."""
    return x // 10 * 10


def to_pixel_centroid(coord):
    """Return the centre of the 10x10 grid cell containing ``coord``.

    Only the first two elements of ``coord`` are used; each is floored to a
    multiple of 10 and shifted by 5.
    """
    first, second = coord[0], coord[1]
    return (floor10(first) + 5, floor10(second) + 5)

def generate_offset_point_wgs84(coord0, coord1, is_right_hand_drive=True, meters=20):
    """Offset a road point sideways, perpendicular to the direction of travel.

    Both points are projected to UTM. The direction of travel is the vector
    from ``coord0`` to ``coord1``, and ``coord1`` is shifted ``meters`` along
    the perpendicular to that vector.

    Args:
        coord0: (lat, lon) of the earlier road point.
        coord1: (lat, lon) of the current road point; the offset starts here.
        is_right_hand_drive: if True the offset point lies to the right of the
            direction of travel, otherwise to the left.
        meters: offset distance in UTM units; its absolute value is used.

    Returns:
        A tuple ``(field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north))``:
        the offset point as lat/lon, the lat/lon of the centre of the 10x10
        UTM grid cell containing it, and the UTM driving-direction vector.

    Raises:
        ValueError: if the two points fall in different UTM zones (number or
            letter), or if they project to the same UTM position so that no
            driving direction can be derived.
    """
    utm_coord0 = utm.from_latlon(coord0[0], coord0[1])
    utm_coord1 = utm.from_latlon(coord1[0], coord1[1])

    # Eastings/northings are only comparable within one UTM zone.
    for i, zone_type in [(2, "number"), (3, "letter")]:
        if utm_coord1[i] != utm_coord0[i]:
            print(utm_coord0)
            print(utm_coord1)
            raise ValueError(f"UTM Zone {zone_type} mismatch: {utm_coord0[i]} and {utm_coord1[i]}")

    delta_east = utm_coord1[0] - utm_coord0[0]
    delta_north = utm_coord1[1] - utm_coord0[1]

    # The previous slope-based formulation divided by delta_north, which broke
    # for due east/west travel. Working with the normalized direction vector
    # gives the same point elsewhere and stays defined for every heading.
    travelled = math.hypot(delta_east, delta_north)
    if travelled == 0:
        raise ValueError(
            "Cannot derive driving direction: both coordinates map to the same UTM position"
        )

    scale = abs(meters) / travelled
    if is_right_hand_drive:
        # Direction rotated 90 degrees clockwise: (east, north) -> (north, -east)
        offset_east, offset_north = delta_north * scale, -delta_east * scale
    else:
        # Direction rotated 90 degrees counter-clockwise: (east, north) -> (-north, east)
        offset_east, offset_north = -delta_north * scale, delta_east * scale

    field_point_x = utm_coord1[0] + offset_east
    field_point_y = utm_coord1[1] + offset_north

    field_latlon = utm.to_latlon(field_point_x, field_point_y, utm_coord1[2], utm_coord1[3])

    pixel_centroid_x, pixel_centroid_y = to_pixel_centroid((field_point_x, field_point_y))
    pixel_centroid_field_latlon = utm.to_latlon(pixel_centroid_x, pixel_centroid_y, utm_coord1[2], utm_coord1[3])

    return field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north)

def road_pixel_centroid(coord):
    """Snap a (lat, lon) point to the centre of its 10x10 UTM grid cell.

    The point is projected to UTM, moved to the cell centre with
    ``to_pixel_centroid``, and converted back to lat/lon in the same zone.
    """
    easting, northing, zone_number, zone_letter = utm.from_latlon(coord[0], coord[1])
    centroid_easting, centroid_northing = to_pixel_centroid((easting, northing))
    return utm.to_latlon(centroid_easting, centroid_northing, zone_number, zone_letter)

In [23]:
# One dict per image for which an offset field point could be computed.
field_points = []

for i in tqdm(range(len(df))):
    current_record = df.iloc[i]

    # .THM entries are skipped entirely.
    if current_record["input_img"].endswith(".THM"):
        continue

    # Road coordinate of this image and the centre of its 10 m grid cell.
    # NOTE(review): eval() executes whatever text the CSV holds; if the data is not
    # fully trusted, ast.literal_eval would be the safer parser.
    road_coord = eval(current_record["coord"])
    road_10m_centroid = road_pixel_centroid(road_coord)

    # Latest record in the country-wide table captured within the 10 seconds before
    # this image. The "time" column is compared as strings.
    time1 = datetime.fromisoformat(current_record["time"])
    before_time_interval = time1 - timedelta(seconds=10)
    time_filter = (df_country["time"] > str(before_time_interval)) & (df_country["time"] < str(time1))
    prior_records = df_country[time_filter].sort_values(by=["time"])
    if not len(prior_records):
        print(f"No prior records found for {i}")
        continue

    prior_record = prior_records.iloc[-1]
    prior_coord = eval(prior_record["coord"])

    # Driving direction and offset field point; failures are printed and the
    # record is skipped.
    try:
        offset_field_coord, offset_field_pixel_centroid, driving_direction = (
            generate_offset_point_wgs84(prior_coord, road_coord, is_right_hand_drive)
        )
        driving_easting, driving_northing = driving_direction

        computed_fields = {
            "road_pixel_centroid": road_10m_centroid,
            "is_right_hand_drive": is_right_hand_drive,
            "driving_easting": driving_easting,
            "driving_northing": driving_northing,
            "offset_field_coord": offset_field_coord,
            "offset_field_pixel_centroid": offset_field_pixel_centroid,
            "time_computed": datetime.now(),
        }
        # Original record columns come last, so they win on any key collision.
        field_points.append({**computed_fields, **current_record})
    except Exception as e:
        print(e)
    

  1%|▎                                          | 5/818 [00:00<00:34, 23.89it/s]

No prior records found for 0
No prior records found for 1


  4%|█▋                                        | 32/818 [00:01<00:29, 26.34it/s]

No prior records found for 26


 10%|████▎                                     | 83/818 [00:03<00:31, 23.07it/s]

No prior records found for 78


 15%|█████▉                                   | 119/818 [00:04<00:26, 26.76it/s]

No prior records found for 114


 17%|███████                                  | 140/818 [00:05<00:25, 26.77it/s]

No prior records found for 135
No prior records found for 136


 18%|███████▎                                 | 146/818 [00:05<00:27, 24.39it/s]

No prior records found for 142


 21%|████████▌                                | 170/818 [00:06<00:24, 26.69it/s]

No prior records found for 165
No prior records found for 166


 28%|███████████▋                             | 233/818 [00:08<00:21, 27.06it/s]

No prior records found for 229
No prior records found for 230
No prior records found for 233


 32%|█████████████▏                           | 263/818 [00:09<00:20, 27.27it/s]

No prior records found for 258
No prior records found for 259


 39%|███████████████▉                         | 317/818 [00:11<00:18, 26.83it/s]

No prior records found for 312
No prior records found for 317


 41%|████████████████▉                        | 338/818 [00:12<00:17, 26.92it/s]

No prior records found for 333
No prior records found for 334


 46%|███████████████████                      | 380/818 [00:14<00:16, 26.93it/s]

No prior records found for 374


 55%|██████████████████████▌                  | 449/818 [00:16<00:13, 27.07it/s]

No prior records found for 444
No prior records found for 445


 61%|█████████████████████████                | 500/818 [00:18<00:11, 27.21it/s]

No prior records found for 495


 79%|████████████████████████████████▌        | 650/818 [00:24<00:06, 27.17it/s]

No prior records found for 644
No prior records found for 645


 83%|█████████████████████████████████▉       | 677/818 [00:25<00:05, 27.17it/s]

No prior records found for 673


 87%|███████████████████████████████████▋     | 713/818 [00:26<00:03, 26.66it/s]

No prior records found for 707


 91%|█████████████████████████████████████▏   | 743/818 [00:27<00:02, 27.19it/s]

No prior records found for 737


 95%|██████████████████████████████████████▉  | 776/818 [00:29<00:01, 27.25it/s]

No prior records found for 770
No prior records found for 771


100%|█████████████████████████████████████████| 818/818 [00:30<00:00, 26.77it/s]


## Filter Background

In [24]:
# Turn the collected per-image dicts into a table.
field_df = pd.DataFrame(field_points)
print(f"Before background filter: {field_df.shape[0]}")

# Keep only the points whose "background" value is below 0.95 (less than 95% background).
field_df = field_df.loc[field_df["background"] < 0.95].copy()
print(f"After background filter: {field_df.shape[0]}")

Before background filter: 788
After background filter: 286


## Download images from Google Cloud

**Assumption**: Unique file names.

In [25]:
# Create the output folder for this region if it does not exist yet
# (parent folders are not created).
Path(PREFIX).mkdir(parents=False, exist_ok=True)

In [26]:
# One image path per line, ready to be piped to the copy command below.
file_list_str = "\n".join(field_df["input_img"])

In [27]:
# Folder that receives the downloaded images.
(Path(PREFIX) / "images").mkdir(exist_ok=True)

In [28]:
# Pipe the newline-separated image paths in file_list_str to `gsutil cp`, which
# copies them into the images folder under PREFIX (see the gsutil cp docs for
# the -m, -n and -I flags).
!echo "$file_list_str" | gsutil -m cp -n -I $PREFIX/images



Updates are available for some Google Cloud CLI components.  To install them,
please run:
  $ gcloud components update

If you experience problems with multiprocessing on MacOS, they might be related to https://bugs.python.org/issue33725. You can disable multiprocessing by editing your .boto config or by adding the following flag to your command: `-o "GSUtil:parallel_process_count=1"`. Note that multithreading is still available even if you disable multiprocessing.

Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAN9031.JPG...
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAN9033.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAN9035.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAO9245.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAO9246.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_06_T2/GPAO9248.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_

## Create KML

In [29]:
# Display the output prefix that the CSV and KMZ files below are written under.
PREFIX

'../Kenya_ADM1_51331_Rift_Valley_ADM2_51393_Uasin_Gishu'

In [30]:
# Save the filtered field points next to the output folder, without the index column.
field_df.to_csv(f"{PREFIX}.csv", index=False)

In [33]:
# Positional slice of field_df exported to this KMZ; both bounds go into the file name.
range_start, range_end = 200, 286
kml_document_name = f"{PREFIX}_95_background_{range_start}_{range_end}"

def create_description(record, image_path):
    """Build the HTML description shown for one KML point.

    Args:
        record: mapping (e.g. a DataFrame row) providing the keys read below.
        image_path: path of the image file; only its file name is used, as
            ``files/<name>`` in the ``<img>`` tag.

    Returns:
        An HTML string with the image, capture time, a storage.cloud.google.com
        link, location, driving direction and crop model prediction.
    """
    # Example input_img: gs://street2sat-uploaded/KENYA_v2/2021_07_16_T2/100GOPRO/GPAJ9576.JPG
    gcs_object_path = record["input_img"].replace("gs://", "")
    browser_url = f"https://storage.cloud.google.com/{gcs_object_path}"
    image_file_name = Path(image_path).name

    return f"""
<img src='files/{image_file_name}' width='900px'/>
<br/>
<h2>{record['name']}</h2>
<p>Capture Time: {record['time']}</p>
<a href='{browser_url}'> 
    {browser_url}
</a>

<h2>Location</h2>
<p>ADM1: {record['ADM1 Label']}</p>
<p>ADM2: {record['ADM2 Label']}</p>
<p>Road Lat Lon: {record['coord']}</p>
<p>Field Lat Lon:  {record["offset_field_pixel_centroid"]}</p>


<h2>Driving Direction</h2>
<p>Northing: {record['driving_northing']}</p>
<p>Easting: {record['driving_easting']}</p>
<p>Is Right Hand Drive: {record['is_right_hand_drive']}</p>

<h2>Crop Model Prediction</h2>
<p>{record['results']}</p>

"""

# Build one KML point per selected field point and bundle the images into a KMZ.
kml = simplekml.Kml()
kml.document.name = kml_document_name

for _, record in tqdm(field_df[range_start:range_end].iterrows()):
    latlon = record["offset_field_pixel_centroid"]
    lon_lat = (latlon[1], latlon[0])  # lon, lat optional height
    image_path = f"{PREFIX}/images/{Path(record['input_img']).name}"

    kml.newpoint(
        name=record["dominant_crop"],
        description=create_description(record, image_path),
        coords=[lon_lat],
        timestamp=record["time"],
    )
    # Register the image so it is packed into the KMZ archive.
    kml.addfile(image_path)

kml.savekmz(f"{kml_document_name}.kmz", format=False)

86it [00:00, 1922.81it/s]


In [34]:
# Runs the shell command `open` on the current working directory.
# NOTE(review): `open` is not available on every operating system - confirm
# this cell is only a local convenience.
!open .

## [Optional] Create Geodataframes for Debugging 

In [13]:
# Preserve the original record name under "key" and reuse "name" for the crop label.
# Guarded so that re-running this cell does not overwrite "key" with the crop
# label (the unguarded version lost the original names on a second run).
if "key" not in field_df.columns:
    field_df["key"] = field_df["name"]
    field_df["name"] = field_df["dominant_crop"]

In [14]:
def generate_gdf(cols, coords_col):
    """Build a GeoDataFrame of points from one coordinate column of ``field_df``.

    Args:
        cols: names of the ``field_df`` columns to carry over as attributes.
        coords_col: name of the ``field_df`` column holding (lat, lon) pairs,
            either as tuples or as their string representation.

    Returns:
        A GeoDataFrame with ``field_df[cols]`` and EPSG:4326 point geometry.
    """
    coord_values = field_df[coords_col]

    # String-encoded pairs are parsed first; the first element decides for the column.
    # NOTE(review): eval() runs arbitrary text - only safe on trusted data.
    if type(coord_values.iloc[0]) is str:
        coord_values = coord_values.apply(eval)

    latlon_frame = pd.DataFrame(coord_values.tolist(), columns=["lat", "lon"])
    # points_from_xy takes x = longitude, y = latitude.
    geometry = gpd.points_from_xy(latlon_frame["lon"], latlon_frame["lat"], crs="EPSG:4326")
    return gpd.GeoDataFrame(field_df[cols], geometry=geometry)

# Attribute columns for each layer: the pixel layers carry the identifying columns,
# the coordinate layers also carry the driving direction.
road_pixel_cols = ["input_img", "name", "time"]
road_coord_cols = road_pixel_cols + ["driving_easting", "driving_northing", "is_right_hand_drive"]
field_pixel_cols = list(road_pixel_cols)
field_coord_cols = list(road_coord_cols)

# One GeoDataFrame per coordinate column of field_df.
gdf_road_coord = generate_gdf(road_coord_cols, "coord")
gdf_road_pixel = generate_gdf(road_pixel_cols, "road_pixel_centroid")
gdf_field_coord = generate_gdf(field_coord_cols, "offset_field_coord")
gdf_field_pixel = generate_gdf(field_pixel_cols, "offset_field_pixel_centroid")

In [15]:
# Write each debugging layer as GeoJSON.
# NOTE(review): PREFIX starts with "../", so f"{PREFIX}/{PREFIX}_..." contains a
# "../" in the middle of the path - confirm that this is the intended destination.
for layer_suffix, layer_gdf in (
    ("road_coord", gdf_road_coord),
    ("road_pixel", gdf_road_pixel),
    ("field_coord", gdf_field_coord),
    ("field_pixel", gdf_field_pixel),
):
    layer_gdf.to_file(f"{PREFIX}/{PREFIX}_{layer_suffix}.geojson", driver="GeoJSON")