# Kenya, Rift Valley, Nandi North Field Point Preparation

**Analyzer:** Ivan Zvonkov

**Last modified:** May 6, 2024

**Description**: Generates field offset points for the region and creates GeoJSON files for analysis.

## 1. Load data for region

In [1]:
import ast
import math
from datetime import datetime, timedelta
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import simplekml
import utm
from tqdm import tqdm


In [2]:
# Load the street2sat-v2 table.
# Original timing note for this cell: 5 seconds.
df_all = pd.read_csv("../../data/street2sat-v2.csv")

# Split each image URI on "/" and keep components 3 and 4 as the country
# folder and its subfolder
# (e.g. gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/... -> "KENYA_v2", "2021_07_05_T2").
input_img_str_split = df_all["input_img"].str.split("/")
for column_name, part_index in (("country_folder", 3), ("country_subfolder", 4)):
    df_all[column_name] = input_img_str_split.str[part_index]

  df_all = pd.read_csv("../../data/street2sat-v2.csv")


In [3]:
# Keep only the records whose country folder is KENYA_v2.
is_kenya_v2 = df_all["country_folder"] == "KENYA_v2"
df_country = df_all[is_kenya_v2]

In [4]:
# Kenya crop records with admin-zone columns; ADM1_CODE, ADM2_CODE and
# "ADM2 Label" are used below to select the region.
df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")

  df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")


In [7]:
# Count records per ADM2 zone within ADM1 region 51331.
in_adm1_region = df["ADM1_CODE"] == 51331
df.loc[in_adm1_region, ["ADM2_CODE", "ADM2 Label"]].value_counts()

ADM2_CODE  ADM2 Label       
51386.0    51386 Nakuru         30600
51388.0    51388 Narok          10468
51391.0    51391 Trans Nzoia     9547
51382.0    51382 Kericho         2844
68770.0    68770 Nandi South     1801
68769.0    68769 Nandi North     1175
51393.0    51393 Uasin Gishu      818
51378.0    51378 Bomet            339
51379.0    51379 Buret             96
51392.0    51392 Turkana           14
Name: count, dtype: int64


## Admin Zone selection 

Nandi South is complete.

Moving on to Nandi North

In [8]:
# Restrict the records to ADM1 51331 / ADM2 68769 (Nandi North).
in_adm1_region = df["ADM1_CODE"] == 51331
in_adm2_zone = df["ADM2_CODE"] == 68769
df = df[in_adm1_region & in_adm2_zone]

In [9]:
# Sanity check: number of records left after the admin-zone filter.
print(f"Total points: {len(df)}")

Total points: 1175


In [10]:
# Shared output prefix: PREFIX itself is created as the output directory, and
# the CSV and KMZ file names below are built by appending a suffix to it.
PREFIX = "../Kenya_ADM1_51331_Rift_Valley_ADM2_68769_Nandi_North"

## 2. Compute Field Points using Offset Method

In [11]:
# Passed to generate_offset_point_wgs84 to choose the side of the road on which
# the field point is placed: False offsets to the left of the direction of
# travel, True to the right.
# NOTE(review): presumably False because traffic keeps left in Kenya — confirm.
is_right_hand_drive = False

In [12]:
# Copied and pasted from field_coord_distance_offset.ipynb

def floor10(x):
    """Round ``x`` down to the nearest multiple of 10."""
    return x // 10 * 10


def to_pixel_centroid(coord):
    """Return the centre of the 10 x 10 cell that contains ``coord``.

    Only the first two components of ``coord`` are used: each is floored to a
    multiple of 10 and then shifted by 5.
    """
    first, second = coord[0], coord[1]
    return (floor10(first) + 5, floor10(second) + 5)

def generate_offset_point_wgs84(coord0, coord1, is_right_hand_drive=True, meters=20):
    """Offset ``coord1`` sideways from the direction of travel ``coord0 -> coord1``.

    Both coordinates are (lat, lon) pairs. They are projected to UTM, the
    heading vector between them is rotated by 90 degrees, and the point
    ``meters`` away from ``coord1`` along that perpendicular is returned.

    Args:
        coord0: (lat, lon) of the earlier position.
        coord1: (lat, lon) of the current position; the offset starts here.
        is_right_hand_drive: True places the point to the right of the heading,
            False to the left.
        meters: Offset distance, in UTM units (meters).

    Returns:
        A tuple ``(field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north))``:
        the offset point as (lat, lon), the same point snapped with
        ``to_pixel_centroid`` and converted to (lat, lon), and the UTM heading
        vector from ``coord0`` to ``coord1``.

    Raises:
        ValueError: If the two coordinates fall in different UTM zones, or if
            they project to the same UTM position so no heading exists.
    """
    utm_coord0 = utm.from_latlon(coord0[0], coord0[1])
    utm_coord1 = utm.from_latlon(coord1[0], coord1[1])

    for i, zone_type in [(2, "number"), (3, "letter")]:
        if utm_coord1[i] != utm_coord0[i]:
            print(utm_coord0)
            print(utm_coord1)
            raise ValueError(f"UTM Zone {zone_type} mismatch: {utm_coord0[i]} and {utm_coord1[i]}")

    delta_east = utm_coord1[0] - utm_coord0[0]
    delta_north = utm_coord1[1] - utm_coord0[1]

    # Length of the heading vector. The previous slope-based formulation divided
    # by delta_north and therefore broke for travel due east or due west.
    distance = math.hypot(delta_east, delta_north)
    if distance == 0:
        raise ValueError(
            "Cannot derive a driving direction: coord0 and coord1 project to the same UTM position"
        )

    # Unit vector perpendicular to the heading. (delta_north, -delta_east) is the
    # heading rotated clockwise, i.e. pointing to the right of travel; the sign
    # flips it to the left when is_right_hand_drive is False.
    side = 1 if is_right_hand_drive else -1
    field_point_x = utm_coord1[0] + side * meters * delta_north / distance
    field_point_y = utm_coord1[1] - side * meters * delta_east / distance

    field_latlon = utm.to_latlon(field_point_x, field_point_y, utm_coord1[2], utm_coord1[3])

    pixel_centroid_x, pixel_centroid_y = to_pixel_centroid((field_point_x, field_point_y))
    pixel_centroid_field_latlon = utm.to_latlon(pixel_centroid_x, pixel_centroid_y, utm_coord1[2], utm_coord1[3])

    return field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north)

def road_pixel_centroid(coord):
    """Snap a (lat, lon) road coordinate to its pixel centroid.

    The coordinate is projected to UTM, snapped with ``to_pixel_centroid``,
    and converted back to (lat, lon) using the same UTM zone.
    """
    easting, northing, zone_number, zone_letter = utm.from_latlon(coord[0], coord[1])
    centroid_easting, centroid_northing = to_pixel_centroid((easting, northing))
    return utm.to_latlon(centroid_easting, centroid_northing, zone_number, zone_letter)

In [13]:
# Build one field point per record in df: snap the road coordinate to its pixel
# centroid, find the previous capture to derive the driving direction, and
# offset the road coordinate sideways into the field.
field_points = []

for i in tqdm(range(0, len(df))):
    current_record = df.iloc[i]
    if current_record["input_img"].endswith(".THM"):
        continue

    # Get road coordinate.
    # "coord" is the text form of a (lat, lon) tuple; ast.literal_eval parses it
    # without executing arbitrary code read from the CSV (unlike eval).
    road_coord = ast.literal_eval(current_record["coord"])
    road_10m_centroid = road_pixel_centroid(road_coord)

    # Get prior coordinate: the latest df_country record captured strictly
    # within the 10 seconds before the current record.
    time1 = datetime.fromisoformat(current_record["time"])
    before_time_interval = time1 - timedelta(seconds=10)
    # NOTE(review): timestamps are compared as strings, which assumes
    # df_country["time"] uses the same text format as str(datetime) — confirm.
    country_times = df_country["time"]
    time_filter = (country_times < str(time1)) & (country_times > str(before_time_interval))
    prior_records = df_country[time_filter].sort_values(by=['time'])
    if len(prior_records) == 0:
        print(f"No prior records found for {i}")
        continue

    prior_record = prior_records.iloc[-1]
    prior_coord = ast.literal_eval(prior_record["coord"])

    # Get direction and field offset
    output = generate_offset_point_wgs84(prior_coord, road_coord, is_right_hand_drive)
    offset_field_coord, offset_field_pixel_centroid, driving_direction = output

    # The original record's columns are unpacked last, so they take precedence
    # over any computed key with the same name.
    field_points.append({
        "road_pixel_centroid": road_10m_centroid,
        "is_right_hand_drive": is_right_hand_drive,
        "driving_easting": driving_direction[0],
        "driving_northing": driving_direction[1],
        "offset_field_coord": offset_field_coord,
        "offset_field_pixel_centroid": offset_field_pixel_centroid,
        "time_computed": datetime.now(),
        **current_record,
    })
    

  6%|██▎                                      | 66/1175 [00:02<00:40, 27.31it/s]

No prior records found for 60
No prior records found for 61


 16%|██████▏                                 | 183/1175 [00:06<00:37, 26.57it/s]

No prior records found for 179


 18%|███████▎                                | 213/1175 [00:08<00:39, 24.47it/s]

No prior records found for 208


 22%|████████▋                               | 255/1175 [00:09<00:35, 26.23it/s]

No prior records found for 250


 23%|█████████                               | 267/1175 [00:10<00:33, 27.14it/s]

No prior records found for 261


 26%|██████████▌                             | 309/1175 [00:11<00:31, 27.42it/s]

No prior records found for 304


 30%|███████████▊                            | 348/1175 [00:13<00:30, 27.24it/s]

No prior records found for 342


 32%|████████████▊                           | 375/1175 [00:14<00:29, 27.00it/s]

No prior records found for 370


 41%|████████████████▎                       | 480/1175 [00:17<00:25, 26.88it/s]

No prior records found for 476
No prior records found for 477


 42%|████████████████▊                       | 495/1175 [00:18<00:24, 27.21it/s]

No prior records found for 491


 43%|█████████████████▎                      | 507/1175 [00:18<00:24, 27.03it/s]

No prior records found for 503
No prior records found for 504


 45%|██████████████████▏                     | 534/1175 [00:19<00:23, 27.08it/s]

No prior records found for 532
No prior records found for 533


 47%|██████████████████▉                     | 558/1175 [00:20<00:23, 26.68it/s]

No prior records found for 554


 50%|███████████████████▊                    | 582/1175 [00:21<00:21, 27.21it/s]

No prior records found for 576
No prior records found for 581


 52%|████████████████████▋                   | 606/1175 [00:22<00:21, 26.89it/s]

No prior records found for 601


 56%|██████████████████████▌                 | 663/1175 [00:24<00:19, 26.60it/s]

No prior records found for 658


 61%|████████████████████████▌               | 720/1175 [00:27<00:16, 27.45it/s]

No prior records found for 716


 64%|█████████████████████████▋              | 753/1175 [00:28<00:15, 27.16it/s]

No prior records found for 748


 65%|██████████████████████████▏             | 768/1175 [00:28<00:14, 27.40it/s]

No prior records found for 764
No prior records found for 765


 67%|██████████████████████████▊             | 789/1175 [00:29<00:14, 27.32it/s]

No prior records found for 783


 74%|█████████████████████████████▋          | 873/1175 [00:32<00:10, 27.62it/s]

No prior records found for 868
No prior records found for 869


 75%|██████████████████████████████          | 882/1175 [00:33<00:10, 27.54it/s]

No prior records found for 877


 83%|█████████████████████████████████       | 972/1175 [00:36<00:07, 27.55it/s]

No prior records found for 966


 85%|█████████████████████████████████▊      | 993/1175 [00:37<00:06, 27.37it/s]

No prior records found for 989


 90%|███████████████████████████████████    | 1056/1175 [00:39<00:04, 27.57it/s]

No prior records found for 1052
No prior records found for 1053


100%|██████████████████████████████████████▉| 1173/1175 [00:43<00:00, 27.36it/s]

No prior records found for 1167
No prior records found for 1171
No prior records found for 1172


100%|███████████████████████████████████████| 1175/1175 [00:43<00:00, 26.81it/s]


In [15]:
# Collect the per-image dicts into one table; the cell displays its row count.
field_df = pd.DataFrame(field_points)
len(field_df)

1139

## Filter Background

In [22]:
# Rows whose "background" value is at or above this threshold are dropped.
# The threshold was originally 0.95 and was later raised to 0.97.
BACKGROUND_THRESHOLD = 0.97

field_df = pd.DataFrame(field_points)
print(f"Before background filter: {len(field_df)}")

below_threshold = field_df["background"] < BACKGROUND_THRESHOLD
field_df = field_df[below_threshold].copy()
print(f"After background filter: {len(field_df)}")

Before background filter: 1139
After background filter: 326


## Download images from Google Cloud

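**Assumption**: Image file names are unique across source folders, because every image is copied into a single flat `images/` directory and later looked up by file name only.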

In [23]:
# Create the output directory; exist_ok makes re-running this cell harmless.
Path(PREFIX).mkdir(exist_ok=True)

In [24]:
# Newline-separated image URIs, one per filtered field point.
file_list_str = "\n".join(field_df["input_img"])

In [25]:
# Create the images subdirectory that the download below copies into.
Path(f"{PREFIX}/images").mkdir(exist_ok=True)

In [28]:
# Copy every listed image into $PREFIX/images. gsutil flags: -I reads the source
# URIs from stdin, -m copies in parallel, -n skips files that already exist.
!echo "$file_list_str" | gsutil -m cp -n -I $PREFIX/images



Updates are available for some Google Cloud CLI components.  To install them,
please run:
  $ gcloud components update

If you experience problems with multiprocessing on MacOS, they might be related to https://bugs.python.org/issue33725. You can disable multiprocessing by editing your .boto config or by adding the following flag to your command: `-o "GSUtil:parallel_process_count=1"`. Note that multithreading is still available even if you disable multiprocessing.

Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4200.JPG...
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4201.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4202.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4203.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4204.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_05_T2/GPAL4205.JPG...         
Copying gs://street2sat-uploaded/KENYA_v2/2021_07_

## Create KML

In [29]:
PREFIX

'../Kenya_ADM1_51331_Rift_Valley_ADM2_68769_Nandi_North'

In [30]:
# Save the filtered field points as PREFIX.csv, without the index column.
field_df.to_csv(PREFIX + ".csv", index=False)

In [35]:
# Bounds of the field_df slice exported to this KMZ; both bounds are also
# embedded in the document name, which doubles as the KMZ file name.
range_start = 200
range_end = 327
kml_document_name = PREFIX + f"_97_background_{range_start}_{range_end}"

def create_description(record, image_path):
    """Build the HTML description attached to one KML point.

    Args:
        record: Row of ``field_df``; read by key for the image URI, name,
            capture time, admin labels, coordinates, driving direction and
            crop model results.
        image_path: Path of the local image copy; only its file name is used,
            as ``files/<name>`` in the ``<img>`` tag.

    Returns:
        The HTML description as a string.
    """
    # Example input_img: gs://street2sat-uploaded/KENYA_v2/2021_07_16_T2/100GOPRO/GPAJ9576.JPG
    endpoint = record["input_img"].replace("gs://", "")
    
    return f"""
<img src='files/{Path(image_path).name}' width='900px'/>
<br/>
<h2>{record['name']}</h2>
<p>Capture Time: {record['time']}</p>
<a href='https://storage.cloud.google.com/{endpoint}'> 
    https://storage.cloud.google.com/{endpoint}
</a>

<h2>Location</h2>
<p>ADM1: {record['ADM1 Label']}</p>
<p>ADM2: {record['ADM2 Label']}</p>
<p>Road Lat Lon: {record['coord']}</p>
<p>Field Lat Lon:  {record["offset_field_pixel_centroid"]}</p>


<h2>Driving Direction</h2>
<p>Northing: {record['driving_northing']}</p>
<p>Easting: {record['driving_easting']}</p>
<p>Is Right Hand Drive: {record['is_right_hand_drive']}</p>

<h2>Crop Model Prediction</h2>
<p>{record['results']}</p>

"""

# Build one KML point per selected field point and bundle its image in the KMZ.
kml = simplekml.Kml()
kml.document.name = kml_document_name

selected_records = field_df.iloc[range_start:range_end]
for _, record in tqdm(selected_records.iterrows()):
    field_latlon = record["offset_field_pixel_centroid"]
    field_lat, field_lon = field_latlon[0], field_latlon[1]
    image_file_name = Path(record['input_img']).name
    image_path = f"{PREFIX}/images/{image_file_name}"

    # KML coordinates are ordered lon, lat (height is optional and omitted).
    kml.newpoint(
        name=record["dominant_crop"],
        description=create_description(record, image_path),
        coords=[(field_lon, field_lat)],
        timestamp=record["time"],
    )
    kml.addfile(image_path)

# Saving takes a while when many images are bundled, and does not work with
# all 1800 images at once.
kml.savekmz(f"{kml_document_name}.kmz", format=False)

126it [00:00, 1298.84it/s]


In [26]:
!open .

## [Optional] Create Geodataframes for Debugging 

In [13]:
# Keep the original record name under "key" and label each point by its crop.
# The guard makes the cell safe to re-run: without it, a second run would copy
# the crop labels (already stored in "name") over the original names in "key".
if "key" not in field_df.columns:
    field_df["key"] = field_df["name"]
field_df["name"] = field_df["dominant_crop"]

In [14]:
def generate_gdf(cols, coords_col):
    """Build a point GeoDataFrame (EPSG:4326) from a coordinate column of ``field_df``.

    Args:
        cols: Columns of ``field_df`` to carry over as attributes.
        coords_col: Column of ``field_df`` holding (lat, lon) pairs, either as
            tuples or as their string representation.

    Returns:
        A GeoDataFrame with ``field_df[cols]`` as attributes and one point
        geometry per row.
    """
    coords = field_df[coords_col]
    # Whether the column holds strings is decided from its first value. Strings
    # are parsed with ast.literal_eval, which never executes code (unlike eval).
    # The length check keeps an empty frame from raising IndexError.
    if len(coords) > 0 and isinstance(coords.iloc[0], str):
        coords = coords.apply(ast.literal_eval)

    latlons = pd.DataFrame(coords.tolist(), columns=["lat", "lon"])
    # Points take x = lon, y = lat.
    points = gpd.points_from_xy(latlons["lon"], latlons["lat"], crs="EPSG:4326")
    gdf = gpd.GeoDataFrame(field_df[cols], geometry=points)
    return gdf

# Attribute columns exported with each layer: the pixel layers carry only the
# identifying columns, the coordinate layers also carry the driving direction.
road_pixel_cols = ["input_img", "name", "time"]
field_pixel_cols = list(road_pixel_cols)
road_coord_cols = road_pixel_cols + ["driving_easting", "driving_northing", "is_right_hand_drive"]
field_coord_cols = list(road_coord_cols)

# One GeoDataFrame per coordinate column of field_df.
gdf_road_coord = generate_gdf(coords_col="coord", cols=road_coord_cols)
gdf_road_pixel = generate_gdf(coords_col="road_pixel_centroid", cols=road_pixel_cols)
gdf_field_coord = generate_gdf(coords_col="offset_field_coord", cols=field_coord_cols)
gdf_field_pixel = generate_gdf(coords_col="offset_field_pixel_centroid", cols=field_pixel_cols)

In [15]:
# Write each layer into the output directory as <directory name>_<layer>.geojson.
# PREFIX starts with "../", so the previous f"{PREFIX}/{PREFIX}_..." pattern
# resolved back out of the directory and wrote the files next to it instead.
output_dir = Path(PREFIX)
geojson_layers = {
    "road_coord": gdf_road_coord,
    "road_pixel": gdf_road_pixel,
    "field_coord": gdf_field_coord,
    "field_pixel": gdf_field_pixel,
}
for layer_name, layer_gdf in geojson_layers.items():
    layer_path = output_dir / f"{output_dir.name}_{layer_name}.geojson"
    layer_gdf.to_file(str(layer_path), driver="GeoJSON")