# Kenya, Rift Valley, Nandi South Field Point Preparation

**Analyzer:** Ivan Zvonkov

**Last modified:** Mar 16, 2024

**Description**: Generates field offset points for the region and creates GeoJSON files for analysis.

## 1. Load data for region

In [20]:
!pip install simplekml -q

You should consider upgrading via the '/Users/izvonkov/nasaharvest/street2sat/venv/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [1]:
# Base path for everything this notebook writes: the output folder (and its images/
# sub-folder), the exported CSV (PREFIX + ".csv") and the KML/KMZ document name.
PREFIX = "../Kenya_ADM1_51331_Rift_Valley_ADM2_68770_Nandi_South"

In [2]:
import ast
import math
from datetime import datetime, timedelta
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import simplekml
import utm
from tqdm import tqdm


In [3]:
# Load the full street2sat v2 table.
df_all = pd.read_csv("../../data/street2sat-v2.csv")
# Author's timing note: 5 seconds
# Split each input_img path on "/" and keep components 3 and 4.
# e.g. for a path like gs://street2sat-uploaded/KENYA_v2/2021_07_16_T2/100GOPRO/GPAJ9576.JPG
# component 3 is "KENYA_v2" and component 4 is "2021_07_16_T2".
input_img_str_split = df_all["input_img"].str.split("/")
df_all["country_folder"] = input_img_str_split.str[3]
df_all["country_subfolder"] = input_img_str_split.str[4]

  df_all = pd.read_csv("../../data/street2sat-v2.csv")


In [4]:
# Keep only rows from the "KENYA_v2" folder; this table is what the field-point loop
# searches when it looks for the GPS fix recorded just before each image.
df_country = df_all[df_all["country_folder"] == "KENYA_v2"]

In [5]:
# Crop records with admin-zone columns; ADM1_CODE / ADM2_CODE are used to select the region.
df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")

  df = pd.read_csv("../../data/Kenya_crops_w_admin_zones_CLEAN.csv")


In [6]:
# Restrict to ADM1 51331 / ADM2 68770, the same codes that appear in PREFIX
# (Rift Valley / Nandi South).
df = df[(df["ADM1_CODE"] == 51331) & (df["ADM2_CODE"] == 68770)]

In [7]:
print(f"Total points: {len(df)}")

Total points: 1801


## 2. Compute Field Points using Offset Method

In [8]:
# Passed to generate_offset_point_wgs84: True places the field point to the right of the
# driving direction, False places it to the left.
is_right_hand_drive = False

In [9]:
# Copied and pasted from field_coord_distance_offset.ipynb

def floor10(x):
    """Round x down to the nearest multiple of 10 (floor division, so -3 -> -10)."""
    return x // 10 * 10


def to_pixel_centroid(coord):
    """Return the centre of the 10 x 10 grid cell containing coord.

    Only the first two elements of coord are read, so a longer tuple
    (for example one that also carries a UTM zone number and letter) is accepted.
    """
    first, second = coord[0], coord[1]
    return (floor10(first) + 5, floor10(second) + 5)

def offset_vector_utm(delta_east, delta_north, is_right_hand_drive=True, meters=20):
    """Return the (east, north) offset perpendicular to a driving direction.

    The driving direction is the vector (delta_east, delta_north). The returned
    offset has length abs(meters) and points to the right of that direction when
    is_right_hand_drive is True, and to the left otherwise.

    Working with the perpendicular vector directly avoids dividing by
    delta_north, so headings due east or due west are handled like any other.

    Raises:
        ValueError: if both deltas are zero, because no direction can be derived.
    """
    distance = math.hypot(delta_east, delta_north)
    if distance == 0:
        raise ValueError(
            "Cannot determine driving direction: prior and current coordinates are identical"
        )

    side = 1 if is_right_hand_drive else -1
    scale = side * abs(meters) / distance
    # Rotating (east, north) clockwise by 90 degrees gives (north, -east): the right-hand side.
    return scale * delta_north, -scale * delta_east


def generate_offset_point_wgs84(coord0, coord1, is_right_hand_drive=True, meters=20):
    """Offset coord1 sideways from the road, perpendicular to the travel direction.

    Args:
        coord0: (lat, lon) of the earlier position; only used to derive the direction.
        coord1: (lat, lon) of the current position; the offset is applied to this point.
        is_right_hand_drive: offset to the right of the direction of travel when True,
            to the left when False.
        meters: offset distance, applied in UTM coordinates.

    Returns:
        A tuple of three items:
        - (lat, lon) of the offset point,
        - (lat, lon) of the centre of the 10 x 10 UTM grid cell containing the offset point,
        - (delta_east, delta_north), the UTM displacement from coord0 to coord1.

    Raises:
        ValueError: if the two coordinates fall in different UTM zone numbers or
            letters, or if they are identical (no driving direction).
    """
    utm_coord0 = utm.from_latlon(coord0[0], coord0[1])
    utm_coord1 = utm.from_latlon(coord1[0], coord1[1])

    # Eastings/northings are only comparable within the same UTM zone.
    for i, zone_type in [(2, "number"), (3, "letter")]:
        if utm_coord1[i] != utm_coord0[i]:
            print(utm_coord0)
            print(utm_coord1)
            raise ValueError(f"UTM Zone {zone_type} mismatch: {utm_coord0[i]} and {utm_coord1[i]}")

    delta_east = utm_coord1[0] - utm_coord0[0]
    delta_north = utm_coord1[1] - utm_coord0[1]

    offset_east, offset_north = offset_vector_utm(
        delta_east, delta_north, is_right_hand_drive, meters
    )
    field_point_x = utm_coord1[0] + offset_east
    field_point_y = utm_coord1[1] + offset_north

    field_latlon = utm.to_latlon(field_point_x, field_point_y, utm_coord1[2], utm_coord1[3])

    pixel_centroid_x, pixel_centroid_y = to_pixel_centroid((field_point_x, field_point_y))
    pixel_centroid_field_latlon = utm.to_latlon(
        pixel_centroid_x, pixel_centroid_y, utm_coord1[2], utm_coord1[3]
    )

    return field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north)

def road_pixel_centroid(coord):
    """Return the (lat, lon) of the centre of the 10 x 10 UTM grid cell containing coord.

    coord is a (lat, lon) pair. It is converted to UTM, snapped to the cell
    centre with to_pixel_centroid, and converted back using the same zone.
    """
    easting, northing, zone_number, zone_letter = utm.from_latlon(coord[0], coord[1])
    centroid_easting, centroid_northing = to_pixel_centroid((easting, northing))
    return utm.to_latlon(centroid_easting, centroid_northing, zone_number, zone_letter)

In [10]:
# Build one field-point record per image in df: snap the road position to its 10 m
# cell, derive the driving direction from the most recent earlier GPS fix, and offset
# the point sideways into the field.
field_points = []

for i in tqdm(range(len(df))):
    current_record = df.iloc[i]
    if current_record["input_img"].endswith(".THM"):
        continue

    # Get road coordinate
    # "coord" is stored as text; ast.literal_eval parses the tuple literal without
    # executing arbitrary code the way eval would on CSV contents.
    road_coord = ast.literal_eval(current_record["coord"])
    road_10m_centroid = road_pixel_centroid(road_coord)

    # Get prior coordinate: latest country-wide record in the 10 seconds before this one
    time1 = datetime.fromisoformat(current_record["time"])
    before_time_interval = time1 - timedelta(seconds=10)
    # NOTE(review): this compares timestamps as strings, so it assumes the "time" column
    # uses the same text format that str(datetime) produces — confirm.
    time_filter = (df_country["time"] < str(time1)) & (df_country["time"] > str(before_time_interval))
    prior_records = df_country[time_filter].sort_values(by=['time'])
    if prior_records.empty:
        print(f"No prior records found for {i}")
        continue

    prior_record = prior_records.iloc[-1]
    prior_coord = ast.literal_eval(prior_record["coord"])

    # Get direction and field offset
    output = generate_offset_point_wgs84(prior_coord, road_coord, is_right_hand_drive)
    offset_field_coord, offset_field_pixel_centroid, driving_direction = output

    field_points.append({
        "road_pixel_centroid": road_10m_centroid,
        "is_right_hand_drive": is_right_hand_drive,
        "driving_easting": driving_direction[0],
        "driving_northing": driving_direction[1],
        "offset_field_coord": offset_field_coord,
        "offset_field_pixel_centroid": offset_field_pixel_centroid,
        "time_computed": datetime.now(),
        # Original columns come last, so they win on any key collision.
        **current_record,
    })
    

  2%|▊                                        | 33/1801 [00:01<01:04, 27.42it/s]

No prior records found for 28
No prior records found for 29


  3%|█▏                                       | 51/1801 [00:01<01:05, 26.69it/s]

No prior records found for 47
No prior records found for 48


  7%|██▋                                     | 123/1801 [00:05<01:01, 27.34it/s]

No prior records found for 119


  7%|██▉                                     | 135/1801 [00:06<01:02, 26.61it/s]

No prior records found for 131
No prior records found for 132


  8%|███▎                                    | 147/1801 [00:06<01:01, 26.72it/s]

No prior records found for 141


 18%|███████▏                                | 324/1801 [00:12<00:53, 27.49it/s]

No prior records found for 320
No prior records found for 321


 24%|█████████▋                              | 438/1801 [00:17<00:50, 27.21it/s]

No prior records found for 434
No prior records found for 435


 27%|██████████▋                             | 480/1801 [00:18<00:48, 27.00it/s]

No prior records found for 477


 31%|████████████▎                           | 552/1801 [00:21<00:46, 26.75it/s]

No prior records found for 547
No prior records found for 548


 34%|█████████████▌                          | 612/1801 [00:23<00:43, 27.40it/s]

No prior records found for 607


 40%|███████████████▉                        | 720/1801 [00:27<00:39, 27.57it/s]

No prior records found for 716


 47%|██████████████████▊                     | 846/1801 [00:32<00:35, 27.26it/s]

No prior records found for 842


 54%|█████████████████████▍                  | 966/1801 [00:36<00:32, 25.84it/s]

No prior records found for 961


 55%|█████████████████████▊                  | 984/1801 [00:37<00:31, 26.27it/s]

No prior records found for 978


 60%|███████████████████████▌               | 1089/1801 [00:41<00:26, 27.13it/s]

No prior records found for 1084


 64%|█████████████████████████▏             | 1161/1801 [00:44<00:23, 27.50it/s]

No prior records found for 1155


 66%|█████████████████████████▌             | 1182/1801 [00:44<00:22, 27.13it/s]

No prior records found for 1178


 68%|██████████████████████████▌            | 1224/1801 [00:46<00:21, 27.23it/s]

No prior records found for 1219
No prior records found for 1222


 71%|███████████████████████████▌           | 1275/1801 [00:48<00:19, 27.48it/s]

No prior records found for 1270
No prior records found for 1271


 71%|███████████████████████████▊           | 1287/1801 [00:48<00:18, 27.51it/s]

No prior records found for 1281


 73%|████████████████████████████▍          | 1311/1801 [00:49<00:17, 27.60it/s]

No prior records found for 1305


 74%|████████████████████████████▋          | 1326/1801 [00:50<00:17, 27.57it/s]

No prior records found for 1321


 78%|██████████████████████████████▌        | 1410/1801 [00:53<00:14, 27.25it/s]

No prior records found for 1407


 79%|██████████████████████████████▊        | 1422/1801 [00:53<00:14, 25.81it/s]

No prior records found for 1416


 82%|████████████████████████████████       | 1482/1801 [00:55<00:11, 27.09it/s]

No prior records found for 1478


 83%|████████████████████████████████▍      | 1500/1801 [00:56<00:11, 27.26it/s]

No prior records found for 1495


 84%|████████████████████████████████▉      | 1521/1801 [00:57<00:10, 27.57it/s]

No prior records found for 1517


 85%|█████████████████████████████████▏     | 1530/1801 [00:57<00:10, 26.85it/s]

No prior records found for 1524


 88%|██████████████████████████████████▍    | 1590/1801 [01:00<00:11, 18.97it/s]

No prior records found for 1586
No prior records found for 1587


 95%|████████████████████████████████████▉  | 1707/1801 [01:04<00:03, 26.81it/s]

No prior records found for 1703
No prior records found for 1704


 95%|█████████████████████████████████████▏ | 1716/1801 [01:04<00:03, 27.31it/s]

No prior records found for 1710


 98%|██████████████████████████████████████▏| 1764/1801 [01:06<00:01, 26.69it/s]

No prior records found for 1759
No prior records found for 1760


 99%|██████████████████████████████████████▊| 1791/1801 [01:07<00:00, 26.95it/s]

No prior records found for 1787


100%|███████████████████████████████████████| 1801/1801 [01:08<00:00, 26.41it/s]

No prior records found for 1795





In [11]:
field_df = pd.DataFrame(field_points)

## Clean already processed points

In [12]:
# Drop the first 100 rows (positional slice); per the section heading these were already processed.
field_df = field_df[100:].copy()

## Filter Background

In [13]:
# Keep only rows whose "background" value is below 0.95.
field_df = field_df[field_df["background"] < 0.95].copy()

# Author's note: the 95% threshold eliminates 600 points

## Download images from Google Cloud

**Assumption**: Unique file names.

In [14]:
Path(PREFIX).mkdir(exist_ok=True)

In [15]:
# Newline-separated list of image URIs; the download cell pipes this string to gsutil.
file_list_str = "\n".join(field_df["input_img"].tolist())

In [16]:
Path(f"{PREFIX}/images").mkdir(exist_ok=True)

In [None]:
!echo "$file_list_str" | gsutil -m cp -n -I $PREFIX/images

## Create KML

In [18]:
PREFIX

'../Kenya_ADM1_51331_Rift_Valley_ADM2_68770_Nandi_South'

In [19]:
field_df.to_csv(PREFIX + ".csv", index=False)

In [17]:
len(field_df)

933

In [24]:
def create_description(record, image_path):
    """Build the HTML description shown for one field point in the KML.

    Args:
        record: mapping with the keys read below (input_img, name, time, the ADM labels,
            coord, offset_field_pixel_centroid, the driving fields and results).
        image_path: path of the local image; only its file name is used, referenced
            as files/<name>.

    Returns:
        The HTML snippet as a string, starting with a newline and ending with a blank line.
    """
    # Example input_img: gs://street2sat-uploaded/KENYA_v2/2021_07_16_T2/100GOPRO/GPAJ9576.JPG
    endpoint = record["input_img"].replace("gs://", "")
    image_name = Path(image_path).name
    url = f"https://storage.cloud.google.com/{endpoint}"

    html_lines = [
        "",
        f"<img src='files/{image_name}' width='900px'/>",
        "<br/>",
        f"<h2>{record['name']}</h2>",
        f"<p>Capture Time: {record['time']}</p>",
        f"<a href='{url}'> ",
        f"    {url}",
        "</a>",
        "",
        "<h2>Location</h2>",
        f"<p>ADM1: {record['ADM1 Label']}</p>",
        f"<p>ADM2: {record['ADM2 Label']}</p>",
        f"<p>Road Lat Lon: {record['coord']}</p>",
        f"<p>Field Lat Lon:  {record['offset_field_pixel_centroid']}</p>",
        "",
        "",
        "<h2>Driving Direction</h2>",
        f"<p>Northing: {record['driving_northing']}</p>",
        f"<p>Easting: {record['driving_easting']}</p>",
        f"<p>Is Right Hand Drive: {record['is_right_hand_drive']}</p>",
        "",
        "<h2>Crop Model Prediction</h2>",
        f"<p>{record['results']}</p>",
        "",
        "",
    ]
    return "\n".join(html_lines)

# Build one KML document for a batch of field_df rows; the batch bounds are part of the
# document name, which is also used as the .kmz file name when saving.
kml = simplekml.Kml()
range_start = 900
range_end = 1000
kml_document_name = PREFIX + f"_batch2_95_background_{range_start}_{range_end}"
kml.document.name = kml_document_name

# The slice is positional, so a range_end past the end of field_df just yields fewer rows.
for _, record in tqdm(field_df[range_start:range_end].iterrows()):
    latlon = record["offset_field_pixel_centroid"]
    # Local copy of the image under PREFIX/images, the folder the gsutil cell copies into.
    image_path = f"{PREFIX}/images/{Path(record['input_img']).name}"
    kml.newpoint(
        coords=[(latlon[1], latlon[0])],  # lon, lat optional height
        description=create_description(record, image_path),     
        name=record["dominant_crop"], 
        timestamp=record["time"]
    )  
    # NOTE(review): presumably registering the file here is what lets the 'files/<name>'
    # image reference in the description resolve inside the saved KMZ — confirm.
    kml.addfile(image_path)
    

33it [00:00, 2308.37it/s]


In [25]:
# Save the document as <kml_document_name>.kmz.
# Will take time if many images.
# Author's note: does not work when all images (1800) are included.
kml.savekmz(f"{kml_document_name}.kmz", format=False) 

In [26]:
!open .

## [Optional] Create Geodataframes for Debugging 

In [13]:
# Preserve the original "name" under "key", then overwrite "name" with the dominant crop.
# NOTE: not idempotent — running this cell a second time copies the crop into "key" as well.
field_df["key"] = field_df["name"]
field_df["name"] = field_df["dominant_crop"]

In [14]:
def generate_gdf(cols, coords_col):
    """Build a GeoDataFrame of points from one coordinate column of field_df.

    Args:
        cols: field_df columns to carry over as attributes.
        coords_col: name of the field_df column holding (lat, lon) pairs, either as
            tuples or as their string form (e.g. "coord", which is stored as text).

    Returns:
        GeoDataFrame with the selected columns and EPSG:4326 point geometry.
    """
    coords = field_df[coords_col]
    # String-encoded tuples are parsed with ast.literal_eval, which only accepts
    # literals and therefore cannot execute code embedded in the data (unlike eval).
    if isinstance(coords.iloc[0], str):
        coords = coords.apply(ast.literal_eval)

    latlons = pd.DataFrame(coords.tolist(), columns=["lat", "lon"])
    # points_from_xy expects x = longitude, y = latitude.
    points = gpd.points_from_xy(latlons["lon"], latlons["lat"], crs="EPSG:4326")
    gdf = gpd.GeoDataFrame(field_df[cols], geometry=points)
    return gdf

# Attribute columns carried into each debugging layer. The raw-coordinate layers also
# keep the driving direction; the pixel-centroid layers keep only the identifying columns.
road_coord_cols = ["input_img", "name", "time", "driving_easting", "driving_northing", "is_right_hand_drive"] 
road_pixel_cols = ["input_img", "name", "time"] 
field_coord_cols = ["input_img", "name", "time", "driving_easting", "driving_northing", "is_right_hand_drive"] 
field_pixel_cols = ["input_img", "name", "time"]

# One GeoDataFrame per coordinate column: road position, road 10 m cell centre,
# offset field position, and offset field 10 m cell centre.
gdf_road_coord = generate_gdf(cols=road_coord_cols, coords_col="coord")
gdf_road_pixel = generate_gdf(cols=road_pixel_cols, coords_col="road_pixel_centroid")
gdf_field_coord = generate_gdf(cols=field_coord_cols, coords_col="offset_field_coord")
gdf_field_pixel = generate_gdf(cols=field_pixel_cols, coords_col="offset_field_pixel_centroid")

In [15]:
# Write each debugging layer as GeoJSON inside the PREFIX folder.
# PREFIX may contain directory components (it starts with "../"), so only its final
# component is used for the file name. Repeating the whole PREFIX inside the folder
# path would resolve back to the parent directory instead of the region folder.
geojson_stem = Path(PREFIX).name
gdf_road_coord.to_file(f"{PREFIX}/{geojson_stem}_road_coord.geojson", driver="GeoJSON")
gdf_road_pixel.to_file(f"{PREFIX}/{geojson_stem}_road_pixel.geojson", driver="GeoJSON")
gdf_field_coord.to_file(f"{PREFIX}/{geojson_stem}_field_coord.geojson", driver="GeoJSON")
gdf_field_pixel.to_file(f"{PREFIX}/{geojson_stem}_field_pixel.geojson", driver="GeoJSON")