# Generates field coordinates using the offset method

**Author**: Ivan Zvonkov

**Last modified**: Mar 11, 2024

**Description**: Computes field coordinates by:
1) Going through crop db entries
2) For each crop db entry, using the method developed in `field_coord_distance_offset.ipynb` to compute
    - driving direction,
    - passenger direction,
    - initial field coordinate,
    - road pixel centroid coordinate, and
    - field pixel centroid.
3) Merging all points representing a crop field and taking a majority vote for the crop in the field.
4) Saving all computed information into an analyzable csv.

Later plans:
- Saving computed information to the firestore database.

## 1. Get crop db entries

In [None]:
!gcloud auth application-default login

In [12]:
import math
import numpy as np
import pandas as pd
import geopandas as gpd
import utm

from datetime import datetime, timedelta
from tqdm import tqdm

In [13]:
df_all = pd.read_csv("../data/street2sat-v2.csv")

  df_all = pd.read_csv("../data/street2sat-v2.csv")


In [14]:
# Takes about 5 seconds.
# Split every image path on "/" once and reuse the pieces for both columns.
input_img_str_split = df_all["input_img"].str.split("/")
# Path components 3 and 4 are stored as the country folder and its subfolder.
df_all["country_folder"] = input_img_str_split.str.get(3)
df_all["country_subfolder"] = input_img_str_split.str.get(4)

In [15]:
df_crops = df_all[df_all["is_crop"] == True].copy()

In [16]:
df_crops["country_folder"].value_counts()

country_folder
NIGERIA_v2             502615
ZAMBIA_v2              254999
Uganda_v2              252731
KENYA_v2               143288
USA_v2                  54253
TANZANIA_v2             11130
MPONGWE_v2               1021
test_set                  695
test_set_divided_v3       695
test_set_divided_v2       200
Name: count, dtype: int64

In [17]:
country_df = df_crops[df_crops["country_folder"] == "KENYA_v2"]

### 1.1 Get Admin Zones for each point

This will make it possible to do analysis on a per-admin-zone basis.

In [32]:
!pip install earthengine-api geemap -q

You should consider upgrading via the '/Users/izvonkov/nasaharvest/street2sat/venv/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [33]:
import ee
import geemap

In [None]:
!earthengine authenticate

In [7]:
ee.Initialize()

In [9]:
ee_country_filter = ee.Filter.eq('ADM0_NAME', "Kenya")
admin_level2_fc = ee.FeatureCollection("FAO/GAUL/2015/level2").filter(ee_country_filter)

In [29]:
# Keep only rows that have both a prediction result and a coordinate, then
# renumber them from zero (the previous index is kept as an "index" column).
has_results_and_coord = country_df["results"].notna() & country_df["coord"].notna()
country_df = country_df[has_results_and_coord].copy().reset_index()

In [69]:
# Turn the admin polygons into images whose pixel value is the admin code
# (one image per admin level), clipped to the extent of the polygons.
adm1_image = admin_level2_fc.reduceToImage(
    ee.List(["ADM1_CODE"]), ee.Reducer.mean()
).clip(admin_level2_fc.geometry())
adm2_image = admin_level2_fc.reduceToImage(
    ee.List(["ADM2_CODE"]), ee.Reducer.mean()
).clip(admin_level2_fc.geometry())

In [75]:
start = 0
adm1_list = []
adm2_list = []

## Too long to run interactively (the sampling loop below takes over an hour); will run tomorrow

def ee_feature_from_row(coord_str):
    """Build a property-less Earth Engine point feature from a coordinate string.

    `coord_str` is evaluated as a Python expression. Element 1 of the result
    is passed to `ee.Geometry.Point` first and element 0 second (presumably a
    "(lat, lon)" pair being converted to lon/lat order -- confirm against the
    CSV).
    """
    # NOTE(review): eval() executes arbitrary code, so this is only safe while
    # the CSV is trusted; ast.literal_eval would be a safer parser for tuples.
    parsed = eval(coord_str)
    first, second = parsed[1], parsed[0]
    point = ee.Geometry.Point(first, second)
    return ee.Feature(point, {})

# Work through the points in batches of 1000: the loop is necessary so that
# ee_to_gdf doesn't time out.
# NOTE(review): the code lists are later assigned as columns of country_df, so
# every batch is assumed to return exactly one row per input point, in order --
# confirm sampleRegions does not drop or reorder points.
for i in tqdm(range(0, len(country_df), 1000)):
    batch_coords = country_df["coord"].iloc[i:i+1000]
    ee_fc = ee.FeatureCollection([ee_feature_from_row(c) for c in batch_coords])
    # Using small scale=10 to ensure most points don't fall between boundaries
    ee_points_adm1 = adm1_image.sampleRegions(collection=ee_fc, scale=10)
    ee_points_adm2 = adm2_image.sampleRegions(collection=ee_fc, scale=10)
    adm1_list.extend(geemap.ee_to_gdf(ee_points_adm1)["mean"].to_list())
    adm2_list.extend(geemap.ee_to_gdf(ee_points_adm2)["mean"].to_list())

100%|███████████████████████████████████████| 143/143 [1:11:00<00:00, 29.80s/it]


In [77]:
country_df["ADM1_CODE"] = adm1_list
country_df["ADM2_CODE"] = adm2_list

In [84]:
# Lookup tables from GAUL admin codes to admin names, one per admin level.
adm1_df = pd.read_csv("../data/gaul_adm1.csv")
adm2_df = pd.read_csv("../data/gaul_adm2.csv")

adm1_code_to_name = adm1_df.set_index("gaul_adm1")["adm1_name"].to_dict()
adm2_code_to_name = adm2_df.set_index("gaul_adm2")["adm2_name"].to_dict()

In [120]:
country_df = pd.read_csv("../data/Kenya_crops_w_admin_zones.csv")

  country_df = pd.read_csv("../data/Kenya_crops_w_admin_zones.csv")


In [121]:
# Points that fell between two admin zones 
no_matching_adm2 = ~country_df["ADM2_CODE"].isin(adm2_code_to_name.keys())
no_matching_adm2.sum()

54

In [122]:
# Points between admin zones removed for simplicity.
# drop=True discards the old index instead of inserting it as a new column.
# Without it, every save-and-rerun of this notebook adds another leftover
# index column ("index", then "level_0") to the CSV, until reset_index fails
# with "cannot insert level_0, already exists".
country_df = country_df[country_df["ADM2_CODE"].isin(adm2_code_to_name.keys())].copy().reset_index(drop=True)

In [123]:
# Human-readable labels of the form "<code> <name>" for both admin levels.
country_df["ADM1 Label"] = country_df["ADM1_CODE"].apply(
    lambda x: " ".join((str(int(x)), adm1_code_to_name[int(x)]))
)
country_df["ADM2 Label"] = country_df["ADM2_CODE"].apply(
    lambda x: " ".join((str(int(x)), adm2_code_to_name[int(x)]))
)

In [125]:
# Expand the stringified per-image results into one column per key.
# NOTE(review): eval() executes arbitrary code; only safe for a trusted CSV.
predictions = pd.json_normalize(country_df["results"].apply(eval))
# Every prediction column after the first one is treated as a crop score.
crops = list(predictions.columns[1:])

# Append the score columns only if at least one of them is still missing.
if any(c not in country_df.columns for c in crops):
    country_df = pd.concat([country_df, predictions], axis=1)

if "dominant_crop" not in country_df.columns:
    def _highest_scoring_crop(row):
        """Return the crop column name with the largest score in `row`."""
        scores = dict(row)
        return max(scores, key=scores.get)

    country_df["dominant_crop"] = country_df[crops].apply(_highest_scoring_crop, axis=1)

In [126]:
country_df.to_csv("../data/Kenya_crops_w_admin_zones.csv", index=False)

## 2. Compute Field Coord

In [169]:
# Copied and pasted from field_coord_distance_offset.ipynb

def floor10(x):
    """Round `x` down to the nearest multiple of 10."""
    return x // 10 * 10


def to_pixel_centroid(coord):
    """Return the centre of the 10x10 grid cell containing `coord`.

    Only the first two elements of `coord` are used, so longer tuples may be
    passed in unchanged.
    """
    first, second = coord[0], coord[1]
    return (floor10(first) + 5, floor10(second) + 5)

def _offset_from_heading(delta_east, delta_north, is_right_hand_drive=True, meters=20):
    """Return the (east, north) displacement perpendicular to a heading.

    The heading is the vector (delta_east, delta_north). The displacement has
    length abs(meters) and points to the right of the heading when
    `is_right_hand_drive` is True, otherwise to the left.

    Raises:
        ValueError: if the heading has zero length (no direction of travel).
    """
    heading_length = math.hypot(delta_east, delta_north)
    if heading_length == 0:
        raise ValueError("Cannot determine driving direction: both coordinates are identical")

    side = 1 if is_right_hand_drive else -1
    distance = abs(meters)
    # (delta_north, -delta_east) is the heading rotated 90 degrees clockwise,
    # i.e. pointing to the right of the direction of travel. Using the unit
    # vector avoids the slope form, which divides by delta_north and breaks
    # for headings due east or west.
    offset_east = side * distance * delta_north / heading_length
    offset_north = -side * distance * delta_east / heading_length
    return offset_east, offset_north


def generate_offset_point_wgs84(coord0, coord1, is_right_hand_drive=True, meters=20):
    """Offset a road point sideways from the direction of travel.

    Both coordinates are converted to UTM, the direction of travel is taken as
    coord0 -> coord1, and a point `meters` away from coord1, perpendicular to
    that direction, is computed.

    Args:
        coord0: (lat, lon) of the earlier position.
        coord1: (lat, lon) of the current position; the offset starts here.
        is_right_hand_drive: if True the offset point lies to the right of the
            direction of travel, otherwise to the left.
        meters: offset distance in UTM units; its sign is ignored.

    Returns:
        A tuple (field_latlon, pixel_centroid_field_latlon, direction) where
        field_latlon is the offset point as (lat, lon),
        pixel_centroid_field_latlon is the centre of the 10x10 UTM grid cell
        containing it as (lat, lon), and direction is
        (delta_east, delta_north) in UTM units.

    Raises:
        ValueError: if the two coordinates fall in different UTM zones, or if
            they map to the same UTM position.
    """
    utm_coord0 = utm.from_latlon(coord0[0], coord0[1])
    utm_coord1 = utm.from_latlon(coord1[0], coord1[1])

    # Eastings/northings are only comparable within a single UTM zone.
    for i, zone_type in [(2, "number"), (3, "letter")]:
        if utm_coord1[i] != utm_coord0[i]:
            print(utm_coord0)
            print(utm_coord1)
            raise ValueError(f"UTM Zone {zone_type} mismatch: {utm_coord0[i]} and {utm_coord1[i]}")

    delta_east = utm_coord1[0] - utm_coord0[0]
    delta_north = utm_coord1[1] - utm_coord0[1]

    offset_east, offset_north = _offset_from_heading(
        delta_east, delta_north, is_right_hand_drive, meters
    )
    field_point_x = utm_coord1[0] + offset_east
    field_point_y = utm_coord1[1] + offset_north

    field_latlon = utm.to_latlon(field_point_x, field_point_y, utm_coord1[2], utm_coord1[3])

    pixel_centroid_x, pixel_centroid_y = to_pixel_centroid((field_point_x, field_point_y))
    pixel_centroid_field_latlon = utm.to_latlon(pixel_centroid_x, pixel_centroid_y, utm_coord1[2], utm_coord1[3])

    return field_latlon, pixel_centroid_field_latlon, (delta_east, delta_north)

def road_pixel_centroid(coord):
    """Snap a (lat, lon) coordinate to the centre of its 10x10 UTM grid cell.

    The coordinate is converted to UTM, moved to the centre of the grid cell
    that contains it, and converted back to (lat, lon) in the same UTM zone.
    """
    easting, northing, zone_number, zone_letter = utm.from_latlon(coord[0], coord[1])
    centroid_easting, centroid_northing = to_pixel_centroid((easting, northing))
    return utm.to_latlon(centroid_easting, centroid_northing, zone_number, zone_letter)

In [170]:
field_points = []

In [171]:
is_right_hand_drive = False
country_filter = df_all["country_folder"] == "KENYA_v2"
# Apply the country filter once. Doing it inside the loop re-scanned every row
# of df_all for each of the 1000 records.
country_records = df_all[country_filter]

for i in tqdm(range(0, 1000)):
    current_record = country_df.iloc[i]
    # Skip .THM files.
    if current_record["input_img"].endswith(".THM"):
        continue

    # Get road coordinate
    # NOTE(review): eval() executes arbitrary code; only safe for a trusted CSV.
    road_coord = eval(current_record["coord"])
    road_10m_centroid = road_pixel_centroid(road_coord)

    # Get prior coordinate: the latest record of the same country taken less
    # than 10 seconds before the current one.
    time1 = datetime.fromisoformat(current_record["time"])
    before_time_interval = time1 - timedelta(seconds=10)
    # NOTE(review): timestamps are compared as strings, which assumes the
    # "time" column uses the same layout as str(datetime) -- confirm.
    record_times = country_records["time"]
    time_filter = (record_times < str(time1)) & (record_times > str(before_time_interval))
    prior_records = country_records[time_filter].sort_values(by=['time'])
    if prior_records.empty:
        print(f"No prior records found for {i}")
        continue

    prior_record = prior_records.iloc[-1]
    prior_coord = eval(prior_record["coord"])

    # Get direction and field offset
    output = generate_offset_point_wgs84(prior_coord, road_coord, is_right_hand_drive)
    offset_field_coord, offset_field_pixel_centroid, driving_direction = output

    field_points.append({
        "road_pixel_centroid": road_10m_centroid,
        "is_right_hand_drive": is_right_hand_drive,
        "driving_easting": driving_direction[0],
        "driving_northing": driving_direction[1],
        "offset_field_coord": offset_field_coord,
        "offset_field_pixel_centroid": offset_field_pixel_centroid,
        "time_computed": datetime.now(),
        # Carry every original column of the crop record along.
        **current_record,
    })
    

  2%|▋                                        | 17/1000 [00:03<03:33,  4.61it/s]

No prior records found for 16


  5%|██                                       | 50/1000 [00:10<03:17,  4.81it/s]

No prior records found for 49


  9%|███▊                                     | 93/1000 [00:19<03:11,  4.74it/s]

No prior records found for 92


 35%|██████████████                          | 353/1000 [01:13<02:13,  4.85it/s]

No prior records found for 352


 49%|███████████████████▌                    | 489/1000 [01:42<02:05,  4.09it/s]

No prior records found for 488


 56%|██████████████████████▍                 | 560/1000 [02:00<01:32,  4.77it/s]

No prior records found for 559


 56%|██████████████████████▍                 | 561/1000 [02:00<01:31,  4.81it/s]

No prior records found for 560


 57%|██████████████████████▉                 | 574/1000 [02:03<01:30,  4.68it/s]

No prior records found for 573


 57%|███████████████████████                 | 575/1000 [02:03<01:29,  4.74it/s]

No prior records found for 574


 84%|█████████████████████████████████▊      | 844/1000 [03:00<00:32,  4.79it/s]

No prior records found for 843


100%|███████████████████████████████████████| 1000/1000 [03:34<00:00,  4.67it/s]


In [172]:
df = pd.DataFrame(field_points)
df

## 3. Save points to files

In [175]:
prefix = "Kenya1000"

In [176]:
df.to_csv(f"../data/{prefix}_street2sat_v3_points_in_field.csv", index=False)

In [177]:
df["key"] = df["name"]
df["name"] = df["dominant_crop"]

In [178]:
def generate_gdf(cols, coords_col):
    """Build a GeoDataFrame of points from one coordinate column of `df`.

    Reads the notebook-level DataFrame `df`.

    Args:
        cols: names of the `df` columns to carry over as attributes.
        coords_col: name of the `df` column holding (lat, lon) pairs, either
            as tuples or as their string representation.

    Returns:
        A GeoDataFrame with the `cols` columns and EPSG:4326 point geometry.
    """
    coords = df[coords_col]
    # Coordinates may be stored as strings; parse them back into pairs.
    # The length check avoids an IndexError on an empty frame, and isinstance
    # also accepts str subclasses.
    # NOTE(review): eval() executes arbitrary code; only safe for trusted data.
    if len(coords) > 0 and isinstance(coords.iloc[0], str):
        coords = coords.apply(eval)

    latlons = pd.DataFrame(coords.tolist(), columns=["lat", "lon"])
    # points_from_xy takes x (longitude) first, then y (latitude).
    points = gpd.points_from_xy(latlons["lon"], latlons["lat"], crs="EPSG:4326")
    gdf = gpd.GeoDataFrame(df[cols], geometry=points)
    return gdf

# Attribute columns exported with each point layer. The coordinate layers
# also carry the driving direction; the field coordinate layer additionally
# carries the per-crop score columns.
road_pixel_cols = ["input_img", "name", "time"]
field_pixel_cols = ["input_img", "name", "time"]
road_coord_cols = road_pixel_cols + ["driving_easting", "driving_northing", "is_right_hand_drive"]
field_coord_cols = road_coord_cols + crops

# One GeoDataFrame per coordinate column.
gdf_road_coord = generate_gdf(road_coord_cols, "coord")
gdf_road_pixel = generate_gdf(road_pixel_cols, "road_pixel_centroid")
gdf_field_coord = generate_gdf(field_coord_cols, "offset_field_coord")
gdf_field_pixel = generate_gdf(field_pixel_cols, "offset_field_pixel_centroid")

In [179]:
# Write each point layer to its own GeoJSON file under ../data/points/,
# named with the run prefix.
gdf_road_coord.to_file(f"../data/points/{prefix}_road_coord.geojson", driver="GeoJSON")
gdf_road_pixel.to_file(f"../data/points/{prefix}_road_pixel.geojson", driver="GeoJSON")
gdf_field_coord.to_file(f"../data/points/{prefix}_field_coord.geojson", driver="GeoJSON")
gdf_field_pixel.to_file(f"../data/points/{prefix}_field_pixel.geojson", driver="GeoJSON")