In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree

# Read the three input tables in one pass; paths are relative to the working directory.
evi, root_wetness, danger_ratings = (
    pd.read_csv(csv_path)
    for csv_path in ('EVI.csv', 'root_wetness.csv', '2023_danger_ratings.csv')
)

In [2]:
# Show a labelled five-row preview of each input table.
# Every argument is printed on its own line, matching one print() call per item.
print(
    "##### EVI #####",
    evi.head(5),
    "\n##### Root Wetness #####",
    root_wetness.head(5),
    "\n##### Danger Ratings #####",
    danger_ratings.head(5),
    sep="\n",
)

##### EVI #####
          lon        lat  evi        date
0 -139.062500  60.004167 -554  2023-03-22
1 -139.054167  60.004167 -554  2023-03-22
2 -139.045833  60.004167 -299  2023-03-22
3 -139.037500  60.004167 -299  2023-03-22
4 -139.029167  60.004167 -221  2023-03-22

##### Root Wetness #####
          lon        lat  root_wetness        date
0 -139.043207  60.029287      0.858716  2023-03-22
1 -138.924922  60.029287      0.563794  2023-03-22
2 -138.806638  60.029287      0.565503  2023-03-22
3 -138.688354  60.029287      0.566968  2023-03-22
4 -138.570070  60.029287      0.577222  2023-03-22

##### Danger Ratings #####
   STATION_CODE   DATE_TIME  DANGER_RATING   LATITUDE  LONGITUDE  ELEVATION_M
0            11  2023-05-05            1.0  48.927567  -124.6469          671
1            11  2023-05-06            1.0  48.927567  -124.6469          671
2            11  2023-05-07            1.0  48.927567  -124.6469          671
3            11  2023-05-08            1.0  48.927567  -124.

In [3]:
# Keep only the danger ratings recorded on the satellite acquisition date.
# The date is read from the first root_wetness row (not from the EVI table).
target_datetime = root_wetness['date'].iloc[0]

# Both columns are compared as loaded from CSV, so the two date formats must match exactly.
# .copy() makes `filtered` an independent frame instead of a slice of danger_ratings.
filtered = danger_ratings[danger_ratings['DATE_TIME'] == target_datetime].copy()

# The nearest-neighbour step further down needs at least one station row; stop here
# with a readable message rather than failing later with an opaque error.
if filtered.empty:
    raise ValueError(f"no danger ratings found for date {target_datetime!r}")

print(filtered.head())

       STATION_CODE   DATE_TIME  DANGER_RATING   LATITUDE   LONGITUDE  \
1917             72  2023-03-22            1.0  49.264481 -122.573202   
2179             75  2023-03-22            2.0  50.571080 -124.077719   
2435             82  2023-03-22            2.0  51.189111 -125.028056   
22360           280  2023-03-22            2.0  50.672017 -121.888183   
23601           298  2023-03-22            2.0  50.206667 -119.480000   

       ELEVATION_M  
1917           146  
2179            49  
2435           122  
22360          408  
23601          670  


In [4]:
# Give the station table the same coordinate column names ('lat' / 'lon') that
# to_radians() reads, so the helper works on both frames.
filtered = filtered.rename(
    columns={
        'LATITUDE': 'lat',
        'LONGITUDE': 'lon',
    }
)
def to_radians(df, lat_col='lat', lon_col='lon'):
    """Return a frame's coordinates as an (n_rows, 2) array in radians.

    The two columns are emitted in [latitude, longitude] order.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one latitude and one longitude column, in degrees.
    lat_col : str, default 'lat'
        Name of the latitude column.
    lon_col : str, default 'lon'
        Name of the longitude column.

    Returns
    -------
    numpy.ndarray
        Array of shape (len(df), 2) holding [lat, lon] converted to radians.

    Raises
    ------
    KeyError
        If either coordinate column is missing from ``df``.
    """
    coords_deg = df[[lat_col, lon_col]].to_numpy()
    return np.radians(coords_deg)

# Radian [lat, lon] arrays: df1_rad for the station rows, df2_rad for the root-wetness grid.
df1_rad, df2_rad = map(to_radians, (filtered, root_wetness))

In [5]:
# Nearest-neighbour join: for every station row in `filtered`, find the closest
# root-wetness grid point by great-circle (haversine) distance using a Ball Tree.
# The haversine metric takes [lat, lon] pairs in radians and returns distances in
# radians, so they are scaled by the Earth's radius to get kilometres.
EARTH_RADIUS_KM = 6371  # mean Earth radius, in km

tree = BallTree(df2_rad, metric='haversine')

# k=1 -> only the single closest grid point for each station.
distances, indices = tree.query(df1_rad, k=1)

filtered['match_index'] = indices.flatten()
filtered['distance_km'] = distances.flatten() * EARTH_RADIUS_KM

# `indices` are row positions in the array the tree was built from, so the join
# target must be indexed 0..n-1 in that same row order.
df2_reset = root_wetness.reset_index(drop=True)
joined = filtered.join(df2_reset, on='match_index', rsuffix='_df2')

# match_index is only the join key; it is not part of the result table.
joined = joined.drop(columns='match_index')

# The tree is built from every root_wetness row, so a nearest point could belong to a
# different date if that table holds more than one. Fail loudly instead of silently
# pairing a station with another day's reading.
assert (joined['date'] == joined['DATE_TIME']).all(), (
    "nearest root_wetness match has a different date than the station rating"
)

In [6]:
# Preview the first joined rows, followed by the total number of joined rows.
# NOTE(review): no CSV is written here - add an explicit export step if the joined
# rows need to be persisted.
print(joined.head(), len(joined), sep="\n")

       STATION_CODE   DATE_TIME  DANGER_RATING        lat         lon  \
1917             72  2023-03-22            1.0  49.264481 -122.573202   
2179             75  2023-03-22            2.0  50.571080 -124.077719   
2435             82  2023-03-22            2.0  51.189111 -125.028056   
22360           280  2023-03-22            2.0  50.672017 -121.888183   
23601           298  2023-03-22            2.0  50.206667 -119.480000   

       ELEVATION_M  distance_km     lon_df2    lat_df2  root_wetness  \
1917           146     2.069570 -122.601688  49.265415      0.880689   
2179            49     4.030499 -124.021099  50.566543      0.355908   
2435           122     5.302961 -125.085658  51.157964      0.351392   
22360          408     1.449362 -121.891982  50.684827      0.448194   
23601          670     3.341552 -119.526296  50.211690      0.389234   

             date  
1917   2023-03-22  
2179   2023-03-22  
2435   2023-03-22  
22360  2023-03-22  
23601  2023-03-22  
14
