In [134]:
from dataclasses import dataclass
from typing import Any, Optional

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
from shapely import wkt
from shapely.geometry import Point
from sklearn.neighbors import KDTree
from tqdm import tqdm

# Number of Birds Observed in Each Cell

In [129]:
# Load the per-cell covariates table and turn the WKT text stored in the
# "geometry" column into shapely geometry objects.
df = pd.read_csv("gen_data/temp_covariates.csv")
df["geometry"] = df["geometry"].map(wkt.loads)

# Pull the coordinates out of every geometry (x -> longitude, y -> latitude).
# This relies on each geometry exposing `.x` / `.y`, i.e. being a point.
longitudes = df["geometry"].map(lambda geom: geom.x)
latitudes = df["geometry"].map(lambda geom: geom.y)

In [133]:
# Put the two coordinate series side by side, one row per grid cell, and
# show the resulting row labels.
gen_df = pd.concat(
    [longitudes.rename("longitude"), latitudes.rename("latitude")],
    axis=1,
)
gen_df.index.to_numpy()

array([    0,     1,     2, ..., 24308, 24309, 24310])

In [144]:
# Index every grid cell by (latitude, longitude) so the nearest cell to an
# arbitrary coordinate can be looked up. Query points use the same
# (latitude, longitude) column order as the tree.
cell_coords = gen_df[["latitude", "longitude"]].values
tree = KDTree(cell_coords, leaf_size=2)

query_points = [[10, 15], [9, 5]]
index = tree.query(query_points, k=1, return_distance=False)
print(index)

# Coordinates of the cell found for the first query point.
nearest_cell = index[0][0]
latitudes[nearest_cell], longitudes[nearest_cell]

[[10816]
 [ 9975]]


(10.5, 14.5)

# Get Country Label For Each Cell

In [78]:
# Read the world administrative boundaries (one row per country/territory)
# and preview the first five rows.
world = gpd.read_file("source_data/world-administrative-boundaries.geojson")
world.head()

Unnamed: 0,geo_point_2d,iso3,status,color_code,name,continent,region,iso_3166_1_alpha_2_codes,french_short,geometry
0,"{'lon': 32.38621827281175, 'lat': 1.2799634451...",UGA,Member State,UGA,Uganda,Africa,Eastern Africa,UG,Ouganda,"POLYGON ((33.92110 -1.00194, 33.92027 -1.00111..."
1,"{'lon': 63.169364370421164, 'lat': 41.75044402...",UZB,Member State,UZB,Uzbekistan,Asia,Central Asia,UZ,Ouzbékistan,"POLYGON ((70.97081 42.25467, 70.98054 42.26205..."
2,"{'lon': -8.150578960214018, 'lat': 53.17638162...",IRL,Member State,IRL,Ireland,Europe,Northern Europe,IE,Irlande,"MULTIPOLYGON (((-9.97014 54.02083, -9.93833 53..."
3,"{'lon': 38.84128573461554, 'lat': 15.373203130...",ERI,Member State,ERI,Eritrea,Africa,Eastern Africa,ER,Érythrée,"MULTIPOLYGON (((40.13583 15.75250, 40.12861 15..."
4,"{'lon': -36.38236149455586, 'lat': -54.4881509...",,UK Territory,GBR,South Georgia & the South Sandwich Islands,Antarctica,,,South Georgia & the South Sandwich Islands,"MULTIPOLYGON (((-26.24361 -58.49473, -26.24889..."


In [124]:
@dataclass(frozen=True)
class Country:
    """Immutable pairing of a boundary geometry with its country code.

    Instances are hashed by ``code`` alone (see ``__hash__``), while the
    dataclass-generated ``__eq__`` still compares both fields. Entries that
    share a code (including a missing one) but differ in geometry therefore
    stay distinct members of a set.
    """

    # Boundary shape; the lookup code only relies on its ``contains`` method.
    geometry: Any
    # Two-letter country code; may be missing for some territories.
    code: Optional[str]

    def __hash__(self):
        """Hash on the code only, so hashing never touches the geometry."""
        return hash(self.code)

# One Country record per row of the boundaries table, kept in table order.
countries = [
    Country(shape, iso2)
    for iso2, shape in zip(world["iso_3166_1_alpha_2_codes"], world["geometry"])
]

In [127]:
# Label every grid cell with the code of the country whose boundary contains it.
#
# `by_count` buckets the countries by how many cells each one has already
# claimed. Buckets are scanned from the highest count downwards, so the
# countries that match most often are tested first and most cells are
# resolved after only a few `contains` calls.
by_count = {0: set(countries)}

codes = []
for lon, lat in tqdm(zip(longitudes, latitudes), total=len(longitudes)):
    # Build the point once per cell instead of once per candidate country.
    point = Point(lon, lat)

    match = None
    for score in sorted(by_count, reverse=True):
        # First country in this bucket whose boundary contains the point.
        # NOTE(review): candidates within a bucket are visited in set order,
        # so if two boundaries overlap, which one wins is not fixed - confirm
        # the boundaries are disjoint if reproducibility matters.
        match = next(
            (
                country
                for country in by_count[score]
                if country.geometry.contains(point)
            ),
            None,
        )
        if match is not None:
            # Promote the matched country to the next bucket up.
            by_count[score].remove(match)
            by_count.setdefault(score + 1, set()).add(match)
            break

    # Cells that fall inside no boundary are recorded with no code.
    codes.append(match.code if match is not None else None)

  0%|          | 0/24311 [00:00<?, ?it/s]

100%|██████████| 24311/24311 [03:38<00:00, 111.23it/s]


In [128]:
# Assemble the per-cell coordinates and country codes and save them to CSV.
# NOTE(review): Namibia's alpha-2 code is the literal string "NA", which
# pandas.read_csv treats as missing by default - readers of this file
# presumably need keep_default_na=False; confirm against downstream use.
new_df = pd.DataFrame(
    {
        "latitude": latitudes,
        "longitude": longitudes,
        "country_code": codes,
    }
)
new_df.to_csv("gen_data/grid_country_codes.csv", index=False)