In [1]:
from datetime import datetime, timedelta
from meteostat import Point, Daily

import pandas as pd
import geopandas as gpd

In [3]:
eea_shapefile_path = 'D:\Simon\Documents\GP\\feature_generation_info_files\eea_grid_files\eea_europe_grids_50km\inspire_compatible_grid_50km.shp'
eea_grid = gpd.read_file(eea_shapefile_path)
eea_grid = eea_grid.to_crs('EPSG:4326')

In [3]:
df_path = 'D:\Simon\Documents\GP\data\datasets\\selected_bird_species_with_grids_50km.csv'
df = pd.read_csv(df_path, index_col=0)
df = df.head(10)



In [4]:
df.head(5)

Unnamed: 0,id_sighting,id_species,name_species,date,timing,coord_lat,coord_lon,precision,altitude,total_count,atlas_code,id_observer,country,eea_grid_id
0,29666972,8.0,Haubentaucher,2018-01-01,,53.15776,8.676993,place,-1.05101,0.0,0,37718.0,de,50kmE4200N3300
1,29654244,397.0,Schwarzkehlchen,2018-01-01,,53.127639,8.957263,square,0.760781,2.0,0,37803.0,de,50kmE4250N3300
2,29654521,463.0,Wiesenpieper,2018-01-01,,50.850941,12.146953,place,270.8313,2.0,0,39627.0,de,50kmE4450N3050
3,29666414,8.0,Haubentaucher,2018-01-01,,51.076006,11.038316,place,158.94101,8.0,0,38301.0,de,50kmE4350N3100
4,29656211,8.0,Haubentaucher,2018-01-01,,51.38938,7.067282,place,52.36216,10.0,0,108167.0,de,50kmE4100N3100


## Weather with EE

In [9]:
import ee
import pandas as pd

In [15]:
ee.Authenticate()

True

In [16]:
ee.Initialize()

EEException: Not signed up for Earth Engine or project is not registered. Visit https://developers.google.com/earth-engine/guides/access

In [None]:
def get_weather_data(dataset, date, lat, lon):
    point = ee.Geometry.Point([lon, lat])
    image = ee.ImageCollection(dataset).filterDate(date, date).mean()
    data = image.reduceRegion(ee.Reducer.mean(), point, 500).getInfo()
    return data

In [None]:
df['weather_data'] = df.apply(lambda row: get_weather_data('dataset', row['date'], row['coord_lat'], row['coord_lon']), axis=1)


### Get center of every grid

In [5]:
germany_switzerland_bbox = eea_grid.cx[5.210942:15.669926, 45.614516:55.379499]
eea_grid_filtered = eea_grid[eea_grid.intersects(germany_switzerland_bbox.unary_union)]
eea_grid_filtered.reset_index(drop=True, inplace=True)

centroid_gdf = eea_grid_filtered.copy()
centroid_gdf['centroid'] = eea_grid_filtered['geometry'].centroid

centroid_gdf['centroid_lon'] = centroid_gdf['centroid'].x
centroid_gdf['centroid_lat'] = centroid_gdf['centroid'].y
centroid_gdf.drop(columns=['geometry', 'noforigin', 'eoforigin', 'gid', 'centroid'], inplace=True)
centroid_gdf.rename(columns={'cellcode': 'eea_grid_id'}, inplace=True)
centroid_gdf




Unnamed: 0,eea_grid_id,centroid_lon,centroid_lat
0,50kmE3850N2450,4.323137,45.234056
1,50kmE3900N2450,4.957784,45.266404
2,50kmE3950N2450,5.593060,45.294926
3,50kmE4000N2450,6.228888,45.319616
4,50kmE4050N2450,6.865188,45.340468
...,...,...,...
431,50kmE4650N3600,15.620184,55.604916
432,50kmE4700N3600,16.410233,55.567411
433,50kmE4600N3650,14.884182,56.085675
434,50kmE4650N3650,15.684511,56.052664


In [6]:
eea_grid_id = centroid_gdf.eea_grid_id
lat = centroid_gdf.centroid_lat
lon = centroid_gdf.centroid_lon

start_date = pd.Timestamp('2018-01-01')
end_date = pd.Timestamp('2022-12-31')
# end_date = pd.Timestamp('2018-01-1')
date = pd.date_range(start_date, end_date)


all_combinations = pd.MultiIndex.from_product([date, eea_grid_id], names=['date', 'eea_grid_id'])
all_combinations = pd.DataFrame(index=all_combinations).reset_index()
all_combinations = pd.merge(all_combinations, centroid_gdf[['eea_grid_id', 'centroid_lat', 'centroid_lon']], on='eea_grid_id', how='right')
all_combinations

Unnamed: 0,date,eea_grid_id,centroid_lat,centroid_lon
0,2018-01-01,50kmE3850N2450,45.234056,4.323137
1,2018-01-02,50kmE3850N2450,45.234056,4.323137
2,2018-01-03,50kmE3850N2450,45.234056,4.323137
3,2018-01-04,50kmE3850N2450,45.234056,4.323137
4,2018-01-05,50kmE3850N2450,45.234056,4.323137
...,...,...,...,...
796131,2022-12-27,50kmE4700N3650,56.014671,16.483487
796132,2022-12-28,50kmE4700N3650,56.014671,16.483487
796133,2022-12-29,50kmE4700N3650,56.014671,16.483487
796134,2022-12-30,50kmE4700N3650,56.014671,16.483487


In [None]:
def fetch_weather(row):
    location = Point(row.centroid_lat, row.centroid_lon)
    data = Daily(location, row.date, row.date).fetch()
    if data.empty:
        return None
    print(round((row.name / 796136), 3), '%')
    return data.iloc[0]

weather_df = all_combinations.copy()
weather_df[['tavg', 'tmin', 'tmax', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun']] = weather_df.apply(fetch_weather, axis=1)
weather_df

In [None]:
from concurrent.futures import ThreadPoolExecutor

num_threads = 8

with ThreadPoolExecutor(max_workers=num_threads) as executor:
    def fetch_weather_concurrent(row):
        location = Point(row['centroid_lat'], row['centroid_lon'])
        data = Daily(location, row['date'], row['date']).fetch()
        if data.empty:
            return None
        return data.iloc[0]

    weather_data_list = list(executor.map(fetch_weather_concurrent, all_combinations.to_dict('records')))

weather_df = all_combinations.copy()
weather_df[['tavg', 'tmin', 'tmax', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun']] = weather_data_list


In [None]:
weather_df_path = 'D:\Simon\Documents\GP\Raw_Data\weather_per_day_per_grid.csv'
weather_df.to_csv(weather_df_path)

In [None]:
# merged_df = df.merge(centroid_gdf[['eea_grid_id', 'centroid_lon', 'centroid_lat']], on='eea_grid_id', how='left')
# merged_df

### Add weather features and merge with dataframe

In [None]:
# df = pd.merge(df, df_weather, left_index=True, right_index=True)