In [None]:
import sys
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import geopandas as gpd
from datetime import datetime
import xgboost as xgb
import seaborn as sns
import itertools
import xarray as xr

import sklearn.metrics
from tqdm import tqdm

In [None]:
# Load the pre-merged table from its Feather file. The cell below reads the
# 'datetime', 'lat' and 'lon' columns of this frame and writes weather columns back into it.
df = pd.read_feather("data/merged_data.feather")

# Full Run

In [None]:

# Timestamp layout used for the 'datetime' column of `df` (name kept as-is, typo
# included, in case other cells reference it).
timefomat = '%Y-%m-%d %H:%M:%S'

months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Path of the monthly ERA5 NetCDF file for a given year and 1-based month number.
date_path = lambda yr, mth: "data/raw/era5/era5_" + str(yr) + "_" + months[int(mth) - 1] + "_Europe.nc"

# Inclusive range of years to process (every month of each year is visited).
FIRST_YEAR = 2014
LAST_YEAR = 2016

# Observations are snapped to 1/STEPS_PER_DEGREE-degree steps measured from the
# grid minimum. NOTE(review): this presumes the weather files use a regular
# 0.25-degree grid -- confirm against the downloaded data.
STEPS_PER_DEGREE = 4

# Coordinate columns of the weather frame; every other column is a weather variable.
COORD_COLS = ["longitude", "latitude", "time"]
# Keys used to join observations to weather cells.
JOIN_KEYS = ["lat_idx", "lon_idx", "time"]

# NOTE: after this loop `year`/`month` hold the last processed pair (LAST_YEAR, 12).
for year, month in itertools.product(range(FIRST_YEAR, LAST_YEAR + 1), range(1, 13)):
    print("Doing", str(year), months[int(month) - 1])

    # Load weather data. The context manager closes the NetCDF file as soon as
    # its contents have been materialised into a DataFrame.
    print("Loading weather")
    with xr.open_dataset(date_path(year, month)) as dataset:
        wdf = dataset.to_dataframe().reset_index()

    cols = [d for d in wdf.columns if d not in COORD_COLS]

    # Make sure the target frame owns every weather column before slicing it,
    # so both `df` and the monthly slice always have the same columns.
    for col in cols:
        if col not in df.columns:
            df[col] = np.nan

    # Filter only relevant parts of data frame
    print("Filter dataframe")
    lat_range = (float(wdf['latitude'].min()), float(wdf['latitude'].max()))
    lon_range = (float(wdf['longitude'].min()), float(wdf['longitude'].max()))
    time_range = (wdf['time'].min(), wdf['time'].max())

    # Bounds are strict on purpose (unchanged from the original logic): rows
    # lying exactly on the edge of the file's lat/lon/time extent are skipped.
    dates = df['datetime']
    in_window = (
        (dates > time_range[0].strftime(timefomat))
        & (dates < time_range[1].strftime(timefomat))
        & (lat_range[0] < df['lat']) & (df['lat'] < lat_range[1])
        & (lon_range[0] < df['lon']) & (df['lon'] < lon_range[1])
    )
    # Explicit copy: the slice is modified below and must not alias `df`.
    df18 = df[in_window].copy()

    # Attach the nearest weather cell to every observation with one keyed merge
    # instead of scanning the whole weather frame once per observation.
    print("Find Nearest weather")
    if not df18.empty:
        mn_lat = lat_range[0]
        mn_lon = lon_range[0]

        # Integer grid-step indices are used as join keys rather than float
        # coordinates, so float32/float64 representation differences between
        # the two frames cannot cause spurious mismatches.
        lookup = pd.DataFrame({
            "lat_idx": np.round((df18['lat'] - mn_lat) * STEPS_PER_DEGREE).astype("int64"),
            "lon_idx": np.round((df18['lon'] - mn_lon) * STEPS_PER_DEGREE).astype("int64"),
            # Nearest whole hour ("h" is accepted by both old and new pandas).
            "time": pd.to_datetime(df18['datetime'], format=timefomat).dt.round("h"),
        })
        wdf["lat_idx"] = np.round((wdf['latitude'] - mn_lat) * STEPS_PER_DEGREE).astype("int64")
        wdf["lon_idx"] = np.round((wdf['longitude'] - mn_lon) * STEPS_PER_DEGREE).astype("int64")

        # how="left" keeps one output row per observation, in the same order;
        # observations without a matching cell get NaN weather values.
        # validate="many_to_one" raises MergeError if a weather key is duplicated.
        matched = lookup.merge(wdf, how="left", on=JOIN_KEYS, validate="many_to_one")
        weather_values = matched[cols].to_numpy()

        df18.loc[:, cols] = weather_values

        print("Saving data")
        df.loc[df18.index, cols] = weather_values
    else:
        print("Saving data")

    # Checkpoint after every month so a crash does not lose earlier months.
    df.to_feather("data/merged_with_weather.feather")