In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import ee
from tqdm import tqdm
from dateutil import relativedelta

ee.Authenticate()
ee.Initialize()


from download_utils import download_monthly_ndvi, era5_grid_to_points_df



In [3]:
save = False


# Overview
This notebook is run every 16 days. NDVI data is downloaded over 16 day periods.

# Download NDVI data

In [4]:
sweden_bbox = ee.Geometry.Rectangle([
    10.5, 55.0,   # lon_min, lat_min
    24.5, 69.5    # lon_max, lat_max
])

In [5]:
end = pd.Timestamp.today()
start = (end - relativedelta.relativedelta(months=2)).replace(day=1) ## -2 months to be able to create lagged features
print("Start of forecast estimate: ", start)
print("End of forecast estimate: ", end)

Start of forecast estimate:  2025-10-01 12:48:07.786232
End of forecast estimate:  2025-12-31 12:48:07.786232


In [6]:
months = [
    (pd.Timestamp.today() - pd.DateOffset(months=i)).replace(day=1)
    for i in range(3)
]

modis = ee.ImageCollection("MODIS/061/MOD13Q1").select("NDVI")

### Prepare dataframe of points to sample NDVI values from

In [74]:
months_df = pd.DataFrame({"Month": pd.to_datetime(months)})
months_df["Month"] = pd.to_datetime(months_df["Month"].dt.date)
months_df["key"] = 1

era5_points_df = pd.read_csv('../data/country/era5points_df.csv')
era5_points_df["key"] = 1
era5_points_df = era5_points_df.merge(months_df, on="key").drop(columns="key")
era5_points_df

Unnamed: 0,row_id,Lat,Lon,Month
0,0,69.5,10.5,2025-12-01
1,0,69.5,10.5,2025-11-01
2,0,69.5,10.5,2025-10-01
3,1,69.5,10.6,2025-12-01
4,1,69.5,10.6,2025-11-01
...,...,...,...,...
61753,20584,55.0,24.4,2025-11-01
61754,20584,55.0,24.4,2025-10-01
61755,20585,55.0,24.5,2025-12-01
61756,20585,55.0,24.5,2025-11-01


In [63]:
ndvi_df = download_monthly_ndvi(
    dataset=modis,
    points_df=era5_points_df,
    months=era5_points_df["Month"].unique(),
    chunk_size=4000
)

ndvi_df["Month"] = pd.to_datetime(ndvi_df["Month"])
ndvi_df

Months: 100%|██████████| 3/3 [07:46<00:00, 155.46s/it]



Unnamed: 0,row_id,Month,NDVI,Lat,Lon
0,9652,2025-12-01,-0.0743,62.699038,16.900679
1,9652,2025-12-01,-0.0743,62.699038,16.900679
2,9652,2025-12-01,-0.0743,62.699038,16.900679
3,9653,2025-12-01,-0.1233,62.699038,16.999494
4,9653,2025-12-01,-0.1233,62.699038,16.999494
...,...,...,...,...,...
94828,20584,2025-10-01,0.7327,55.000476,24.399366
94829,20584,2025-10-01,0.7327,55.000476,24.399366
94830,20585,2025-10-01,0.6654,55.000476,24.500426
94831,20585,2025-10-01,0.6654,55.000476,24.500426


In [64]:
# Average NDVI values for "duplicate" lat-lon-month combinations
ndvi_df = ndvi_df.groupby(['Lat', 'Lon', 'Month','row_id'], as_index=False)['NDVI'].mean()
ndvi_df

Unnamed: 0,Lat,Lon,Month,row_id,NDVI
0,55.000476,10.598997,2025-10-01,20446,0.7329
1,55.000476,10.598997,2025-11-01,20446,0.5882
2,55.000476,10.598997,2025-12-01,20446,0.1572
3,55.000476,10.899933,2025-10-01,20449,0.5063
4,55.000476,10.899933,2025-11-01,20449,0.0533
...,...,...,...,...,...
31606,69.499285,24.300551,2025-10-01,138,0.1092
31607,69.499285,24.300551,2025-11-01,138,-0.1063
31608,69.499285,24.399366,2025-10-01,139,-0.0869
31609,69.499285,24.399366,2025-11-01,139,-0.0698


In [65]:
ndvi_features = ndvi_df.copy()
ndvi_features = ndvi_features.sort_values(["Lat", "Lon", "Month"])


MAX_LAG = 2

for lag in range(1, MAX_LAG + 1):
    ndvi_features[f"NDVI_lag{lag}"] = (
        ndvi_features
        .groupby(["Lat", "Lon"])["NDVI"]
        .shift(lag)
    )

In [66]:
ndvi_features.dropna(inplace=True)
ndvi_features

Unnamed: 0,Lat,Lon,Month,row_id,NDVI,NDVI_lag1,NDVI_lag2
2,55.000476,10.598997,2025-12-01,20446,0.1572,0.58820,0.7329
5,55.000476,10.899933,2025-12-01,20449,-0.0251,0.05330,0.5063
8,55.000476,11.899309,2025-12-01,20459,0.1877,0.21690,0.2507
11,55.000476,12.000369,2025-12-01,20460,0.5405,0.62095,0.6857
14,55.000476,12.099184,2025-12-01,20461,0.2024,0.70430,0.7595
...,...,...,...,...,...,...,...
18017,62.600223,17.599119,2025-12-01,9800,0.3549,0.36050,0.7708
18222,62.699038,16.900679,2025-12-01,9652,-0.0743,0.63200,0.7321
18225,62.699038,16.999494,2025-12-01,9653,-0.1233,0.47085,0.7940
18230,62.699038,17.199369,2025-12-01,9655,0.3156,0.65870,0.8148


In [77]:
ndvi_final = era5_points_df.merge(ndvi_features.drop(columns=['Lat', 'Lon']), on=['row_id','Month'], how='left')
ndvi_final.dropna()

Unnamed: 0,row_id,Lat,Lon,Month,NDVI,NDVI_lag1,NDVI_lag2
28956,9652,62.7,16.9,2025-12-01,-0.0743,0.63200,0.7321
28959,9653,62.7,17.0,2025-12-01,-0.1233,0.47085,0.7940
28965,9655,62.7,17.2,2025-12-01,0.3156,0.65870,0.8148
28968,9656,62.7,17.3,2025-12-01,0.2734,0.40175,0.6062
29325,9775,62.6,15.1,2025-12-01,0.2693,0.44165,0.7275
...,...,...,...,...,...,...,...
61743,20581,55.0,24.1,2025-12-01,0.1611,0.29850,0.6477
61746,20582,55.0,24.2,2025-12-01,0.0859,0.40210,0.6992
61749,20583,55.0,24.3,2025-12-01,0.1033,0.45150,0.7640
61752,20584,55.0,24.4,2025-12-01,0.1044,0.33725,0.7327


In [None]:
if save: ndvi_final.to_csv('../data/ndvi/ndvi_' + str(end.month) + '_' + str(end.year) + '_.csv', index=False)

