In [1]:
import pandas as pd
import ee
from tqdm import tqdm
import os

from download_utils import download_monthly_ndvi

import hopsworks

# Authenticate against Google Earth Engine, then initialise the client library.
# Both calls must run before the `ee.ImageCollection(...)` query in the NDVI download cell.
ee.Authenticate()
ee.Initialize()

In [None]:
# Switch read by the `if save:` cells below: set to True to write the intermediate
# NDVI CSVs under ../data/ndvi/, leave False to skip those writes.
save = False


# Open obs data

In [None]:
# Read the beetle observations and tag each row with the first day of its month.
obs = pd.read_csv('../data/beetle/artportalen/artportalen_final.csv')
obs = (
    obs
    .assign(Month=lambda frame: pd.to_datetime(frame['Date']).dt.to_period("M").dt.to_timestamp())
    # Labels must match the CSV header exactly; drop() raises on a missing label.
    .drop(columns=['Kommun', 'Lan', 'Quantity', 'Date', 'Pressence'])
)

print(f"Obs has {len(obs)} datapoints")
obs.head(5)

# Download NDVI data
We use Google Earth Engine to fetch this data (MODIS MOD13Q1 NDVI), unless a cached CSV is already on disk.

In [None]:
# MODIS is a dataset containing a time series of satellite images.
# Reuse the cached raw NDVI table when it exists; otherwise query Earth Engine.
# NOTE(review): the cache is read from 'ndvi_raw.csv', but the save cell below writes
# 'backfill_ndvi_raw.csv' — confirm which file name is intended.
try:
    ndvi_df = pd.read_csv('../data/ndvi/ndvi_raw.csv')
    ndvi_df['Month'] = pd.to_datetime(ndvi_df['Month'])

except FileNotFoundError:
    # Catch only a missing cache file: a bare `except:` would also trap
    # KeyboardInterrupt and genuine read/parse errors, and would then silently
    # start an Earth Engine download instead of surfacing the problem.
    modis = ee.ImageCollection("MODIS/061/MOD13Q1").select("NDVI")
    ndvi_df = download_monthly_ndvi(
        dataset=modis,
        points_df=obs[["row_id", "Lat", "Lon"]],
        months=obs["Month"].unique(),
    )

ndvi_df

In [None]:
# Write the raw NDVI table to disk only when saving is switched on.
if save:
    ndvi_df.to_csv('../data/ndvi/backfill_ndvi_raw.csv', index=False)


## 2. Create features

### 2.1 Anomalies: compare NDVI for a month vs. climatology (average of that calendar month across all years in the data)

In [None]:
# Calendar month number is the key that lines up the same month across years.
ndvi_df["Month_num"] = ndvi_df["Month"].dt.month

# Mean NDVI per coordinate and calendar month, taken over every row in ndvi_df.
ndvi_climatology = (
    ndvi_df
    .groupby(["Lat", "Lon", "Month_num"])["NDVI"]
    .mean()
    .rename("NDVI_clim")
    .reset_index()
)
ndvi_climatology

In [None]:
# Attach the climatological mean to every NDVI row; the left join keeps all rows of ndvi_df.
ndvi_features = pd.merge(
    ndvi_df,
    ndvi_climatology,
    how="left",
    on=["Lat", "Lon", "Month_num"],
)
ndvi_features

In [None]:
# Anomaly = observed NDVI minus the climatological mean for that coordinate and calendar month.
ndvi_features["NDVI_anom"] = ndvi_features["NDVI"].sub(ndvi_features["NDVI_clim"])
ndvi_features

### 2.2 Lagged features

In [None]:
# Order rows chronologically within each coordinate so that shifting looks backwards in time.
ndvi_features = ndvi_features.sort_values(by=["Lat", "Lon", "Month"])

MAX_LAG = 2

# shift(lag) moves values by `lag` ROWS inside each (Lat, Lon) group.
# NOTE(review): that equals "lag months ago" only if every group holds exactly one row
# per consecutive month — confirm against the output of download_monthly_ndvi.
for lag in range(1, MAX_LAG + 1):
    per_site = ndvi_features.groupby(["Lat", "Lon"])
    ndvi_features[f"NDVI_lag{lag}"] = per_site["NDVI"].shift(lag)
    ndvi_features[f"NDVI_anom_lag{lag}"] = per_site["NDVI_anom"].shift(lag)

ndvi_features


In [None]:
# Write the engineered NDVI features to disk only when saving is switched on.
if save:
    ndvi_features.to_csv('../data/ndvi/backfill_ndvi_features.csv', index=False)

## Map back to beetle dataset schema (1 row = 1 observation, 1 coordinate, 1 day)

In [None]:
# Reload the feature table from disk (replacing the in-memory frame) and restore the
# datetime dtype of Month, which a CSV round trip turns into plain strings.
# NOTE(review): this reads 'ndvi_features.csv', whereas the save cell above writes
# 'backfill_ndvi_features.csv' — confirm which file is the intended input.
ndvi_features = (
    pd.read_csv('../data/ndvi/ndvi_features.csv')
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
)

ndvi_features

In [None]:
# Join the NDVI features onto the observations by (row_id, Month).
# Lat/Lon are removed from the right-hand side first so the observation's own
# coordinates are kept without _x/_y suffixes; Month_num is a helper column only.
ndvi_final = pd.merge(
    obs,
    ndvi_features.drop(columns=["Lat", "Lon"]),
    how="left",
    on=["row_id", "Month"],
).drop(columns=['Month_num'])
ndvi_final

In [None]:
# Write the observation-level NDVI table to disk only when saving is switched on.
if save:
    ndvi_final.to_csv('../data/ndvi/backfill_ndvi_final.csv', index=False)

# Save to Hopsworks

In [2]:
# Load the saved observation-level table and keep only the columns that go to the
# feature store: Lat, Lon, Month and the two lagged NDVI values.
ndvi_final = (
    pd.read_csv('../data/ndvi/backfill_ndvi_final.csv')
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
    .drop(columns=["NDVI_clim", "NDVI", "row_id", "NDVI_anom", "NDVI_anom_lag1", "NDVI_anom_lag2"])
)
ndvi_final

Unnamed: 0,Lat,Lon,Month,NDVI_lag1,NDVI_lag2
0,58.788614,15.821428,2021-04-01,0.71975,0.45875
1,58.788632,15.817157,2021-04-01,0.71860,0.45815
2,58.786980,15.816672,2021-04-01,0.66210,0.66210
3,58.786361,15.815842,2021-04-01,0.64620,0.66210
4,58.835413,15.509221,2021-05-01,0.51110,0.61880
...,...,...,...,...,...
5017,59.276611,17.976767,2024-10-01,0.55590,0.60900
5018,63.828780,20.293920,2024-10-01,0.61105,0.68690
5019,57.668491,12.210855,2024-09-01,0.80535,0.80110
5020,60.596110,17.358899,2024-09-01,0.84515,0.83440


In [3]:
# Log in with the key taken from the HOPSWORKS_API_KEY environment variable
# (None when the variable is unset), then get a handle to the project's feature store.
project = hopsworks.login(api_key_value=os.environ.get("HOPSWORKS_API_KEY"))
fs = project.get_feature_store()

2026-01-05 11:58:18,918 INFO: Initializing external client
2026-01-05 11:58:18,918 INFO: Base URL: https://c.app.hopsworks.ai:443




To ensure compatibility please install the latest bug fix release matching the minor version of your backend (4.2) by running 'pip install hopsworks==4.2.*'


2026-01-05 11:58:20,331 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1286351


In [5]:
# Fetch version 1 of the 'ndvi' feature group, creating it on first use.
# NOTE(review): row_id was dropped before this point, so rows are keyed only by
# (Lat, Lon, Month); observations sharing a coordinate and month would collide on
# this key — confirm that is intended.
ndvi_fg = fs.get_or_create_feature_group(
    name="ndvi",
    version=1,
    description="NDVI values for different months at specific coordinates.",
    primary_key=["Lat", "Lon", "Month"],
    event_time="Month",
)

# Upload the dataframe; left as the last expression so the cell shows the call's return value.
ndvi_fg.insert(ndvi_final)



Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1286351/fs/1273971/fg/1890743


Uploading Dataframe: 100.00% |██████████| Rows 5022/5022 | Elapsed Time: 00:02 | Remaining Time: 00:00


Launching job: ndvi_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1286351/jobs/named/ndvi_1_offline_fg_materialization/executions


(Job('ndvi_1_offline_fg_materialization', 'SPARK'), None)