In [11]:
from pathlib import Path

import numpy as np
import pandas as pd

# Load the raw NDVI time series (Earth Engine export, per the original note).
df = pd.read_csv("../data/raw/vineyard-ndvi-GV.csv")

# The export may also carry geometry columns (.geo or longitude/latitude).
# They are left in the frame untouched; only the NDVI value, the acquisition
# date and the point identifier are used below.
if "NDVI" in df.columns:
    # Normalise the band name to the lowercase column name used downstream.
    df = df.rename(columns={"NDVI": "ndvi"})

# Fail early, and readably, if the export schema has drifted. Without this
# check a missing "ndvi" column only surfaces as a bare KeyError inside the
# per-(point, year) feature loop further down.
missing_cols = [col for col in ("ndvi", "date", "point_id") if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"vineyard-ndvi-GV.csv is missing required column(s): {missing_cols}; "
        f"found: {list(df.columns)}"
    )

df["date"] = pd.to_datetime(df["date"])
df = df.sort_values(["point_id", "date"])

# Calendar helpers derived from the acquisition date.
df["year"] = df["date"].dt.year
df["doy"] = df["date"].dt.dayofyear

# Keep only observations from March through October (both bounds inclusive).
# NOTE(review): presumably this window is the growing season — confirm.
SEASON_FIRST_MONTH = 3
SEASON_LAST_MONTH = 10
df = df[df["date"].dt.month.between(SEASON_FIRST_MONTH, SEASON_LAST_MONTH)]

# Minimum number of non-null NDVI observations a (point, year) series needs
# before features are computed for it.
MIN_OBS = 8

# Fixed output schema, so features_df has these columns even when every
# (point, year) group is skipped and the feature list ends up empty.
FEATURE_COLUMNS = [
    "point_id",
    "year",
    "n_obs",
    "ndvi_mean",
    "ndvi_max",
    "ndvi_min",
    "ndvi_std",
    "ndvi_slope",
    "ndvi_drop",
]

features = []

# One feature row per (point_id, year) series.
for (pid, year), g in df.groupby(["point_id", "year"]):
    g = g.dropna(subset=["ndvi"]).sort_values("date")
    if len(g) < MIN_OBS:
        # too few observations (cloudy point/year), skip
        continue

    ndvi = g["ndvi"].values
    doy = g["doy"].values

    # Slope of a degree-1 least-squares fit of NDVI against day of year.
    # The fit is rank-deficient when every observation shares the same day
    # of year (e.g. duplicated rows): polyfit would emit a RankWarning and
    # return a meaningless coefficient, so report NaN instead.
    if doy.max() > doy.min():
        slope = np.polyfit(doy, ndvi, 1)[0]
    else:
        slope = np.nan

    features.append({
        "point_id": pid,
        "year": year,
        "n_obs": len(g),
        "ndvi_mean": float(ndvi.mean()),
        "ndvi_max": float(ndvi.max()),
        "ndvi_min": float(ndvi.min()),
        # numpy's default std is the population standard deviation (ddof=0).
        "ndvi_std": float(ndvi.std()),
        "ndvi_slope": float(slope),
        # Series maximum minus the last (latest-dated) observation.
        "ndvi_drop": float(ndvi.max() - ndvi[-1]),
    })

features_df = pd.DataFrame(features, columns=FEATURE_COLUMNS)

print("rows:", len(features_df))
features_df.head()


rows: 828


Unnamed: 0,point_id,year,n_obs,ndvi_mean,ndvi_max,ndvi_min,ndvi_std,ndvi_slope,ndvi_drop
0,102054180122,2022,37,0.306133,0.434287,0.122684,0.077606,0.000862,0.156438
1,102054180122,2023,32,0.298501,0.384086,0.139375,0.066961,0.000744,0.0183
2,102054180122,2024,31,0.29934,0.382573,0.203962,0.048465,0.000436,0.027209
3,102054180123,2022,36,0.360147,0.515406,0.136428,0.099844,0.001069,0.207621
4,102054180123,2023,32,0.364664,0.455383,0.233659,0.057459,0.000596,0.060964


In [None]:
# Write the per-(point, year) feature table to disk without the index column.
features_out_path = Path("../data/processed/vineyard_features_points.csv")
# to_csv does not create missing directories, so make sure the target folder
# exists first (e.g. on a fresh checkout where data/processed is absent).
features_out_path.parent.mkdir(parents=True, exist_ok=True)
features_df.to_csv(features_out_path, index=False)
