In [1]:
import ipywidgets as widgets
from IPython.display import display
import requests
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime, timedelta

# Interaktiiviset widgetit
date_picker = widgets.DatePicker(
    description='Päivämäärä',
    value=datetime.today().date()
)

place_dropdown = widgets.Dropdown(
    options=['Helsinki', 'Oulu', 'Piippola', 'Rovaniemi', 'Tampere'],
    value='Piippola',
    description='Paikka:'
)

param_select = widgets.SelectMultiple(
    options=['t2m', 'ws_10min', 'rh', 'pressure', 'precipitation1h'],
    value=('t2m', 'rh'),
    description='Parametrit:'
)

fetch_button = widgets.Button(description='Hae säätiedot')
output = widgets.Output()

def fetch_weather(b):
    with output:
        output.clear_output()
        # API-kyselyn rakentaminen widgettien arvoista
        start = date_picker.value.strftime('%Y-%m-%dT00:00:00Z')
        end = (date_picker.value + timedelta(days=1)).strftime('%Y-%m-%dT00:00:00Z')
        
        url = f"https://opendata.fmi.fi/wfs?service=WFS&version=2.0.0&request=getFeature&storedquery_id=fmi::observations::weather::timevaluepair&place={place_dropdown.value}&starttime={start}&endtime={end}&parameters={','.join(param_select.value)}"
        
        # Data hakeminen ja visualisointi...
        print(f"Haetaan dataa: {url}")
        # ... lisää koodia datan käsittelyyn

fetch_button.on_click(fetch_weather)

# Asettelu
ui = widgets.VBox([
    widgets.HBox([date_picker, place_dropdown]),
    param_select,
    fetch_button,
    output
])

display(ui)

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2025, 12, 9), description='Päivämäärä', step=1), …

In [12]:
import os
import json
import pandas as pd
import numpy as np
from sentinelhub import (
    SentinelHubStatistical, Geometry, DataCollection, CRS, SHConfig
)
from scipy.signal import savgol_filter
import requests

# ---------------- CONFIG -------------------
os.environ["SH_CLIENT_ID"] = "94f737da-8c45-4b28-a4f4-9b53fb00b6cc"
os.environ["SH_CLIENT_SECRET"] = "yRGfObu8gGoDTKOnxVj1S5t7xwBTyOzG"

# ---------------- NDVI CLIENT -------------------
class NDVIStatisticsClient:
    def __init__(self, client_id, client_secret, resolution=10, data_dir="data"):
        self.client_id = client_id
        self.client_secret = client_secret
        self.resolution = resolution

        # Ensure data folder exists
        self.data_dir = data_dir
        os.makedirs(self.data_dir, exist_ok=True)

        # Load SH credentials
        self.config = SHConfig()
        self.config.sh_client_id = client_id
        self.config.sh_client_secret = client_secret
        self.config.instance_id = os.getenv("SH_INSTANCE_ID")

        self.token = self._authenticate()
        print("Authenticated token:", self.token)

    def _authenticate(self):
        url = "https://services.sentinel-hub.com/oauth/token"
        data = {
            "grant_type": "client_credentials",
            "client_id": self.client_id,
            "client_secret": self.client_secret
        }
        resp = requests.post(url, data=data)
        resp.raise_for_status()
        return resp.json()["access_token"]

    def _build_stat_request(self, geometry_geojson, date_from, date_to):
        ndvi_evalscript = """
        //VERSION=3
        function setup() {
            return {
                input: ["B04","B08","dataMask"],
                output: [
                    {id:"NDVI", bands:1, sampleType:"FLOAT32"},
                    {id:"dataMask", bands:1}
                ]
            };
        }
        function evaluatePixel(sample) {
            let ndvi = (sample.B08 - sample.B04)/(sample.B08 + sample.B04);
            return {NDVI:[ndvi], dataMask:[sample.dataMask]};
        }
        """
        geom_obj = Geometry(geometry_geojson, crs=CRS.WGS84)

        aggregation = SentinelHubStatistical.aggregation(
            evalscript=ndvi_evalscript,
            time_interval=(date_from, date_to),
            aggregation_interval="P1D",
            resolution=(self.resolution, self.resolution)
        )

        input_data = SentinelHubStatistical.input_data(DataCollection.SENTINEL2_L2A)

        request = SentinelHubStatistical(
            aggregation=aggregation,
            input_data=[input_data],
            geometry=geom_obj,
            config=self.config
        )
        return request

    def _get_save_paths(self, date_from, date_to):
        base = f"ndvi_{date_from}_{date_to}"
        csv_file = os.path.join(self.data_dir, base + ".csv")
        json_file = os.path.join(self.data_dir, base + ".json")
        return csv_file, json_file

    def _load_from_disk(self, csv_file, json_file):
        if os.path.exists(csv_file):
            print(f"Loaded NDVI from CSV cache: {csv_file}")
            df = pd.read_csv(csv_file)
            return df
        if os.path.exists(json_file):
            print(f"Loaded NDVI from JSON cache: {json_file}")
            df = pd.read_json(json_file)
            return df
        return None

    def _save_to_disk(self, df, csv_file, json_file):
        df.to_csv(csv_file, index=False)
        df.to_json(json_file, orient="records", indent=2)
        print(f"Saved NDVI to: {csv_file}")
        print(f"Saved NDVI to: {json_file}")

    # ---------------- MAIN METHOD -------------------
    def fetch_statistics(
        self, geometry_geojson, date_from, date_to, smooth_method='savgol',
        window=7, polyorder=2, fill_missing=True
    ):
        # Build save paths
        csv_file, json_file = self._get_save_paths(date_from, date_to)

        # Load cache if exists
        df_cached = self._load_from_disk(csv_file, json_file)
        if df_cached is not None:
            return df_cached

        # Fetch new data
        req = self._build_stat_request(geometry_geojson, date_from, date_to)
        try:
            data = req.get_data()
        except Exception as e:
            print("Request FAILED:", e)
            return pd.DataFrame()
        if not data:
            print("No data returned")
            return pd.DataFrame()

        # Parse into dataframe
        df = pd.DataFrame([
            {
                'date': pd.to_datetime(entry['interval']['from']),
                'min': stats['min'],
                'max': stats['max'],
                'mean': stats['mean'],
                'std': stats.get('stDev', stats.get('std', 0))
            }
            for entry in data[0]['data']
            for stats in [list(entry['outputs']['NDVI']['bands'].values())[0]['stats']]
        ])

        # Fill missing NDVI values
        if fill_missing:
            df['mean'] = df['mean'].astype(float).interpolate(method='linear', limit_direction='both')

        # Smooth NDVI
        if smooth_method == 'savgol' and len(df) >= window:
            df['mean_smooth'] = savgol_filter(df['mean'], window_length=window, polyorder=polyorder)
        else:
            df['mean_smooth'] = df['mean']

        # Growing season: May–September
        df = df[(df['date'].dt.month >= 5) & (df['date'].dt.month <= 9)].copy()
        df.sort_values('date', inplace=True)

        # Add year and day_of_year for ML
        df['year'] = df['date'].dt.year
        df['day_of_year'] = df['date'].dt.dayofyear

        # Drop full date column to simplify ML usage
        df = df[['year', 'day_of_year', 'min', 'max', 'mean', 'std', 'mean_smooth']]

        # Save results
        self._save_to_disk(df, csv_file, json_file)

        return df

# ---------------- USAGE EXAMPLE -------------------
geometry = {"type":"MultiPolygon","coordinates":[[[[26.878322063887,63.311237791188],[26.878210383755,63.311127053264],[26.878350385446,63.311033106844],[26.879191192544,63.310979667176],[26.879464866906,63.310962379787],[26.879701680754,63.310941138748],[26.879847289018,63.310917837033],[26.880211445045,63.31078830355],[26.881206228635,63.31039664547],[26.881288579858,63.310353284512],[26.881532572942,63.310360712245],[26.881700829469,63.310370410216],[26.881797056049,63.310339750986],[26.882334568098,63.310095692354],[26.882797119888,63.309911387811],[26.883108448744,63.309807121688],[26.883463636969,63.309756533726],[26.883510383732,63.309757936112],[26.883673739027,63.309918450969],[26.883738674074,63.310041199805],[26.883784434878,63.310094278776],[26.883952850997,63.310257534773],[26.884078174829,63.310359313368],[26.884149337208,63.31041777986],[26.884321316128,63.310559408797],[26.884318846541,63.310598016814],[26.883935071027,63.310748599924],[26.883509168981,63.31095297904],[26.883317387935,63.311025572764],[26.881677637236,63.311344490757],[26.881007972799,63.311457744175],[26.880906097594,63.311386120194],[26.880730006524,63.311324898811],[26.880563538684,63.311354242012],[26.880334914207,63.311392193646],[26.880353635611,63.311545303198],[26.880425469348,63.311556241036],[26.879749228825,63.311612277019],[26.87902085187,63.311685030682],[26.87870141608,63.311691993064]]]]}

CLIENT_ID = os.getenv('SH_CLIENT_ID')
CLIENT_SECRET = os.getenv('SH_CLIENT_SECRET')

client = NDVIStatisticsClient(CLIENT_ID, CLIENT_SECRET)

df = client.fetch_statistics(
    geometry,
    "2017-05-01",
    "2025-09-30"
)

print(df.head())


Authenticated token: eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJ3dE9hV1o2aFJJeUowbGlsYXctcWd4NzlUdm1hX3ZKZlNuMW1WNm5HX0tVIn0.eyJleHAiOjE3NjUxNjk4ODIsImlhdCI6MTc2NTE2NjI4MiwianRpIjoiMzc1MjI3YTUtYmExMi00ZmE2LWEyZmMtNjc1ODNjZDU1NTJhIiwiaXNzIjoiaHR0cHM6Ly9zZXJ2aWNlcy5zZW50aW5lbC1odWIuY29tL2F1dGgvcmVhbG1zL21haW4iLCJhdWQiOiJodHRwczovL2FwaS5wbGFuZXQuY29tLyIsInN1YiI6ImVjMmJjZjg1LTM3MGYtNDRiMS05M2UzLTVlYTdlY2FkYTIwYyIsInR5cCI6IkJlYXJlciIsImF6cCI6Ijk0ZjczN2RhLThjNDUtNGIyOC1hNGY0LTliNTNmYjAwYjZjYyIsInNjb3BlIjoiZW1haWwgcHJvZmlsZSIsImNsaWVudEhvc3QiOiI0Ni4xMzIuODIuMjQ1IiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJwbF9wcm9qZWN0IjoiZjE3MGRiZDMtMTNlZi00M2UyLTk5ZjAtMmFlNzllODliZTNiIiwicHJlZmVycmVkX3VzZXJuYW1lIjoic2VydmljZS1hY2NvdW50LTk0ZjczN2RhLThjNDUtNGIyOC1hNGY0LTliNTNmYjAwYjZjYyIsImNsaWVudEFkZHJlc3MiOiI0Ni4xMzIuODIuMjQ1IiwiY2xpZW50X2lkIjoiOTRmNzM3ZGEtOGM0NS00YjI4LWE0ZjQtOWI1M2ZiMDBiNmNjIiwiYWNjb3VudCI6ImYxNzBkYmQzLTEzZWYtNDNlMi05OWYwLTJhZTc5ZTg5YmUzYiIsInBsX3dvcmtzcGFjZSI6ImY2MDQxNGFiLTk5ZWQtNDA5MS1iM2U

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Suppose df_all contains all years 2017-2025
# Columns: ['year', 'day_of_year', 'min', 'max', 'mean', 'std', 'mean_smooth']

# Use only relevant columns
df_ml = df[['year', 'day_of_year', 'mean_smooth']].copy()

# Features and target
X = df_ml[['year', 'day_of_year']]
y = df_ml['mean_smooth']

# Split into training (80%) and test (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [15]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Initialize
rf = RandomForestRegressor(n_estimators=200, random_state=42)

# Train
rf.fit(X_train, y_train)

# Predict
y_pred = rf.predict(X_test)

# Evaluate
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"MSE: {mse:.4f}, R2: {r2:.4f}")


MSE: 0.0165, R2: 0.6489


In [16]:
import numpy as np

# All days of growing season (May–Sept)
days = np.arange(121, 274)  # May 1 = day 121, Sep 30 = day 273
years = np.array([2026]*len(days))

X_future = pd.DataFrame({'year': years, 'day_of_year': days})

ndvi_pred = rf.predict(X_future)

df_future = pd.DataFrame({
    'year': years,
    'day_of_year': days,
    'predicted_ndvi': ndvi_pred
})

print(df_future.head())


   year  day_of_year  predicted_ndvi
0  2026          121        0.148504
1  2026          122        0.127646
2  2026          123        0.113221
3  2026          124        0.082679
4  2026          125        0.052179


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score

# ----------------- Load NDVI JSON -----------------
json_file = "data/ndvi_2017-05-01_2025-09-30.json"
df = pd.read_json(json_file)

# Ensure proper types
df['year'] = df['year'].astype(int)
df['day_of_year'] = df['day_of_year'].astype(int)
df['mean_smooth'] = df['mean_smooth'].astype(float)

# Sort by year and day_of_year
df = df.sort_values(['year', 'day_of_year']).reset_index(drop=True)

# ----------------- Feature Engineering -----------------
# Lag feature: previous day's NDVI
df['NDVI_lag1'] = df['mean_smooth'].shift(1)

# Remove NaN (first row of each year may have missing lag)
df = df.dropna().reset_index(drop=True)

# Features and target
X = df[['day_of_year', 'NDVI_lag1']]
y = df['mean_smooth']

# ----------------- Train/Test Split -----------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# ----------------- Random Forest with hyperparameter tuning -----------------
param_dist = {
    'n_estimators': [100, 200, 500],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestRegressor(random_state=42)
random_search = RandomizedSearchCV(
    rf, param_distributions=param_dist,
    n_iter=20, cv=3, scoring='r2', n_jobs=-1, random_state=42
)
random_search.fit(X_train, y_train)

best_rf = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# ----------------- Evaluate -----------------
y_pred = best_rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"MSE: {mse:.4f}, R2: {r2:.4f}")

# ----------------- Predict for 2026 -----------------
days_2026 = np.arange(121, 274)  # DOY for May–Sept
# Use last known NDVI of 2025 as lag for first day
last_ndvi_2025 = df[df['year'] == 2025]['mean_smooth'].iloc[-1]

# Initialize lag list
lag_ndvi = [last_ndvi_2025]

# Iteratively predict day by day using lag feature
y_pred_2026 = []
for day in days_2026:
    X_pred = pd.DataFrame({'day_of_year': [day], 'NDVI_lag1': [lag_ndvi[-1]]})
    y_day = best_rf.predict(X_pred)[0]
    y_pred_2026.append(y_day)
    lag_ndvi.append(y_day)

df_pred_2026 = pd.DataFrame({'year': 2026, 'day_of_year': days_2026, 'NDVI_pred': y_pred_2026})
print(df_pred_2026.head())
