# F-VICE 🏔️

In [1]:
import pandas as pd
import os
import plotly.express as px
import xgboost as xgb
import json
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
from utils import get_itslive, get_processed_data

# Load the glacier catalogue. The encoding is given explicitly because the
# catalogue keys contain non-ASCII characters (e.g. "Isbræ"): with the
# platform default encoding the key lookup below can raise KeyError on
# machines whose locale is not UTF-8.
with open("glaciares.json", encoding="utf-8") as f:
    glaciares = json.load(f)

print(f"Found {len(glaciares)} glaciers in the JSON file.")

# Pick one catalogue entry by its key.
# NOTE(review): the value looks like a [lon, lat] pair judging by the helper's
# log output -- confirm against glaciares.json.
coords = glaciares["Groenlandia - Sermeq Kujalleq [Jakobshavn Isbræ]"]

# Both helpers come from the project-local `utils` module: get_itslive takes a
# list of catalogue entries, and its result is handed to get_processed_data.
# NOTE(review): presumably this downloads ITS_LIVE velocity data and derives
# the calendar columns used later -- verify in utils.
df = get_itslive([coords])
glacier = get_processed_data(df)

Found 9 glaciers in the JSON file.
original xy [-49.55383, 69.13788] 4326 maps to datacube (-181358.1596550405, -2277021.305723809) EPSG:3413


  ins3xr = xr.open_dataset(


In [3]:
# Show the frame returned by get_processed_data (pandas abbreviates long frames to head/tail rows).
glacier

Unnamed: 0,mid_date,v,v_error,vx,vx_error,vy,vy_error,date_dt,satellite_img1,mission_img1,x,y,lat,lon,year,month,dayofyear
0,1985-04-06 14:30:57.954894976+00:00,5340,196.0,-3604.0,173.500000,3940.0,212.500000,15 days 23:59:55.220947265,5,L,-181387.5,-2277052.5,69.13788,-49.55383,1985,4,96
1,1985-04-11 02:27:50.807382016+00:00,5044,178.0,-3362.0,138.500000,3760.0,204.000000,24 days 23:53:40.971679687,5,L,-181387.5,-2277052.5,69.13788,-49.55383,1985,4,101
2,1985-04-14 14:30:54.476366976+00:00,4675,144.0,-3017.0,123.900002,3571.0,157.199997,31 days 23:59:48.299560547,5,L,-181387.5,-2277052.5,69.13788,-49.55383,1985,4,104
3,1985-04-18 02:33:57.912401024+00:00,4425,98.0,-2737.0,75.900002,3477.0,110.199997,39 days 00:05:54.968261719,5,L,-181387.5,-2277052.5,69.13788,-49.55383,1985,4,108
4,1985-04-22 14:30:52.080955072+00:00,4754,305.0,-2967.0,249.600006,3714.0,336.000000,15 days 23:59:53.078613281,5,L,-181387.5,-2277052.5,69.13788,-49.55383,1985,4,112
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3644,2024-10-02 15:19:49.240922112+00:00,13620,32.0,-9607.0,24.000000,9655.0,38.000000,20 days 00:02:20.075683593,2B,S,-181387.5,-2277052.5,69.13788,-49.55383,2024,10,276
3645,2024-10-03 03:30:25.240924928+00:00,13682,32.0,-9571.0,25.400000,9778.0,37.500000,15 days 00:02:32.023315429,2B,S,-181387.5,-2277052.5,69.13788,-49.55383,2024,10,277
3646,2024-10-03 14:54:21.893806080+00:00,13648,40.0,-9514.0,36.099998,9785.0,43.000000,16 days 00:00:06.921386718,8,L,-181387.5,-2277052.5,69.13788,-49.55383,2024,10,277
3647,2024-10-03 14:54:45.780610048+00:00,13508,39.0,-9415.0,35.900002,9687.0,41.299999,16 days 00:00:06.921386718,8,L,-181387.5,-2277052.5,69.13788,-49.55383,2024,10,277


In [4]:
# Chronological hold-out: the first 70% of rows train the model and the
# remaining 30% are kept for testing.
#
# Sort by `mid_date` first so the positional slices below are a true
# time-based split even if the rows do not arrive in date order. The sort is
# stable, so data that is already ordered (including ties) is left unchanged.
glacier_by_date = glacier.sort_values('mid_date', kind='mergesort')

split_idx = int(len(glacier_by_date) * 0.70)

# `mid_date` is carried along for plotting; the model itself is fit on the
# calendar columns only.
X = glacier_by_date[['mid_date', 'year', 'month', 'dayofyear']]

# Target column.
y = glacier_by_date['v']

X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

In [5]:
# Tag each partition with its role and stack them (train first, then test)
# into a single long frame for plotting.
plot_df = pd.concat([
    pd.DataFrame({'date': X_train['mid_date'], 'v [m/yr]': y_train, 'split': 'train'}),
    pd.DataFrame({'date': X_test['mid_date'], 'v [m/yr]': y_test, 'split': 'test'}),
])

# One scatter series per partition, each with its own LOWESS trend line.
fig = px.scatter(
    plot_df,
    x='date',
    y='v [m/yr]',
    color='split',
    trendline='lowess',
    opacity=0.7,
    title='Train/Test Split: v [m/yr] over Time',
)
fig.show()

In [6]:
from sklearn.model_selection import TimeSeriesSplit, cross_val_score

# Calendar columns used as model inputs (`mid_date` itself is not a feature).
feature_cols = ['year', 'month', 'dayofyear']
train_features = X_train[feature_cols]

# Define the model
xgb_params = dict(
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=3,
    subsample=0.7,
    colsample_bytree=0.9,
    random_state=42,
)
model = xgb.XGBRegressor(**xgb_params)

# Fit on the full training partition; this fitted `model` is what later cells
# use for prediction.
model.fit(train_features, y_train)

# Time-ordered 5-fold cross-validation, restricted to the training partition.
tscv = TimeSeriesSplit(n_splits=5)
scores = cross_val_score(
    model,
    train_features,
    y_train,
    cv=tscv,
    scoring='neg_mean_squared_error',
)

# The scorer returns negated MSE, so flip the sign before taking the root.
rmse_scores = (-scores) ** 0.5
print(f"RMSE por fold: {rmse_scores}")
print(f"RMSE promedio: {rmse_scores.mean():.2f}")

RMSE por fold: [2481.23416872  994.74202058 1889.05790012  741.08143109 3900.58213604]
RMSE promedio: 2001.34


In [7]:
# Forecast up to 2030

# Daily grid that starts one day after the last row of the test partition.
start_date = X_test['mid_date'].iloc[-1] + pd.Timedelta(days=1)
end_date = pd.Timestamp('2030-12-31', tz='UTC')

# Build the same calendar features the model was trained on.
future_dates = pd.DataFrame(
    {'mid_date': pd.date_range(start=start_date, end=end_date, freq='D')}
).assign(
    year=lambda frame: frame['mid_date'].dt.year,
    month=lambda frame: frame['mid_date'].dt.month,
    dayofyear=lambda frame: frame['mid_date'].dt.dayofyear,
)

# NOTE(review): `model` was fit on the training partition only, so this
# forecast does not use the held-out rows; consider refitting on all data
# before forecasting. Tree ensembles also presumably cannot extrapolate a
# `year` trend beyond the years seen in training -- confirm this is intended.
future_predictions = model.predict(future_dates[['year', 'month', 'dayofyear']])

In [8]:
# Plot predictions

# Build the prediction frame from the model's output on the held-out rows.
# It must be model.predict(...), not y_test: the observations are already
# drawn as scatter points, and the "Predicción (test)" trace is meant to show
# what the model predicts for the same dates.
test_predictions = model.predict(X_test[['year', 'month', 'dayofyear']])
pred_df = pd.DataFrame({
    'date': X_test['mid_date'],
    'v_pred [m/yr]': test_predictions,
})

# Plot the observed points (train and test) with their LOWESS trend lines.
fig = px.scatter(
    plot_df,
    x='date',
    y='v [m/yr]',
    color='split',
    title='Train/Test Split & Predictions: v [m/yr] over Time',
    opacity=0.4,
    trendline='lowess',
)

# Overlay the model's predictions on the test window as a line.
fig.add_scatter(
    x=pred_df['date'],
    y=pred_df['v_pred [m/yr]'],
    mode='lines',
    name='Predicción (test)',
    line=dict(color='black', width=2),
)

# Overlay the forecast for the future dates.
fig.add_scatter(
    x=future_dates['mid_date'],
    y=future_predictions,
    mode='lines',
    name='Predicción (futuro)',
    line=dict(color='red', width=2),
)

fig.show()