# F-VICE 🏔️

In [31]:
import pandas as pd
import os
import plotly.express as px
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [None]:
# Project-local loader; its implementation is not part of this notebook.
from utils import get_itslive

# Load the glacier table that every later cell works on.
# NOTE(review): presumably a pandas DataFrame of ITS_LIVE velocity
# observations (later cells call .columns and .sort_values on it) — confirm
# against utils.get_itslive.
glacier = get_itslive()

Archivos cargados: ['sermeq', 'trinity']


In [34]:
# Bare last expression: renders the loaded table as this cell's output.
glacier

Unnamed: 0,mid_date,v [m/yr],satellite,dt (days)
0,2021-11-30T15:28:41.000Z,907,Sentinel 2,250
1,1998-04-27T14:31:36.209Z,4430,Landsat 5,32
2,2020-05-11T10:00:19.663Z,8479,Sentinel 1,6
3,2019-09-03T09:52:11.335Z,12159,Sentinel 1,6
4,2021-04-19T09:52:16.087Z,10859,Sentinel 1,6
...,...,...,...,...
6755,2023-12-24T15:08:06.000Z,831,Sentinel 2,250
6756,2022-11-14T15:00:11.494Z,563,Landsat 8,384
6757,2023-06-06T03:28:55.000Z,9530,Sentinel 2,25
6758,2021-10-24T14:48:19.059Z,625,Landsat 8,408


In [44]:

# Fraction of the chronologically ordered rows used for training; the
# remaining (most recent) rows are held out as the test set.
TRAIN_FRACTION = 0.66

cols = glacier.columns.tolist()
# The first column is treated as the observation date and the second as the
# target value, exactly as before (positional, not by name).
date_col, target_col = cols[0], cols[1]

# Parse directly to timezone-aware UTC. For 'Z'-suffixed ISO strings this is
# identical to parsing and then calling .dt.tz_convert('UTC'), but it also
# works when the column is tz-naive, where tz_convert would raise TypeError.
glacier[date_col] = pd.to_datetime(glacier[date_col], utc=True)

glacier[target_col] = glacier[target_col].astype(int)

# Stable sort: rows sharing a timestamp keep their original relative order,
# so the train/test boundary is reproducible.
data_sorted = glacier.sort_values(by=date_col, kind='stable')
X = data_sorted[date_col]
y = data_sorted[target_col]

# Split while respecting temporal order: oldest rows train, newest rows test.
split_idx = int(len(X) * TRAIN_FRACTION)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

# Fail here with a clear message rather than later, when a downstream cell
# fits on the training rows or reads the last test row.
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError(
        f"Temporal split produced an empty partition "
        f"(train={len(X_train)}, test={len(X_test)}); need at least 2 rows."
    )


In [45]:
# Assemble one labelled frame per partition, then stack them (train first,
# test second) so the scatter can colour points by partition.
train_part = pd.DataFrame({'date': X_train, 'v [m/yr]': y_train, 'split': 'train'})
test_part = pd.DataFrame({'date': X_test, 'v [m/yr]': y_test, 'split': 'test'})
plot_df = pd.concat([train_part, test_part])

# Scatter of the target over time with a LOWESS trendline per partition.
split_scatter_options = {
    'x': 'date',
    'y': 'v [m/yr]',
    'color': 'split',
    'title': 'Train/Test Split: v [m/yr] over Time',
    'opacity': 0.7,
    'trendline': 'lowess',
}
fig = px.scatter(plot_df, **split_scatter_options)
fig.show()

In [46]:
# Build calendar features, then create and train the XGBoost model.

# Columns actually fed to the model, named once so train and test cannot drift.
FEATURE_COLS = ['year', 'month', 'dayofyear']


def _calendar_features(dates):
    """Return a frame holding the raw dates plus the derived calendar features.

    Parameters
    ----------
    dates : pandas.Series
        Datetime-like series (must support the ``.dt`` accessor).

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``year``, ``month`` and ``dayofyear``, indexed like
        ``dates``.
    """
    frame = pd.DataFrame({'date': dates})
    frame['year'] = frame['date'].dt.year
    frame['month'] = frame['date'].dt.month
    frame['dayofyear'] = frame['date'].dt.dayofyear
    return frame


# One helper call per partition replaces the previously duplicated code.
X_train_df = _calendar_features(X_train)
X_test_df = _calendar_features(X_test)

X_train_num = X_train_df[FEATURE_COLS]
X_test_num = X_test_df[FEATURE_COLS]

model = xgb.XGBRegressor(
    n_estimators=1000,
    learning_rate=0.01,
    max_depth=3,
    subsample=0.7,
    colsample_bytree=0.9,
    random_state=42
)
model.fit(X_train_num, y_train)

# Predict on the held-out (most recent) rows.
y_pred = model.predict(X_test_num)

# Mean squared error, in squared units of the target.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

Mean Squared Error: 36757080.00


In [47]:
# Forecast daily values from the day after the last test observation
# through the end of 2030.
# NOTE(review): the model is a tree ensemble (XGBRegressor); 'year' values
# later than any seen in training are unlikely to yield a new trend, so
# treat the long-range part of this forecast with caution.

end_date = pd.Timestamp('2030-12-31', tz='UTC')
start_date = X_test.iloc[-1] + pd.Timedelta(days=1)

daily_index = pd.date_range(start=start_date, end=end_date, freq='D')

# Same three calendar features the model was trained on.
future_dates = pd.DataFrame({'mid_date': daily_index}).assign(
    year=lambda frame: frame['mid_date'].dt.year,
    month=lambda frame: frame['mid_date'].dt.month,
    dayofyear=lambda frame: frame['mid_date'].dt.dayofyear,
)

forecast_inputs = future_dates[['year', 'month', 'dayofyear']]
future_predictions = model.predict(forecast_inputs)

In [48]:
# Plot observations together with test-set and future predictions.

# Frame pairing each test date with its predicted value.
pred_df = pd.DataFrame({'date': X_test, 'v_pred [m/yr]': y_pred})

# Base layer: observed points coloured by partition, with LOWESS trendlines.
base_scatter_options = {
    'x': 'date',
    'y': 'v [m/yr]',
    'color': 'split',
    'title': 'Train/Test Split & Predictions: v [m/yr] over Time',
    'opacity': 0.4,
    'trendline': 'lowess',
}
fig = px.scatter(plot_df, **base_scatter_options)

# Overlay lines, added in order: test-set predictions (black), then the
# future forecast (red).
prediction_traces = [
    (pred_df['date'], pred_df['v_pred [m/yr]'], 'Predicción (test)', 'black'),
    (future_dates['mid_date'], future_predictions, 'Predicción (futuro)', 'red'),
]
for trace_x, trace_y, trace_name, trace_colour in prediction_traces:
    fig.add_scatter(
        x=trace_x,
        y=trace_y,
        mode='lines',
        name=trace_name,
        line={'color': trace_colour, 'width': 2},
    )

fig.show()