# F-VICE 🏔️

In [1]:
import pandas as pd
import os
import plotly.express as px
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [2]:
# Read every CSV file found in the data directory into a dict keyed by file name.
data_dir = 'data/'
csv_names = [name for name in os.listdir(data_dir) if name.endswith('.csv')]
dataframes = {
    name: pd.read_csv(os.path.join(data_dir, name))
    for name in csv_names
}

# Show the names of the loaded files
print("Archivos cargados:", list(dataframes.keys()))

Archivos cargados: ['trinity.csv']


In [3]:
# Display the DataFrame loaded from 'trinity.csv' as the cell output.
dataframes['trinity.csv']

Unnamed: 0,mid_date,v [m/yr],satellite,dt (days)
0,2021-05-19T18:49:19.000Z,974,Sentinel 2,30
1,2019-07-30T07:00:10.000Z,955,Sentinel 2,105
2,2020-02-21T18:39:59.000Z,913,Sentinel 2,310
3,2020-08-14T06:30:19.999Z,648,Sentinel 2,85
4,2020-01-03T06:49:25.000Z,828,Sentinel 2,285
...,...,...,...,...
15077,2023-09-07T18:19:19.000Z,554,Sentinel 2,10
15078,2023-05-03T18:29:19.000Z,1034,Sentinel 2,30
15079,2023-07-06T18:03:05.396Z,1339,Landsat 9,32
15080,2024-05-21T00:39:31.508Z,948,Landsat 8,64


In [37]:
# Prepare the Trinity series and split it chronologically into train/test sets.
data = dataframes['trinity.csv']
cols = data.columns.tolist()
# The first column is used as the date and the second as the target value
# (assumes the CSV keeps that column order -- 'mid_date', then 'v [m/yr]').
date_col, target_col = cols[0], cols[1]

# Parse the dates directly as UTC. `utc=True` converts tz-aware values and
# localizes tz-naive ones, whereas a separate `.dt.tz_convert('UTC')` raises
# TypeError on tz-naive timestamps. Re-running this cell stays idempotent.
data[date_col] = pd.to_datetime(data[date_col], utc=True)

data[target_col] = data[target_col].astype(int)

data_sorted = data.sort_values(by=date_col)
X = data_sorted[date_col]
y = data_sorted[target_col]

# Split while respecting temporal order: the earliest 66% of the rows are
# used for training and the remaining, most recent rows for testing.
split_idx = int(len(X) * 0.66)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]


In [38]:
# Assemble one frame holding both partitions, labelled by split, for plotting.
split_labels = ['train'] * len(X_train) + ['test'] * len(X_test)
all_dates = pd.concat([X_train, X_test])
all_values = pd.concat([y_train, y_test])

plot_df = pd.DataFrame({'date': all_dates, 'v [m/yr]': all_values})
plot_df['split'] = split_labels

# Scatter of the observations coloured by split, with a LOWESS trendline.
scatter_options = dict(
    x='date',
    y='v [m/yr]',
    color='split',
    title='Train/Test Split: v [m/yr] over Time',
    opacity=0.7,
    trendline='lowess',
)
fig = px.scatter(plot_df, **scatter_options)
fig.show()

In [39]:
# Build and train the XGBoost model

FEATURE_COLUMNS = ['year', 'month', 'dayofyear']


def _calendar_frame(dates):
    """Return a frame with the given dates plus year/month/dayofyear columns."""
    frame = pd.DataFrame({'date': dates})
    for part in FEATURE_COLUMNS:
        frame[part] = getattr(frame['date'].dt, part)
    return frame


X_train_df = _calendar_frame(X_train)
X_test_df = _calendar_frame(X_test)

# Only the numeric calendar features are fed to the model.
X_train_num = X_train_df[FEATURE_COLUMNS]
X_test_num = X_test_df[FEATURE_COLUMNS]

xgb_params = dict(
    n_estimators=200,
    learning_rate=0.01,
    max_depth=3,
    subsample=0.5,
    colsample_bytree=1,
    random_state=42,
)
model = xgb.XGBRegressor(**xgb_params)
model.fit(X_train_num, y_train)

# Predict on the held-out test period
y_pred = model.predict(X_test_num)

# Compute the mean squared error on the test set
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

Mean Squared Error: 32086.12


In [40]:
# Forecast daily values from the day after the last test date through 2030

end_date = pd.Timestamp('2030-12-31', tz='UTC')
start_date = X_test.iloc[-1] + pd.Timedelta(days=1)

# One row per day; the calendar features mirror those used for training.
future_dates = pd.DataFrame(
    {'mid_date': pd.date_range(start=start_date, end=end_date, freq='D')}
)
for part in ('year', 'month', 'dayofyear'):
    future_dates[part] = getattr(future_dates['mid_date'].dt, part)

future_features = future_dates[['year', 'month', 'dayofyear']]
future_predictions = model.predict(future_features)

In [41]:
# Plot the observations together with the test-period and future predictions.

# Frame pairing each test date with the model's prediction for it
pred_df = pd.DataFrame({
    'date': X_test,
    'v_pred [m/yr]': y_pred
})

# Scatter of the real observations, coloured by train/test split
fig = px.scatter(
    plot_df,
    x='date',
    y='v [m/yr]',
    color='split',
    title='Train/Test Split & Predictions: v [m/yr] over Time',
    opacity=0.4,
    trendline='lowess',
)

# Overlay the test-period predictions as a line.
# The legend label is written with a proper 'ó'; the previous text was a
# mis-decoded (mojibake) form of the same word and rendered garbled.
fig.add_scatter(
    x=pred_df['date'],
    y=pred_df['v_pred [m/yr]'],
    mode='lines',
    name='Predicción (test)',
    line=dict(color='black', width=2),
)

# Overlay the future predictions as a second line
fig.add_scatter(
    x=future_dates['mid_date'],
    y=future_predictions,
    mode='lines',
    name='Predicción (futuro)',
    line=dict(color='red', width=2),
)

fig.show()