In [1]:
# Se importan librerías y dependencias
from datetime import date, datetime, timedelta
import matplotlib.pyplot as plt
from meteostat import Stations, Daily
import pycaret
from pycaret.regression import *
import pandas as pd

In [2]:
# Download the daily weather observations.
# Station ID 87582 corresponds to the Aeroparque Bs. As. Aerodrome weather station.
start = datetime(1973, 1, 1)  # first date for which this station has data
end = datetime.now() - timedelta(days=1)  # up to yesterday

# Build the query and fetch it in a single step
data = Daily('87582', start, end).fetch()

In [3]:
# Se crean features adicionales
def create_features(data):
    """Add calendar features to ``data`` in place.

    Reads the frame's datetime index and writes four columns: ``day``,
    ``month``, ``year`` and ``season``. The season is a categorical label
    ('verano', 'otonio', 'invierno', 'primavera') chosen from the month and
    day only, with seasons changing on the 21st of March, June, September
    and December.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index exposes ``.day``, ``.month`` and ``.year``.
        It is modified in place.

    Returns
    -------
    None
    """
    idx = data.index
    data['day'] = idx.day
    data['month'] = idx.month
    data['year'] = idx.year

    # Exclusive (month, day) upper bound of each season, in calendar order.
    # Comparing (month, day) tuples ignores the year, so Feb 29 needs no
    # special handling.
    season_limits = (
        ((3, 21), 'verano'),
        ((6, 21), 'otonio'),
        ((9, 21), 'invierno'),
        ((12, 21), 'primavera'),
    )

    def get_season(now):
        month_day = (now.month, now.day)
        for limit, label in season_limits:
            if month_day < limit:
                return label
        # Dec 21 - Dec 31 wraps around to the first season of the year
        return 'verano'

    data['season'] = pd.Categorical(idx.map(get_season))

create_features(data)

# Add the minimum and maximum temperatures of the previous days as features:
# column '<name>-<i>' holds the value of '<name>' from i rows earlier.
DAYS = 5
for i in range(1, DAYS + 1):
    for col in ('tmin', 'tmax'):
        data[f'{col}-{i}'] = data[col].shift(i)

In [4]:
# Display the frame (original columns plus the features added above) as the cell output
data

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,...,tmin-1,tmax-1,tmin-2,tmax-2,tmin-3,tmax-3,tmin-4,tmax-4,tmin-5,tmax-5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1973-01-01,26.7,,,,,,,,,,...,,,,,,,,,,
1973-01-02,28.7,23.0,,,,,,,,,...,,,,,,,,,,
1973-01-03,24.4,20.0,,,,,,,,,...,23.0,,,,,,,,,
1973-01-04,26.7,22.0,,,,,,,,,...,20.0,,23.0,,,,,,,
1973-01-05,27.1,24.0,,,,,,,,,...,22.0,,20.0,,23.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-16,27.3,25.0,30.0,0.0,,37.0,14.1,,1006.7,,...,25.0,28.0,21.0,26.0,23.0,31.3,22.0,31.0,20.9,28.0
2023-12-17,22.3,19.0,28.0,26.7,,94.0,19.8,,1007.4,,...,25.0,30.0,25.0,28.0,21.0,26.0,23.0,31.3,22.0,31.0
2023-12-18,19.2,16.0,22.0,19.7,,151.0,17.3,,1014.4,,...,19.0,28.0,25.0,30.0,25.0,28.0,21.0,26.0,23.0,31.3
2023-12-19,16.7,13.0,20.0,0.0,,146.0,21.8,,1020.0,,...,16.0,22.0,19.0,28.0,25.0,30.0,25.0,28.0,21.0,26.0


In [5]:
# Training frame for the minimum-temperature model: the target ('tmin'), the
# previous day's min/max temperatures and the calendar features.
# dropna() is chained so it returns a new frame; calling it with inplace=True
# on a column selection of `data` raises SettingWithCopyWarning.
data_to_predict_tmin = data[
    ['tmin', 'tmin-1', 'tmax-1', 'day', 'month', 'year', 'season']
].dropna()

# Initialise the PyCaret regression experiment (fixed session_id for reproducibility)
s = setup(data_to_predict_tmin, target='tmin', session_id=124)

# Train the 'gbr' estimator (cross-validated scores are shown as the cell
# output), then finalize it so the returned model is ready for prediction.
model_tmin = create_model('gbr')
model_tmin = finalize_model(model_tmin)

Unnamed: 0,Description,Value
0,Session id,124
1,Target,tmin
2,Target type,Regression
3,Original data shape,"(7607, 7)"
4,Transformed data shape,"(7607, 10)"
5,Transformed train set shape,"(5324, 10)"
6,Transformed test set shape,"(2283, 10)"
7,Numeric features,5
8,Categorical features,1
9,Preprocess,True


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.782,5.1663,2.2729,0.8366,0.2035,0.1834
1,1.7596,5.0116,2.2387,0.8407,0.1987,0.1771
2,1.8013,5.4978,2.3447,0.8112,0.2233,0.1845
3,1.7658,5.118,2.2623,0.8178,0.1984,0.1758
4,1.8363,5.4723,2.3393,0.8067,0.1997,0.1734
5,1.8248,5.2784,2.2975,0.828,0.1951,0.1712
6,1.6361,4.2416,2.0595,0.8493,0.1772,0.152
7,1.773,5.1602,2.2716,0.8187,0.1927,0.1661
8,1.7129,5.0516,2.2476,0.8297,0.1972,0.1715
9,1.6925,4.7176,2.172,0.8387,0.2014,0.1651


In [6]:
# Training frame for the maximum-temperature model: the target ('tmax'), the
# same day's 'tmin', the previous day's min/max temperatures and the calendar
# features.
# dropna() is chained so it returns a new frame; calling it with inplace=True
# on a column selection of `data` raises SettingWithCopyWarning.
data_to_predict_tmax = data[
    ['tmin', 'tmax', 'tmin-1', 'tmax-1', 'day', 'month', 'year', 'season']
].dropna()

# Initialise the PyCaret regression experiment (fixed session_id for reproducibility)
s = setup(data_to_predict_tmax, target='tmax', session_id=125)

# Train the 'catboost' estimator (cross-validated scores are shown as the cell
# output), then finalize it so the returned model is ready for prediction.
model_tmax = create_model('catboost')
model_tmax = finalize_model(model_tmax)

Unnamed: 0,Description,Value
0,Session id,125
1,Target,tmax
2,Target type,Regression
3,Original data shape,"(6011, 8)"
4,Transformed data shape,"(6011, 11)"
5,Transformed train set shape,"(4207, 11)"
6,Transformed test set shape,"(1804, 11)"
7,Numeric features,6
8,Categorical features,1
9,Preprocess,True


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.4419,3.5804,1.8922,0.8919,0.088,0.0709
1,1.4327,3.5643,1.8879,0.8937,0.0913,0.0704
2,1.4497,3.7652,1.9404,0.8921,0.0923,0.0717
3,1.3599,3.4597,1.86,0.8902,0.0903,0.0683
4,1.5752,4.2077,2.0513,0.8726,0.0957,0.0759
5,1.4248,3.4366,1.8538,0.9079,0.0892,0.0707
6,1.5526,4.3421,2.0838,0.8759,0.1022,0.0795
7,1.6494,4.8025,2.1915,0.8628,0.1061,0.084
8,1.4442,3.7066,1.9252,0.8932,0.0851,0.0674
9,1.56,4.0704,2.0175,0.8803,0.0944,0.0756


In [7]:
#### PREDICTION ####
# Build the single-row DataFrame holding the inputs for today's forecast
today = date.today()
dates = pd.date_range(today, periods=1)

d = {}

# Only the 1-day lag is collected, matching the 'tmin-1' / 'tmax-1' features
# the models in this notebook are trained on.
for i in range(1, 2):
    # Look the past day's row up once and read both temperatures from it
    past_row = data.loc[str(today - timedelta(days=i))]
    for col in ('tmin', 'tmax'):
        d[f'{col}-{i}'] = [past_row[col]]

new_data = pd.DataFrame(data=d, index=dates)
create_features(new_data)  # adds day / month / year / season in place

new_data

Unnamed: 0,tmin-1,tmax-1,day,month,year,season
2023-12-21,17.0,23.0,21,12,2023,verano


In [8]:
# Predict tmin and store the model output under the column name 'tmin'
predictions = predict_model(model_tmin, data=new_data).rename(
    columns={"prediction_label": "tmin"}
)
predictions

Unnamed: 0,tmin-1,tmax-1,day,month,year,season,tmin
2023-12-21,17.0,23.0,21,12,2023,verano,18.183105


In [9]:
# Predict tmax from the frame that already carries the predicted 'tmin',
# and store the model output under the column name 'tmax'
predictions = predict_model(model_tmax, data=predictions).rename(
    columns={"prediction_label": "tmax"}
)
predictions

Unnamed: 0,tmin-1,tmax-1,day,month,year,season,tmin,tmax
2023-12-21,17.0,23.0,21,12,2023,verano,18.183105,24.738251
