In [6]:
import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV

In [2]:
X = np.load('np-array-h0.npy')
Y = np.load('np-array-h6.npy')
X_test = np.load('np-array-h0-2023.npy')
Y_test = np.load('np-array-h6-2023.npy')

# No tuning

In [3]:
LR = MultiOutputRegressor(LinearRegression())
LR.fit(X,Y)
print(f'R^2: {LR.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,LR.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,LR.predict(X_test))}')

R^2: 0.5665391437294972
MAE: 0.5008983016014099
MAPE: 0.0016757517587393522


In [7]:
RF = MultiOutputRegressor(RandomForestRegressor(n_jobs=-1))
RF.fit(X,Y)
print(f'R^2: {RF.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,RF.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,RF.predict(X_test))}')

R^2: 0.5670968634005283
MAE: 0.5019145787708343
MAPE: 0.0016794237840225493


In [8]:
KNN = MultiOutputRegressor(KNeighborsRegressor())
KNN.fit(X,Y)
print(f'R^2: {KNN.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,KNN.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,KNN.predict(X_test))}')

R^2: 0.38619797489725133
MAE: 0.5971375107765198
MAPE: 0.0019977844785898924


In [9]:
DT = MultiOutputRegressor(DecisionTreeRegressor())
DT.fit(X,Y)
print(f'R^2: {DT.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,DT.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,DT.predict(X_test))}')

R^2: 0.14707454217028645
MAE: 0.7016523588271368
MAPE: 0.0023476242051652494


# Tunning

In [12]:
LR_T = MultiOutputRegressor(LinearRegression(copy_X=True, fit_intercept=True, positive=False))
LR_T.fit(X,Y)
print(f'R^2: {LR.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,LR_T.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,LR_T.predict(X_test))}')

R^2: 0.5665391437294972
MAE: 0.5008983016014099
MAPE: 0.0016757517587393522


In [142]:
RF_T = MultiOutputRegressor(RandomForestRegressor(n_jobs=-1, n_estimators=200, max_features=30, min_samples_leaf=3, criterion='friedman_mse' ))
RF_T.fit(X,Y)
print(f'R^2: {RF_T.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,RF_T.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,RF_T.predict(X_test))}')

R^2: 0.5719131441700631
MAE: 0.4990948318161503
MAPE: 0.0016699927727830018


In [10]:
KNN_T = MultiOutputRegressor(KNeighborsRegressor(algorithm='ball_tree', leaf_size=25, metric='euclidean', n_neighbors=13, p=1, weights='distance'))
KNN_T.fit(X,Y)
print(f'R^2: {KNN_T.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,KNN_T.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,KNN_T.predict(X_test))}')

R^2: 0.41973497645117364
MAE: 0.580835492674114
MAPE: 0.0019429772568749228


In [11]:
DT_T = MultiOutputRegressor(DecisionTreeRegressor(criterion='squared_error',max_depth=10, min_samples_leaf=3, min_samples_split=4, splitter='random'))
DT_T.fit(X,Y)
print(f'R^2: {DT_T.score(X_test,Y_test)}')
print(f'MAE: {mean_absolute_error(Y_test,DT_T.predict(X_test))}')
print(f'MAPE: {mean_absolute_percentage_error(Y_test,DT_T.predict(X_test))}')

R^2: 0.35619771202959416
MAE: 0.6137887485632039
MAPE: 0.0020535893532511555


In [74]:
latitudes = np.array([12.5, 10. ,  7.5 , 5.  , 2.5 , 0.,  -2.5 ,-5. ]).astype(np.float32)
longitudes = np.array([280.0, 282.5, 285.0, 287.5, 290.0, 292.5, 295.0]).astype(np.float32)
lats, lons = np.meshgrid(latitudes, longitudes)
points = np.vstack((lats.flatten(), lons.flatten())).T
points_selected = points[np.random.choice(len(points), 5, replace=False)]
points_selected

array([[  7.5, 280. ],
       [ -2.5, 290. ],
       [ 10. , 280. ],
       [  5. , 295. ],
       [  0. , 285. ]], dtype=float32)

In [115]:
def select_points(points):
    return points[np.random.choice(len(points), 5, replace=False)]

In [133]:
def get_selected_points_indx(selected_points, points):
    inde = []
    for point_selected in selected_points:
        condition_array = np.array(point_selected)
        # Find indices where the matrix matches the condition
        inde.append(np.where(np.all(points == condition_array, axis=1))[0][0])
    return inde

In [76]:
inde = []
for point_selected in points_selected:
    condition_array = np.array(point_selected)
    # Find indices where the matrix matches the condition
    inde.append(np.where(np.all(points == condition_array, axis=1))[0][0])
inde

[2, 38, 1, 51, 21]

In [77]:
X[0][inde]

array([297.59998, 298.09998, 297.71   , 300.59998, 297.8    ],
      dtype=float32)

In [73]:
N = 4 * 2 * 3
N

24

In [78]:
X_test.shape

(120, 56)

In [85]:
days = np.random.choice(len(X_test), 24)
days

array([ 90,  89,  81, 109,  67,  52,  98,  91, 113,  66,  96,  77,  98,
       112, 103,  26,  26,  82,  67, 116,  56,  69,  40,  23])

In [111]:
del select_points

In [145]:
list_pd = []

In [146]:
for day in days:
    selected_points = select_points(points)
    index_sel = get_selected_points_indx(selected_points, points)
    X_day = X_test[day]
    Y_day = Y_test[day]
    print('No')
    Y_pred = LR.predict([X_day])[0]
    print(f'error LR: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])}')
    list_pd.append(['LR','NO',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])])
    Y_pred = RF.predict([X_day])[0]
    print(f'error RF: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])}')
    list_pd.append(['RF','NO',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])])
    Y_pred = DT.predict([X_day])[0]
    print(f'error DT: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])}')
    list_pd.append(['DT','NO',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])])
    Y_pred = KNN.predict([X_day])[0]
    print(f'error KNN: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])}')
    list_pd.append(['KNN','NO',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel])])
    print('Sí')
    Y_pred_T = LR.predict([X_day])[0]
    print(f'error LR_T: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])}')
    list_pd.append(['LR','SI',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])])
    Y_pred_T = RF.predict([X_day])[0]
    print(f'error RF_T: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])}')
    list_pd.append(['RF','SI',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])])
    Y_pred_T = DT.predict([X_day])[0]
    print(f'error DT_T: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])}')
    list_pd.append(['DT','SI',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])])
    Y_pred_T = KNN.predict([X_day])[0]
    print(f'error KNN_T: {mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])}')
    list_pd.append(['KNN','SI',mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred_T[index_sel])])
    print('****************************')

No
error LR: 0.509082019329071
error RF: 0.3399773559570122
error DT: 0.850006103515625
error KNN: 0.7268005609512329
Sí
error LR_T: 0.509082019329071
error RF_T: 0.3399773559570122
error DT_T: 0.850006103515625
error KNN_T: 0.7268005609512329
****************************
No
error LR: 0.5725036859512329
error RF: 0.3041829223632817
error DT: 0.835009765625
error KNN: 0.3562072813510895
Sí
error LR_T: 0.5725036859512329
error RF_T: 0.3041829223632817
error DT_T: 0.835009765625
error KNN_T: 0.3562072813510895
****************************
No
error LR: 0.4940185546875
error RF: 0.5769204711914085
error DT: 1.04500732421875
error KNN: 0.47800904512405396
Sí
error LR_T: 0.4940185546875
error RF_T: 0.5769204711914085
error DT_T: 1.04500732421875
error KNN_T: 0.47800904512405396
****************************
No
error LR: 0.578387439250946
error RF: 0.498681640625
error DT: 1.029498291015625
error KNN: 0.639910876750946
Sí
error LR_T: 0.578387439250946
error RF_T: 0.498681640625
error DT_T: 1.02

In [148]:
import pandas as pd

In [150]:
df = pd.DataFrame(list_pd,columns=['A','B','y'])
df

Unnamed: 0,A,B,y
0,LR,NO,0.509082
1,RF,NO,0.339977
2,DT,NO,0.850006
3,KNN,NO,0.726801
4,LR,SI,0.509082
...,...,...,...
187,KNN,NO,0.544501
188,LR,SI,0.445044
189,RF,SI,0.561181
190,DT,SI,0.634497


In [155]:
df.to_excel('results.xlsx', index=None)

In [None]:
for day in days:
    X_day = X_test[day]
    Y_day = Y_test[day]
    Y_pred = LR.predict([X_day])[0]
    selected_points = select_points(points)
    index_sel = get_selected_points_indx(selected_points, points)
    print(index_sel)
    print(mean_absolute_error(y_true=Y_day[index_sel], y_pred=Y_pred[index_sel]))