In [2]:
import pandas as pd
from sklearn.model_selection import ParameterGrid
from neuralprophet import NeuralProphet, set_log_level, set_random_seed
from src.model.features import add_stock_price_feature

# Keep NeuralProphet quiet: only messages at ERROR level are emitted.
set_log_level("ERROR")

# Read the price history ('ds' is parsed as datetimes) and pass it straight
# through the project's feature helper.
# NOTE(review): add_stock_price_feature lives in src.model.features; presumably
# it derives extra columns and may drop leading rows -- confirm there.
df = add_stock_price_feature(
    pd.read_csv('data/stocks/2330_stock_data_0317.csv', parse_dates=['ds'])
)
# Optional visual check of the target series: df.plot(x="ds", y="y", figsize=(15, 5))
# Print column names, non-null counts and dtypes as a quick sanity check.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 566 entries, 4 to 569
Data columns (total 17 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Unnamed: 0                566 non-null    int64         
 1   ds                        566 non-null    datetime64[ns]
 2   open_price                566 non-null    float64       
 3   high_price                566 non-null    float64       
 4   low_price                 566 non-null    float64       
 5   y                         566 non-null    float64       
 6   volume                    566 non-null    int64         
 7   foreign                   566 non-null    int64         
 8   investment                566 non-null    int64         
 9   dealer                    566 non-null    int64         
 10  ratio_over_400_shares     566 non-null    float64       
 11  shareholders_400_to_600   566 non-null    int64         
 12  shareholders_600_to_80

In [3]:
from src.model.utils import val_mape

# Lag counts to try for each candidate lagged-regressor column.
# A value of 0 means "leave this column out of the model" (see the `> 0`
# filter inside the loop).
param_grid = {
    'foreign': [0, 5, 10],
    'investment': [0, 5, 10],
    'dealer': [0, 5, 10]
}

# One record per grid combination: the lag settings plus the scores.
results = []
for params in ParameterGrid(param_grid):
    print([params])

    # A fresh model for every combination: weekly seasonality only,
    # 3 autoregressive lags, and Taiwan ("TW") country holidays.
    m = NeuralProphet(
        n_lags=3,
        weekly_seasonality=True,
        yearly_seasonality=False,
    ).add_country_holidays("TW")

    # Regressors are registered in this fixed order; a lag of 0 drops the column.
    # ('volume' with 8 lags and 'high_low_diff' with 10 lags are disabled candidates.)
    active_regressors = [
        (name, params[name])
        for name in ('foreign', 'investment', 'dealer')
        if params[name] > 0
    ]
    for name, n_lags in active_regressors:
        m.add_lagged_regressor(name, n_lags=n_lags)

    # The model only sees the timestamp, the target and the active regressors.
    model_columns = ['ds', 'y'] + [name for name, _ in active_regressors]

    # Hold out 20% of the data for validation, then train with a fixed seed so
    # that every combination starts from the same random state.
    df_train, df_val = m.split_df(df[model_columns], valid_p=0.2)
    set_random_seed(0)
    metrics = m.fit(df_train, validation_df=df_val)

    # Extend the frame 7 periods past the end of the data;
    # n_historic_predictions=True also keeps predictions over the historic range.
    # NOTE(review): with n_lags > 0 NeuralProphet may cap `periods` at
    # n_forecasts (left at its default here) -- confirm against the library docs.
    df_future = m.make_future_dataframe(
        df[model_columns],
        n_historic_predictions=True,
        periods=7,
    )

    # Predict over the historic range plus the future rows.
    forecast = m.predict(df_future)

    # RMSE_val is read from the last row of the training metrics table.
    rmse = metrics.iloc[-1]['RMSE_val']
    # val_mape is a project helper (src.model.utils); presumably it scores the
    # forecast against df_val -- verify there.
    mape = val_mape(df_val, forecast)
    results.append(dict(params, RMSE=rmse, MAPE=mape))

[{'dealer': 0, 'foreign': 0, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]



Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 266.02it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1440, MAE_val=14.80, RMSE_val=18.10, Loss_val=0.00162, RegLoss_val=0.000, train_loss=0.000691, reg_loss=0.000, MAE=8.240, RMSE=11.90, Loss=0.000666, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 223.14it/s]
[{'dealer': 0, 'foreign': 0, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]


Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 272.53it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1441, MAE_val=14.50, RMSE_val=17.70, Loss_val=0.00157, RegLoss_val=0.000, train_loss=0.00068, reg_loss=0.000, MAE=8.430, RMSE=11.90, Loss=0.000678, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 193.15it/s]
[{'dealer': 0, 'foreign': 0, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]


Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 277.26it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1442, MAE_val=15.30, RMSE_val=18.20, Loss_val=0.00165, RegLoss_val=0.000, train_loss=0.000701, reg_loss=0.000, MAE=8.220, RMSE=11.70, Loss=0.000667, RegLoss=0.000]
Epoch 140: 100%|██████████| 140/140 [00:21<00:00,  6.42it/s]    
Epoch 140: 100%|██████████| 140/140 [00:10<00:00, 12.79it/s]    
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 244.75it/s]
[{'dealer': 0, 'foreign': 5, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 275.72it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1443, MAE_val=15.00, RMSE_val=18.50, Loss_val=0.0017, RegLoss_val=0.000, train_loss=0.000663, reg_loss=0.000, MAE=8.290, RMSE=11.90, Loss=0.000663, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 322.59it/s]
[{'dealer': 0, 'foreign': 5, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 282.73it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1444, MAE_val=14.90, RMSE_val=18.20, Loss_val=0.00166, RegLoss_val=0.000, train_loss=0.000667, reg_loss=0.000, MAE=8.260, RMSE=11.70, Loss=0.00065, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 296.58it/s]
[{'dealer': 0, 'foreign': 5, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 278.14it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1445, MAE_val=15.70, RMSE_val=18.90, Loss_val=0.00179, RegLoss_val=0.000, train_loss=0.000662, reg_loss=0.000, MAE=8.110, RMSE=11.50, Loss=0.000634, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 244.35it/s]
[{'dealer': 0, 'foreign': 10, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 280.80it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1446, MAE_val=15.40, RMSE_val=18.80, Loss_val=0.00175, RegLoss_val=0.000, train_loss=0.000666, reg_loss=0.000, MAE=7.950, RMSE=11.30, Loss=0.000632, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 225.96it/s]
[{'dealer': 0, 'foreign': 10, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 276.02it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1447, MAE_val=15.50, RMSE_val=18.90, Loss_val=0.00178, RegLoss_val=0.000, train_loss=0.000656, reg_loss=0.000, MAE=8.100, RMSE=11.50, Loss=0.00063, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 242.71it/s]
[{'dealer': 0, 'foreign': 10, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 274.83it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1448, MAE_val=15.50, RMSE_val=18.60, Loss_val=0.00173, RegLoss_val=0.000, train_loss=0.000656, reg_loss=0.000, MAE=8.390, RMSE=12.00, Loss=0.000652, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 249.96it/s]
[{'dealer': 5, 'foreign': 0, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 273.20it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1449, MAE_val=15.80, RMSE_val=19.20, Loss_val=0.00185, RegLoss_val=0.000, train_loss=0.000667, reg_loss=0.000, MAE=8.330, RMSE=11.80, Loss=0.000663, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 249.90it/s]
[{'dealer': 5, 'foreign': 0, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 279.08it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1450, MAE_val=16.20, RMSE_val=19.60, Loss_val=0.00191, RegLoss_val=0.000, train_loss=0.000679, reg_loss=0.000, MAE=8.390, RMSE=11.90, Loss=0.000665, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 266.31it/s]
[{'dealer': 5, 'foreign': 0, 'investment': 10}]
Epoch 140: 100%|██████████| 140/140 [01:25<00:00,  1.63it/s]    
Epoch 140: 100%|██████████| 140/140 [01:14<00:00,  1.87it/s]   
Epoch 140: 100%|██████████| 140/140 [01:04<00:00,  2.18it/s]    
Epoch 140: 100%|██████████| 140/140 [00:53<00:00,  2.61it/s]    
Epoch 140: 100%|██████████| 140/140 [00:43<00:00,  3.25it/s]    
Epoch 140: 100%|██████████| 140/140 [00:32<00:00,  4.32it/s]
Epoch 140: 100%|██████████| 140/140 [00:21<00:00,  6.45it/s]   
Epoch 140: 100%|██████████| 140/140 [00:11<00:00, 12.72it/s]   
Epoch 140: 100%|██████████| 140/140 [00:00<00:00, 369.82it/s]   


Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 274.72it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1451, MAE_val=17.00, RMSE_val=20.40, Loss_val=0.00207, RegLoss_val=0.000, train_loss=0.000661, reg_loss=0.000, MAE=8.090, RMSE=11.60, Loss=0.000639, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 200.01it/s]
[{'dealer': 5, 'foreign': 5, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 280.14it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1452, MAE_val=15.80, RMSE_val=19.30, Loss_val=0.00186, RegLoss_val=0.000, train_loss=0.000658, reg_loss=0.000, MAE=8.120, RMSE=11.60, Loss=0.000644, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 257.05it/s]
[{'dealer': 5, 'foreign': 5, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 274.46it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1453, MAE_val=15.80, RMSE_val=19.30, Loss_val=0.00186, RegLoss_val=0.000, train_loss=0.00065, reg_loss=0.000, MAE=8.230, RMSE=12.10, Loss=0.000654, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 285.33it/s]
[{'dealer': 5, 'foreign': 5, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 272.28it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1454, MAE_val=16.90, RMSE_val=20.40, Loss_val=0.00207, RegLoss_val=0.000, train_loss=0.000649, reg_loss=0.000, MAE=8.110, RMSE=11.80, Loss=0.000636, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 221.86it/s]
[{'dealer': 5, 'foreign': 10, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 269.08it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1455, MAE_val=16.70, RMSE_val=20.30, Loss_val=0.00206, RegLoss_val=0.000, train_loss=0.000644, reg_loss=0.000, MAE=7.990, RMSE=11.40, Loss=0.00062, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 221.86it/s]
[{'dealer': 5, 'foreign': 10, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 273.45it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1456, MAE_val=16.30, RMSE_val=19.70, Loss_val=0.00194, RegLoss_val=0.000, train_loss=0.000644, reg_loss=0.000, MAE=8.150, RMSE=11.80, Loss=0.000631, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 315.43it/s]
[{'dealer': 5, 'foreign': 10, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 259.28it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1457, MAE_val=17.30, RMSE_val=21.00, Loss_val=0.00219, RegLoss_val=0.000, train_loss=0.000646, reg_loss=0.000, MAE=8.560, RMSE=12.10, Loss=0.000701, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 250.12it/s]
[{'dealer': 10, 'foreign': 0, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 276.92it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1458, MAE_val=16.50, RMSE_val=19.90, Loss_val=0.00198, RegLoss_val=0.000, train_loss=0.000676, reg_loss=0.000, MAE=8.030, RMSE=11.40, Loss=0.000642, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 250.02it/s]
[{'dealer': 10, 'foreign': 0, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 270.06it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1459, MAE_val=17.30, RMSE_val=20.90, Loss_val=0.00217, RegLoss_val=0.000, train_loss=0.00066, reg_loss=0.000, MAE=8.020, RMSE=11.50, Loss=0.000637, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 285.40it/s]
[{'dealer': 10, 'foreign': 0, 'investment': 10}]
Epoch 140: 100%|██████████| 140/140 [01:30<00:00,  1.54it/s]
Epoch 140: 100%|██████████| 140/140 [01:19<00:00,  1.75it/s]
Epoch 140: 100%|██████████| 140/140 [01:08<00:00,  2.03it/s]    
Epoch 140: 100%|██████████| 140/140 [00:57<00:00,  2.44it/s]    
Epoch 140: 100%|██████████| 140/140 [00:45<00:00,  3.06it/s]   
Epoch 140: 100%|██████████| 140/140 [00:34<00:00,  4.07it/s]    
Epoch 140: 100%|██████████| 140/140 [00:22<00:00,  6.15it/s]
Epoch 140: 100%|██████████| 140/140 [00:11<00:00, 12.05it/s]    
Epoch 140: 100%|██████████| 140/140 [00:00<00:00, 438.84it/s]   
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 270.82it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1460, MAE_val=17.50, RMSE_val=21.40, Loss_val=0.00228, RegLoss_val=0.000, train_loss=0.000668, reg_loss=0.000, MAE=8.280, RMSE=12.00, Loss=0.00066, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 199.94it/s]
[{'dealer': 10, 'foreign': 5, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 254.20it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1461, MAE_val=16.50, RMSE_val=20.10, Loss_val=0.00201, RegLoss_val=0.000, train_loss=0.000645, reg_loss=0.000, MAE=7.930, RMSE=11.40, Loss=0.000621, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 269.59it/s]
[{'dealer': 10, 'foreign': 5, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 272.17it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1462, MAE_val=16.40, RMSE_val=20.10, Loss_val=0.00202, RegLoss_val=0.000, train_loss=0.000646, reg_loss=0.000, MAE=8.120, RMSE=11.80, Loss=0.000637, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 202.54it/s]
[{'dealer': 10, 'foreign': 5, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 270.64it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1463, MAE_val=16.00, RMSE_val=19.40, Loss_val=0.00187, RegLoss_val=0.000, train_loss=0.000655, reg_loss=0.000, MAE=8.520, RMSE=12.10, Loss=0.000706, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 257.90it/s]
[{'dealer': 10, 'foreign': 10, 'investment': 0}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 270.49it/s]


Training: |          | 0/? [00:09<?, ?it/s, v_num=1464, MAE_val=16.20, RMSE_val=19.80, Loss_val=0.00196, RegLoss_val=0.000, train_loss=0.00064, reg_loss=0.000, MAE=8.130, RMSE=11.80, Loss=0.000629, RegLoss=0.000] 
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 191.31it/s]
[{'dealer': 10, 'foreign': 10, 'investment': 5}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 271.90it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1465, MAE_val=16.20, RMSE_val=19.70, Loss_val=0.00194, RegLoss_val=0.000, train_loss=0.000645, reg_loss=0.000, MAE=8.550, RMSE=12.10, Loss=0.000696, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 250.03it/s]
[{'dealer': 10, 'foreign': 10, 'investment': 10}]
Training: |          | 0/? [00:00<?, ?it/s]

Finding best initial lr: 100%|██████████| 219/219 [00:00<00:00, 267.52it/s]


Training: |          | 0/? [00:10<?, ?it/s, v_num=1466, MAE_val=16.30, RMSE_val=19.80, Loss_val=0.00195, RegLoss_val=0.000, train_loss=0.000644, reg_loss=0.000, MAE=8.230, RMSE=11.60, Loss=0.000624, RegLoss=0.000]
Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 249.94it/s]


In [4]:
# Tabulate every grid-search record, save a copy to disk, then display the table.
results_df = pd.DataFrame(data=results)
# NOTE(review): the DataFrame index is written as the first CSV column; reading
# the file back without index_col=0 will surface it as an "Unnamed: 0" column.
results_df.to_csv(path_or_buf='params.csv')
results_df

Unnamed: 0,dealer,foreign,investment,RMSE,MAPE
0,0,0,0,18.057192,0.013987
1,0,0,5,17.749821,0.013916
2,0,0,10,18.220335,0.014399
3,0,5,0,18.484234,0.014421
4,0,5,5,18.236444,0.014255
5,0,5,10,18.934738,0.014747
6,0,10,0,18.750698,0.014567
7,0,10,5,18.919357,0.014637
8,0,10,10,18.647554,0.014579
9,5,0,0,19.248236,0.015059


In [5]:
# Ten combinations with the lowest RMSE (ascending sort, first ten rows).
results_df.sort_values("RMSE", ascending=True).iloc[:10]

Unnamed: 0,dealer,foreign,investment,RMSE,MAPE
1,0,0,5,17.749821,0.013916
0,0,0,0,18.057192,0.013987
2,0,0,10,18.220335,0.014399
4,0,5,5,18.236444,0.014255
3,0,5,0,18.484234,0.014421
8,0,10,10,18.647554,0.014579
6,0,10,0,18.750698,0.014567
7,0,10,5,18.919357,0.014637
5,0,5,10,18.934738,0.014747
9,5,0,0,19.248236,0.015059


In [6]:
# Ten combinations with the lowest MAPE (ascending sort, first ten rows).
results_df.sort_values("MAPE", ascending=True).iloc[:10]

Unnamed: 0,dealer,foreign,investment,RMSE,MAPE
1,0,0,5,17.749821,0.013916
0,0,0,0,18.057192,0.013987
4,0,5,5,18.236444,0.014255
2,0,0,10,18.220335,0.014399
3,0,5,0,18.484234,0.014421
6,0,10,0,18.750698,0.014567
8,0,10,10,18.647554,0.014579
7,0,10,5,18.919357,0.014637
5,0,5,10,18.934738,0.014747
23,10,5,10,19.38682,0.014985
