# 超參數及集成學習權重最佳化

我們的目的是要以前一天的氣象預報來預測當天的電力資訊，但是我們手上的氣象預報歷史資料是從2024年七月開始蒐集，直接拿來預測電力資訊天數不夠，  
所以除了 Power_predict.ipynb 裡面敘述的從氣象觀測歷史資料預測電力資料的模型之外，我們還需要建立從氣象預報資料來預測氣象觀測資料的模型。  

這裡的氣象資料預測的模型建立方式跟電力資料的模型大同小異，主要的不同點是氣象資料預測可以把每天每站的數據當成一個樣本，這樣我們就可以在相對短時間之內累積足夠的樣本數。

同時這個筆記本也要處理超參數的最佳化，我們使用 optuna 這個第三方套件來達成這個任務。  
另外我們也嘗試最佳化集成學習時各模型的權重，具體方法為計算出模型之間的誤差相關矩陣，再從這個矩陣解出最佳權重組合。

## 初始化

### 匯入模組與套件

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# These two lines let matplotlib figures render Chinese text and display the minus sign correctly.
matplotlib.rc('font', family='Microsoft JhengHei')
plt.rcParams['axes.unicode_minus'] = False
import datetime
from copy import deepcopy
import os
import joblib
import json
from tqdm import tqdm
import optuna

# Set the Optuna log level to WARNING so that only warnings and more severe messages are shown.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Opt in to pandas' future behaviour of not silently downcasting.
pd.set_option('future.no_silent_downcasting', True)

In [2]:
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.svm import SVR, NuSVR

In [3]:
from sklearn.svm import SVC, NuSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, LinearRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.optimize import minimize
from sklearn.metrics import f1_score

In [5]:
import warnings
from sklearn.exceptions import ConvergenceWarning
# Silence scikit-learn ConvergenceWarning messages for the whole notebook.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [6]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

In [7]:
from Pytorch_models.metrics import Array_Metrics
from Pytorch_models import models as pytorch_models
from Pytorch_models import api
# Short aliases for the project's array metrics; MAE scores regressors in the ensemble section.
MAE = Array_Metrics.mae
# NOTE(review): R2_score is not referenced in the visible part of this notebook.
R2_score = Array_Metrics.r2

In [8]:
from utils.prepare_data import prepare_forecast_observation_df, prepare_data

In [9]:
def FCN_model(input_f, output_f, feature_counts, dropout_factor=0, L2_factor=1e-15, mode='regressor'):
    """Build a fully connected network wrapped in the project's ``Model_API``.

    Args:
        input_f: Passed to the network constructor as its first argument
            (callers in this notebook pass the number of X columns).
        output_f: Passed to the network constructor as its second argument.
        feature_counts: Passed to the network constructor as its third argument.
        dropout_factor: Passed to the network constructor as its fourth argument.
        L2_factor: Forwarded to ``api.Model_API`` as ``L2_factor``.
        mode: ``'regressor'`` builds ``pytorch_models.SimpleNN``;
            ``'classifier'`` builds ``pytorch_models.SimpleNN_classifer``.

    Returns:
        The ``api.Model_API`` instance wrapping the freshly built network.

    Raises:
        ValueError: If ``mode`` is neither ``'regressor'`` nor ``'classifier'``.
    """
    # Validate first so an unknown mode fails with a clear message instead of
    # an UnboundLocalError on the network variable.
    if mode not in ('regressor', 'classifier'):
        raise ValueError(f"mode must be 'regressor' or 'classifier', got {mode!r}")

    if mode == 'regressor':
        model = pytorch_models.SimpleNN(input_f, output_f, feature_counts, dropout_factor)
    else:
        model = pytorch_models.SimpleNN_classifer(input_f, output_f, feature_counts, dropout_factor)

    # The keyword is spelled 'classifer' by the project API; keep it as is.
    return api.Model_API(model, L2_factor=L2_factor, classifer=(mode == 'classifier'))

### 初始參數

In [10]:
# Passed to flow_control as its speed_test argument.
speed_test = False

# First and last date of the data to load.
start_date, end_date = '2023-08-01', '2025-01-01'

# Prediction targets handled by this notebook.
_target_features = (
    '日照率',
    '最高氣溫',
    '最低氣溫',
    '氣溫',
    '風速',
    '風力',
    '太陽能',
    '尖峰負載',
    '夜尖峰',
)

# Per-target flags: False means recompute, True means read from the saved file.
# Flip a single target with e.g. optuna_done['日照率'] = True.
optuna_done = dict.fromkeys(_target_features, False)

weights_determined = dict.fromkeys(_target_features, False)

In [11]:
# Map each mode to a table of model_label -> model constructor.
model_class_dict = {
    'regressor': {
        'LinearRegression': LinearRegression,
        'RandomForest': RandomForestRegressor,
        'XGBoost': XGBRegressor,
        'LightGBM': LGBMRegressor,
        'SVR': SVR,
        'NuSVR': NuSVR,
        'FCN': FCN_model,
    },
    'classifier': {
        'RandomForest': RandomForestClassifier,
        'XGBoost': XGBClassifier,
        'LightGBM': LGBMClassifier,
        'SVC': SVC,
        'NuSVC': NuSVC,
        'LogisticRegression': LogisticRegression,
        'FCN': FCN_model,
    },
}

### 讀取資料

讀取先前經由爬蟲定時抓取的預報與觀測資料

In [12]:
# Directory handed to the data-preparation helpers.
data_path = './historical/data/'
# Root directory under which flow_control keeps one sub-folder of metadata per target.
train_model_path = './trained_model_parameters/model_meta_2024-09-03/'

In [13]:
# Table used as data_df when the weather targets are predicted from forecasts.
forecast_obs_df = prepare_forecast_observation_df(data_path, start_date=start_date, end_date=end_date)
# NOTE(review): presumably the weather/power table for the power targets; its use is not visible here.
weather_power_df = prepare_data(data_path, start_date=start_date, end_date=end_date)

## 函數

### 超參數最佳化

這部分的函數有：  
1. get_XY: 從 DataFrame 中提取需要的 X 與 Y 兩個 numpy array。
2. five_fold_test: 執行一次 K-fold（預設為 5-fold）測試，回傳測試集與訓練集的平均指標。
3. assign_model: 根據 model_label 與超參數字典建立一個模型。
4. hyperparameter_tuning: 針對特定的模型與超參數組合，呼叫 five_fold_test 執行多次 5-fold 測試，並回傳指標（迴歸為 R2，分類為 F1）的平均值減去標準差。
5. optuna_operation: 利用第三方套件 optuna 執行超參數調整，會呼叫 get_XY 與 hyperparameter_tuning。

流程控制函數 flow_control 會呼叫 optuna_operation，而主程式只會直接呼叫 flow_control。

In [14]:
def get_XY(data_df, Y_feature, X_features=None):
    """Extract the X matrix and Y vector for one prediction target.

    Rows whose Y value is NaN are dropped from both X and Y.

    Args:
        data_df: DataFrame holding the feature columns and the ``Y_feature`` column.
        Y_feature: Name of the target column. Weather targets select columns by
            the prefix before ``'預報_'``; power targets by the prefix before ``'_'``.
        X_features: Feature prefixes (and, optionally, date-related column
            names) to keep. When ``None``, every column containing ``'_'`` is
            used, plus the date-related columns for power targets.

    Returns:
        A tuple ``(Xs, Ys, X_cols)``: the feature array, the target array, and
        the list of column names that make up ``Xs``.

    Raises:
        ValueError: If ``Y_feature`` is not one of the supported targets.
    """
    date_related_cols = ['日期數字', '假日', '週六', '週日', '補班', '1~3月', '11~12月', '白日長度']

    if Y_feature in ['最高氣溫', '最低氣溫', '氣溫', '風速', '日照率', '全天空日射量']:
        target = 'obs'
    elif Y_feature in ['風力', '太陽能', '尖峰負載', '夜尖峰']:
        target = 'pwd'
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(f'Unsupported Y_feature: {Y_feature!r}')

    if X_features is None:
        X_cols = [col for col in data_df.columns if '_' in col]
        if target == 'pwd':
            X_cols += date_related_cols
    else:
        # The separator decides which part of a column name is the feature prefix.
        separator = '預報_' if target == 'obs' else '_'
        X_cols = []
        for col in data_df.columns:
            parts = col.split(separator)
            if len(parts) >= 2:
                if parts[0] in X_features:
                    X_cols.append(col)
            elif col in date_related_cols and col in X_features:
                X_cols.append(col)

    Xs = np.array(data_df[X_cols])
    Ys = np.array(data_df[Y_feature])

    # Build the validity mask once and apply it to both arrays.
    valid = ~np.isnan(Ys)
    return Xs[valid, :], Ys[valid], X_cols

In [15]:
def five_fold_test(Xs, Ys, model=None, mode='regressor',
                   deep_learning=False, fold_n=5, standard_scale=True, always_test_last_chunk=False):
    """Run one K-fold evaluation and return the mean test and train metrics.

    The metric is ``1 - MSE / var(Y_test)`` for regressors and ``f1_score``
    for classifiers.

    Args:
        Xs: Feature array, indexed by row.
        Ys: Target array aligned with ``Xs``.
        model: Estimator exposing ``fit`` and ``predict``. Defaults to a fresh
            ``XGBRegressor()`` per call. When ``deep_learning`` is true it must
            be the object returned by ``FCN_model``; a new network with the
            same settings is built for every fold.
        mode: ``'regressor'`` or ``'classifier'``.
        deep_learning: Whether ``model`` is the FCN wrapper, which is fitted
            with an extra 20 % validation split taken from the training fold.
        fold_n: Number of folds.
        standard_scale: Whether to standardise X with a scaler fitted on the
            training fold.
        always_test_last_chunk: When true the folds are not shuffled and only
            the last fold is used as the test set.

    Returns:
        A tuple ``(metric_test, metric_train)`` averaged over the evaluated folds.

    Raises:
        ValueError: If ``mode`` is neither ``'regressor'`` nor ``'classifier'``.
    """
    if mode not in ('regressor', 'classifier'):
        raise ValueError(f"mode must be 'regressor' or 'classifier', got {mode!r}")

    # Build the default lazily: an instance in the signature would be created
    # once at definition time and shared by every call.
    if model is None:
        model = XGBRegressor()

    def metric(Y_test, Y_pred, mode=mode):
        if mode == 'regressor':
            return 1 - np.mean((Y_test - Y_pred)**2) / np.var(Y_test)
        return f1_score(Y_test, Y_pred)

    kf = KFold(n_splits=fold_n, shuffle=not always_test_last_chunk)
    folds = list(kf.split(Xs))

    fold_ids = [fold_n - 1] if always_test_last_chunk else range(fold_n)

    metric_test_list, metric_train_list = [], []
    for i in fold_ids:
        if deep_learning:
            # Start every fold from an untrained network with the same settings.
            params = model.model.params
            model = FCN_model(input_f=params['input_f'], output_f=params['output_f'],
                              feature_counts=params['feature_counts'],
                              dropout_factor=params['dropout_factor'],
                              L2_factor=model.L2_factor, mode=mode)

        train_index, test_index = folds[i]
        X_train, X_test = Xs[train_index], Xs[test_index]
        Y_train, Y_test = Ys[train_index], Ys[test_index]

        if standard_scale:
            scaler = StandardScaler()
            scaler.fit(X_train)
            X_train = scaler.transform(X_train)
            X_test = scaler.transform(X_test)

        if deep_learning:
            # Split after scaling so the training and validation parts share
            # the same feature scale (previously only the validation part
            # was scaled).
            X_train_DL, X_val, Y_train_DL, Y_val = train_test_split(X_train, Y_train, test_size=0.20)
            _ = model.fit(X_train_DL, Y_train_DL, X_val, Y_val)
        else:
            _ = model.fit(X_train, Y_train)

        metric_test_list.append(metric(Y_test, model.predict(X_test)))
        metric_train_list.append(metric(Y_train, model.predict(X_train)))

    metric_test = np.mean(metric_test_list)
    metric_train = np.mean(metric_train_list)
    return metric_test, metric_train

In [16]:
def assign_model(model_label, Xs, cfg, mode):
    """Instantiate the model registered under ``model_label`` for ``mode``.

    Args:
        model_label: Key into ``model_class_dict[mode]``.
        Xs: Feature array; only its column count is used (as the FCN input size).
        cfg: Hyperparameter dict expanded into the constructor call.
        mode: ``'regressor'`` or ``'classifier'``.

    Returns:
        The newly constructed, unfitted model.
    """
    constructor = model_class_dict[mode][model_label]

    # Fixed keyword arguments that certain model families always receive.
    if model_label == 'LightGBM':
        fixed_kwargs = {'force_col_wise': True, 'verbose': -1}
    elif model_label == 'FCN':
        fixed_kwargs = {'input_f': Xs.shape[1], 'output_f': 1,
                        'feature_counts': [16, 16, 16, 8], 'mode': mode}
    else:
        fixed_kwargs = {}

    return constructor(**fixed_kwargs, **cfg)

In [17]:
def hyperparameter_tuning(trial, Xs, Ys, mode='regressor',
                          model_label='RandomForest', n_iters=50, always_test_last_chunk=False):
    """Score one Optuna trial for the given model family.

    Hyperparameters are sampled from ``trial``, a model is built with
    ``assign_model``, and ``five_fold_test`` is repeated ``n_iters`` times.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        Xs: Feature array.
        Ys: Target array.
        mode: ``'regressor'`` or ``'classifier'``.
        model_label: Model family whose search space is sampled.
        n_iters: Number of repeated K-fold evaluations.
        always_test_last_chunk: Forwarded to ``five_fold_test``.

    Returns:
        Mean of the test metric over the repetitions minus its standard
        deviation, so unstable configurations are penalised.

    Raises:
        ValueError: If no search space is defined for ``model_label``.
    """
    deep_learning = model_label in ['FCN']
    # The FCN path is evaluated without standard scaling.
    standard_scale = not deep_learning

    if model_label in ['RandomForest', 'XGBoost', 'LightGBM']:
        cfg = {'max_depth': trial.suggest_int('max_depth', 2, 15),
               'n_estimators': trial.suggest_int('n_estimators', 10, 200)}
    elif model_label in ['SVR', 'SVC']:
        cfg = {'C': trial.suggest_float('C', 1e-3, 2e2, log=True),
               'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])}
    elif model_label in ['NuSVR', 'NuSVC']:
        cfg = {'C': trial.suggest_float('C', 1e-3, 2e2, log=True),
               'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
               'nu': trial.suggest_float('nu', 0.1, 0.9)}
    elif model_label == 'FCN':
        cfg = {'L2_factor': trial.suggest_float('L2_factor', 1e-3, 1, log=True),
               'dropout_factor': trial.suggest_float('dropout_factor', 0, 0.5)}
    elif model_label == 'LogisticRegression':
        cfg = {'C': trial.suggest_float('C', 1e-3, 2e2, log=True),
               'solver': trial.suggest_categorical('solver', ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'])}
    elif model_label == 'LinearRegression':
        cfg = {}
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(f'No hyperparameter search space defined for model_label {model_label!r}')

    model = assign_model(model_label, Xs, cfg, mode)

    metric_list = []
    for _ in range(n_iters):
        metric, _train_metric = five_fold_test(Xs, Ys, model, mode=mode,
                                               deep_learning=deep_learning, standard_scale=standard_scale,
                                               always_test_last_chunk=always_test_last_chunk)
        metric_list.append(metric)

    return np.mean(metric_list) - np.std(metric_list)

In [18]:
def optuna_operation(model_xcols, Y_feature, data_df, mode='regressor', speed_test=False,
                     optuna_n_trials=30, n_iters=30, always_test_last_chunk=False):
    """Tune the hyperparameters of every model in ``model_xcols`` with Optuna.

    Args:
        model_xcols: Maps each model label to the X features it uses.
        Y_feature: Name of the target to predict.
        data_df: DataFrame passed to ``get_XY``.
        mode: ``'regressor'`` (metric reported as R2) or ``'classifier'`` (F1).
        speed_test: When true, the FCN model is limited to 4 trials.
        optuna_n_trials: Number of Optuna trials per model.
        n_iters: Repetitions of the K-fold test inside each trial.
        always_test_last_chunk: Forwarded to ``hyperparameter_tuning``; forces
            a single repetition per trial.

    Returns:
        A tuple ``(best_params_by_model, best_value_by_model)``, both keyed by
        model label.
    """
    if mode == 'regressor':
        metric_name = 'R2'
    elif mode == 'classifier':
        metric_name = 'F1'

    if always_test_last_chunk:
        n_iters = 1

    best_params_by_model = {}
    best_value_by_model = {}

    for model_label in list(model_xcols.keys()):
        Xs, Ys, _ = get_XY(data_df, Y_feature, model_xcols[model_label])

        trials_for_model, iters_for_model = optuna_n_trials, n_iters

        # The FCN gets at most one repetition per trial.
        if model_label == 'FCN':
            iters_for_model = min(iters_for_model, 1)
            if speed_test:
                trials_for_model = 4

        # LinearRegression has nothing to tune: one trial, ten repetitions.
        if model_label == 'LinearRegression':
            trials_for_model, iters_for_model = 1, 10

        # Bind the per-model values as defaults so the objective does not
        # depend on the loop variables.
        def objective(trial, label=model_label, features=Xs, targets=Ys, repeats=iters_for_model):
            return hyperparameter_tuning(trial, model_label=label, Xs=features, Ys=targets, mode=mode,
                                         n_iters=repeats, always_test_last_chunk=always_test_last_chunk)

        study = optuna.create_study(sampler=optuna.samplers.TPESampler(), direction='maximize')
        # Run the trials one at a time so the progress bar advances per trial.
        with tqdm(total=trials_for_model) as pbar:
            for _ in range(trials_for_model):
                study.optimize(objective, n_trials=1, catch=(Exception,))
                pbar.update(1)

        print(model_label)
        best_params = study.best_params
        for key, v in best_params.items():
            print(f"Best {key} = {v}")
        best_value = study.best_value
        print(f"Best {metric_name} = {best_value}")

        best_params_by_model[model_label] = best_params
        best_value_by_model[model_label] = best_value

    return best_params_by_model, best_value_by_model

### Ensemble

這部分的函數有：
1. cross_correlation_matrix: 由不同模型的預測誤差產生相關矩陣。
2. sovle_optimal_weights: 由誤差相關矩陣解出最佳權重。
3. find_optimal_weights: 算出 N 組模型預測誤差樣本，再從中解出最佳權重，並提供不同模型的平均誤差。
4. save_model_metadata: 儲存這份筆記本得到的每個被預測值所採用的模型組合，以及每個模型採用的特徵、超參數與權重。

流程控制函數中會呼叫 find_optimal_weights 與 save_model_metadata

In [19]:
def cross_correlation_matrix(residuals):
    """Return the matrix of mean pairwise products of the models' residuals.

    Entry ``[i, j]`` equals ``mean(residuals[i] * residuals[j])``, so the
    result is symmetric. The products are not centred or normalised.

    Args:
        residuals: Sequence of equally sized residual arrays, one per model.

    Returns:
        An ``(N, N)`` float array, where ``N = len(residuals)``.
    """
    N = len(residuals)
    if N == 0:
        return np.zeros((0, 0))

    # One row per model; a single matrix product replaces the double loop.
    stacked = np.asarray(residuals, dtype=float).reshape(N, -1)
    return stacked @ stacked.T / stacked.shape[1]

In [20]:
def sovle_optimal_weights(matrix):
    """Solve for the ensemble weights that minimise ``w.T @ matrix @ w``.

    The weights are constrained to lie in ``[0, 1]`` and to sum to one; the
    problem is solved with SLSQP starting from uniform weights.

    Args:
        matrix: Square ``(N, N)`` residual product matrix.

    Returns:
        A length-``N`` array of non-negative weights summing to one. Uniform
        weights are returned when the optimiser yields an unusable result.

    Raises:
        ValueError: If ``matrix`` has no rows.
    """
    matrix = np.asarray(matrix, dtype=float)
    N = matrix.shape[0]
    if N == 0:
        raise ValueError('matrix must contain at least one model')

    def objective(weights):
        return weights.T @ matrix @ weights

    initial_weights = np.full(N, 1 / N)
    constraints = ({'type': 'eq', 'fun': lambda w: np.sum(w) - 1},)
    bounds = [(0, 1)] * N
    result = minimize(objective, initial_weights, method='SLSQP', bounds=bounds, constraints=constraints)

    if not result.success:
        # Do not hide a failed optimisation from the caller.
        warnings.warn(f'SLSQP did not converge: {result.message}', RuntimeWarning)

    # SLSQP satisfies the bounds and the sum constraint only up to its
    # tolerance; project the result back onto the feasible set.
    optimal_weights = np.clip(result.x, 0, 1)
    total = optimal_weights.sum()
    if not np.isfinite(total) or total <= 0:
        return initial_weights
    return optimal_weights / total

In [21]:
def predict(model_label, Y_train, train_ind, test_ind, mode,
            model_hyperparameters_dict, model_xcols, data_df, Y_feature):
    """Train one ensemble member on a split and predict the test rows.

    Args:
        model_label: Model family to train.
        Y_train: Targets of the training rows.
        train_ind: Row indices (into the X returned by ``get_XY``) used for training.
        test_ind: Row indices used for prediction.
        mode: ``'regressor'`` or ``'classifier'``.
        model_hyperparameters_dict: Maps model labels to hyperparameter dicts.
        model_xcols: Maps model labels to the X features they use.
        data_df: DataFrame passed to ``get_XY``.
        Y_feature: Name of the target.

    Returns:
        The model's predictions for the test rows.
    """
    features = model_xcols[model_label]
    Xs, _, _ = get_XY(data_df, Y_feature=Y_feature, X_features=features)
    model = assign_model(model_label, Xs, cfg=model_hyperparameters_dict[model_label], mode=mode)

    X_train, X_test = Xs[train_ind], Xs[test_ind]

    if model_label == 'FCN':
        # The FCN is fitted on unscaled data with a 20 % validation split.
        X_fit, X_val, Y_fit, Y_val = train_test_split(X_train, Y_train, test_size=0.20)
        model.fit(X_fit, Y_fit, X_val, Y_val)
    else:
        # Every other model sees features standardised on the training rows.
        fitted_scaler = StandardScaler().fit(X_train)
        X_train = fitted_scaler.transform(X_train)
        X_test = fitted_scaler.transform(X_test)
        model.fit(X_train, Y_train)

    return model.predict(X_test)

In [22]:
def get_residual_corr_matrix(model_hyperparameters_dict, ensemble_models, model_xcols,
                             data_df, Ys, Y_feature, mode,
                             n_iters, n_samples):
    """Average the residual product matrix over repeated random 80/20 splits.

    Args:
        model_hyperparameters_dict: Maps model labels to hyperparameter dicts.
        ensemble_models: Ordered list of model labels in the ensemble.
        model_xcols: Maps model labels to the X features they use.
        data_df: DataFrame passed on to ``predict``.
        Ys: Target array with ``n_samples`` entries.
        Y_feature: Name of the target.
        mode: ``'regressor'`` (per-model MAE) or ``'classifier'`` (per-model F1
            on predictions thresholded at 0.5).
        n_iters: Number of random splits.
        n_samples: Number of rows available for splitting.

    Returns:
        A tuple ``(matrix, model_metric, Y_pred_iters, Y_test_iters)``: the
        averaged matrix, the per-split list of per-model metrics, the
        per-split predictions, and the per-split test targets.
    """
    n_models = len(ensemble_models)
    matrix = np.zeros((n_models, n_models))
    Y_pred_iters, Y_test_iters, model_metric = [], [], []

    for _ in tqdm(range(n_iters)):
        # Split row indices rather than data so every model sees the same split.
        train_ind, test_ind, _, _ = train_test_split(np.arange(n_samples), np.arange(n_samples), test_size=0.2)
        Y_train, Y_test = Ys[train_ind], Ys[test_ind]

        Y_preds, this_metric = [], []
        for model_label in ensemble_models:
            YP = predict(model_label, Y_train, train_ind, test_ind, mode,
                         model_hyperparameters_dict, model_xcols, data_df, Y_feature)
            if mode == 'regressor':
                this_metric.append(MAE(Y_test, YP))
            elif mode == 'classifier':
                # Binarise in place at 0.5 before scoring.
                YP[YP < 0.5] = 0
                YP[YP >= 0.5] = 1
                this_metric.append(f1_score(Y_test, YP))
            Y_preds.append(YP)

        # One residual row per model, all measured against the same test targets.
        n_preds = len(Y_preds)
        residuals = Y_preds - np.array([Y_test] * n_preds).reshape(n_preds, -1)
        matrix += cross_correlation_matrix(residuals)

        model_metric.append(this_metric)
        Y_pred_iters.append(Y_preds)
        Y_test_iters.append(Y_test)

    matrix = matrix / n_iters
    return matrix, model_metric, Y_pred_iters, Y_test_iters

In [23]:
def get_weighted_ensemble_metric(Y_pred_iters, Y_test_iters, weights, mode):
    """Score the weighted ensemble prediction on every recorded split.

    Args:
        Y_pred_iters: Per-split list of per-model prediction arrays.
        Y_test_iters: Per-split test targets.
        weights: One weight per model, in the same order as the predictions.
        mode: ``'regressor'`` (MAE) or ``'classifier'`` (F1 on the weighted
            prediction thresholded at 0.5).

    Returns:
        An array of shape ``(n_splits, 1)`` holding one metric per split.
    """
    # A column vector broadcasts against each split's (n_models, n_test)
    # predictions, so the size of the first split is no longer assumed for all.
    weight_column = np.asarray(weights, dtype=float).reshape(-1, 1)

    weighted_metric = []
    for i, Y_preds in enumerate(Y_pred_iters):
        Y_test = Y_test_iters[i]
        weighted_YP = np.sum(np.asarray(Y_preds) * weight_column, axis=0)
        if mode == 'regressor':
            weighted_metric.append(MAE(Y_test, weighted_YP))
        elif mode == 'classifier':
            weighted_YP[np.where(weighted_YP < 0.5)] = 0
            weighted_YP[np.where(weighted_YP >= 0.5)] = 1
            weighted_metric.append(f1_score(Y_test, weighted_YP))

    return np.array(weighted_metric).reshape(-1, 1)

In [24]:
def find_optimal_weights(model_hyperparameters_dict, model_xcols, 
                         data_df, Y_feature, mode='regressor', 
                         n_iters=200, weights=None):
    """Evaluate the ensemble and, unless given, solve for its optimal weights.

    Args:
        model_hyperparameters_dict: Maps model labels to hyperparameter dicts.
        model_xcols: Maps model labels to the X features they use.
        data_df: DataFrame passed to ``get_XY``.
        Y_feature: Name of the target.
        mode: ``'regressor'`` (metric MAE) or ``'classifier'`` (metric F1).
        n_iters: Number of random splits to evaluate.
        weights: Optional dict of model label -> weight. When given, these
            weights are evaluated instead of being solved for, and only the
            models listed in it take part.

    Returns:
        The summary DataFrame alone when ``weights`` was supplied; otherwise a
        tuple ``(df, optimal_weights_dict)``.

    Raises:
        ValueError: If ``mode`` is neither ``'regressor'`` nor ``'classifier'``.
    """
    if mode == 'regressor':
        metric_name = 'MAE'
    elif mode == 'classifier':
        metric_name = 'F1'
    else:
        raise ValueError(f"mode must be 'regressor' or 'classifier', got {mode!r}")

    if weights is None:
        ensemble_models = list(model_hyperparameters_dict.keys())
    else:
        ensemble_models = list(weights.keys())

    n_models = len(ensemble_models)

    # Only the sample count and Ys are needed here; every model shares them
    # because rows are dropped on the target's NaNs alone.
    X_features = model_xcols[ensemble_models[0]]
    Xs, Ys, _ = get_XY(data_df, Y_feature=Y_feature, X_features=X_features)
    n_samples = Xs.shape[0]

    matrix, model_metric, Y_pred_iters, Y_test_iters = get_residual_corr_matrix(
        model_hyperparameters_dict=model_hyperparameters_dict,
        model_xcols=model_xcols, ensemble_models=ensemble_models,
        data_df=data_df, Ys=Ys, Y_feature=Y_feature, mode=mode,
        n_iters=n_iters, n_samples=n_samples)

    if weights is None:
        optimal_weights = sovle_optimal_weights(matrix)
    else:
        # The metric helper needs an array ordered like ensemble_models; the
        # raw dict used to be passed on and failed on ``.reshape``.
        optimal_weights = np.array([weights[label] for label in ensemble_models], dtype=float)

    uniform_weights = np.array([1/n_models] * n_models)
    uniform_metric = get_weighted_ensemble_metric(Y_pred_iters, Y_test_iters, uniform_weights, mode)
    optimal_metric = get_weighted_ensemble_metric(Y_pred_iters, Y_test_iters, optimal_weights, mode)

    array_metric = np.concatenate([model_metric, uniform_metric, optimal_metric], axis=1)

    # Clamp the row indices at 0: with fewer than 10 splits the 10 % index
    # used to become -1 and silently select the largest value.
    sorted_metric = np.sort(array_metric, axis=0)
    n_rows = array_metric.shape[0]
    row_90 = max(int(n_rows * 0.9) - 1, 0)
    row_10 = max(int(n_rows * 0.1) - 1, 0)

    metric_dict = {
        'Model': ensemble_models + ['Ensemble', 'Weighted_Ensemble'],
        f'Avg {metric_name}': list(np.mean(array_metric, axis=0)), 
        f'Std {metric_name}': list(np.std(array_metric, axis=0)),
        '90th percentile': list(sorted_metric[row_90]),
        '10th percentile': list(sorted_metric[row_10])
        }

    df = pd.DataFrame(metric_dict)
    if mode == 'regressor':
        # Lower MAE is better: rank by the pessimistic (90th percentile) error.
        df = df.sort_values('90th percentile').reset_index(drop=True)
    else:
        # Higher F1 is better: rank by the pessimistic (10th percentile) score.
        df = df.sort_values('10th percentile', ascending=False).reset_index(drop=True)

    if weights is not None:
        return df

    optimal_weights_dict = {ensemble_models[i]: w for i, w in enumerate(optimal_weights)}

    return df, optimal_weights_dict

In [25]:
def save_model_metadata(file_path, model_xcols, model_hyperparameters_dict, optimal_weights):
    """Write the ensemble's features, hyperparameters and weights to a JSON file.

    Only models whose weight exceeds 0.0005 are written.

    Args:
        file_path: Destination of the JSON file.
        model_xcols: Maps model labels to the X features they use.
        model_hyperparameters_dict: Maps model labels to hyperparameter dicts;
            its keys define which models are considered.
        optimal_weights: Maps model labels to ensemble weights.
    """
    kept_labels = [label for label in model_hyperparameters_dict
                   if optimal_weights[label] > 0.0005]

    output_dict = {
        'X_feature_dict': {label: model_xcols[label] for label in kept_labels},
        'hyperparameters_dict': {label: model_hyperparameters_dict[label] for label in kept_labels},
        'weights': {label: optimal_weights[label] for label in kept_labels},
    }

    with open(file_path, 'w') as f:
        json.dump(output_dict, f)

### 流程控制

主要被主程式呼叫的函數  
負責管理超參數及權重的計算與存取

In [26]:
def flow_control(Y_feature, model_xcols, data_df, mode='regressor', speed_test=False,
                 train_model_path=train_model_path, optuna_done=optuna_done, weights_determined=weights_determined):
    """Compute or reload hyperparameters and ensemble weights for one target.

    Results are shown, printed in a copy-and-paste friendly form, and saved to
    ``<train_model_path><Y_feature>/meta.json``.

    Args:
        Y_feature: Name of the target to predict.
        model_xcols: Maps model labels to the X features they use.
        data_df: DataFrame holding features and target.
        mode: ``'regressor'`` or ``'classifier'``.
        speed_test: Use far fewer iterations.
        train_model_path: Root directory of the per-target metadata folders.
        optuna_done: Per-target flags; a true value reuses the saved
            hyperparameters. Set to False here when no meta file exists.
        weights_determined: Per-target flags; a true value reuses the saved
            weights. Set to False here when no meta file exists.
    """
    if speed_test:
        n_iter_dict = {
            'hyper_parameter': 1,
            'ensemble_weight': 20
        }
    else:
        n_iter_dict = {
            'hyper_parameter': 15,
            'ensemble_weight': 200
        }

    this_model_path = f'{train_model_path}{Y_feature}/'
    os.makedirs(this_model_path, exist_ok=True)
    meta_path = f'{this_model_path}meta.json'

    # Saved values can be reused only if the meta file exists; otherwise both
    # steps are forced to recompute.
    if os.path.exists(meta_path):
        with open(meta_path, 'r') as f:
            meta = json.load(f)
    else:
        optuna_done[Y_feature] = False
        weights_determined[Y_feature] = False

    # Hyperparameters. Targets missing from the flag dicts count as "not done".
    model_r2_dict = None
    print('Start to tune hyperparameters')
    if optuna_done.get(Y_feature, False):
        model_xcols = meta['X_feature_dict']
        model_hyperparameters_dict = meta['hyperparameters_dict']
    else: 
        model_hyperparameters_dict, model_r2_dict = optuna_operation(model_xcols, Y_feature, data_df, mode=mode,
                                                                     n_iters=n_iter_dict['hyper_parameter'], speed_test=speed_test)

    # Ensemble weights.
    print('Start to determine Ensemble weights.')
    if weights_determined.get(Y_feature, False):
        # Models pruned from the meta file (negligible weight) have no saved
        # entry; treat them as weight 0 instead of failing with KeyError.
        saved_weights = meta['weights']
        optimal_weights = {label: saved_weights.get(label, 0.0) for label in model_hyperparameters_dict}
        df = pd.read_csv(f'{this_model_path}predict_MAE.df')
        display(df)
    else:
        # The FCN is slow to train, so use a quarter of the splits when it is present.
        if 'FCN' in model_hyperparameters_dict:
            n_iters = int(n_iter_dict['ensemble_weight']/4)
        else:
            n_iters = n_iter_dict['ensemble_weight']
        df, optimal_weights = find_optimal_weights(model_hyperparameters_dict, model_xcols, data_df, Y_feature=Y_feature, 
                                                   mode=mode, n_iters=n_iters)
        display(df)
        df.to_csv(f'{this_model_path}predict_MAE.df', index=False, encoding='utf-8-sig')

    print('Weights:')
    for label in model_hyperparameters_dict:
        print(f'{label}: {optimal_weights[label]:.3f}')

    print(' ')
    print(' ')
    print('**Copy and Paste following lines into the next cell.**')
    for model_label in model_hyperparameters_dict:
        print('##### ' + model_label)
        for key, v in model_hyperparameters_dict[model_label].items():
            print(f"Best {key} = {v}  ")
        # Scores exist only when the hyperparameters were tuned in this run.
        if model_r2_dict is not None:
            print(f"Best R2 = {model_r2_dict[model_label]}  ")
        print(f'Weight = {optimal_weights[model_label]:.3f}')
    print(' ')
    print(' ')

    save_model_metadata(this_model_path + 'meta.json', model_xcols, model_hyperparameters_dict, optimal_weights)

## 預測氣象數值

### 日照率

In [27]:
# Target to predict.
Y_feature = '日照率'
# Ensemble members and the X features each of them uses. Every model gets its
# own independent copy of the same feature list.
model_xcols = {
    label: ['晴', '多雲', '陰', '短暫陣雨', '短暫陣雨或雷雨', '午後短暫雷陣雨', '陣雨或雷雨', '相對溼度']
    for label in ('RandomForest', 'XGBoost', 'LightGBM', 'SVR', 'NuSVR', 'FCN')
}

flow_control(Y_feature, model_xcols, forecast_obs_df, speed_test=speed_test)

Start to tune hyperparameters


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [05:38<00:00, 11.28s/it]


RandomForest
Best max_depth = 11
Best n_estimators = 146
Best R2 = 0.5870948515049399


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:03<00:00,  4.12s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 23
Best R2 = 0.5890760918959569


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:57<00:00,  3.92s/it]


LightGBM
Best max_depth = 6
Best n_estimators = 34
Best R2 = 0.501255556359024


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:43<00:00,  1.45s/it]


SVR
Best C = 10.510584166812476
Best kernel = sigmoid
Best R2 = 0.6175865641668637


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:39<00:00,  3.32s/it]


NuSVR
Best C = 9.024585024739107
Best kernel = sigmoid
Best nu = 0.7512492841501225
Best R2 = 0.6253677869498243


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [20:53<00:00, 41.79s/it]


FCN
Best L2_factor = 0.021517425264349053
Best dropout_factor = 0.36979837303627927
Best R2 = 0.5866018727468167
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [07:19<00:00,  8.79s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,13.595763,1.610992,15.473017,11.486974
1,NuSVR,13.815586,1.604628,15.656854,11.581361
2,Ensemble,13.905168,1.589992,15.775646,11.783316
3,SVR,13.76591,1.561043,15.834404,11.394574
4,RandomForest,14.645338,1.728273,16.471008,12.23937
5,XGBoost,14.426582,1.76797,16.539145,12.007534
6,FCN,15.270799,2.071383,17.468977,12.85259
7,LightGBM,16.162745,1.623713,17.753286,14.22541


Weights:
RandomForest: 0.038
XGBoost: 0.142
LightGBM: 0.000
SVR: 0.330
NuSVR: 0.343
FCN: 0.147
 
 
**Copy and Paste following lines into the next cell.**
##### RandomForest
Best max_depth = 11  
Best n_estimators = 146  
Best R2 = 0.5870948515049399  
Weight = 0.038
##### XGBoost
Best max_depth = 2  
Best n_estimators = 23  
Best R2 = 0.5890760918959569  
Weight = 0.142
##### LightGBM
Best max_depth = 6  
Best n_estimators = 34  
Best R2 = 0.501255556359024  
Weight = 0.000
##### SVR
Best C = 10.510584166812476  
Best kernel = sigmoid  
Best R2 = 0.6175865641668637  
Weight = 0.330
##### NuSVR
Best C = 9.024585024739107  
Best kernel = sigmoid  
Best nu = 0.7512492841501225  
Best R2 = 0.6253677869498243  
Weight = 0.343
##### FCN
Best L2_factor = 0.021517425264349053  
Best dropout_factor = 0.36979837303627927  
Best R2 = 0.5866018727468167  
Weight = 0.147
 
 


##### RandomForest
Best max_depth = 11  
Best n_estimators = 146  
Best R2 = 0.5870948515049399  
Weight = 0.038
##### XGBoost
Best max_depth = 2  
Best n_estimators = 23  
Best R2 = 0.5890760918959569  
Weight = 0.142
##### LightGBM
Best max_depth = 6  
Best n_estimators = 34  
Best R2 = 0.501255556359024  
Weight = 0.000
##### SVR
Best C = 10.510584166812476  
Best kernel = sigmoid  
Best R2 = 0.6175865641668637  
Weight = 0.330
##### NuSVR
Best C = 9.024585024739107  
Best kernel = sigmoid  
Best nu = 0.7512492841501225  
Best R2 = 0.6253677869498243  
Weight = 0.343
##### FCN
Best L2_factor = 0.021517425264349053  
Best dropout_factor = 0.36979837303627927  
Best R2 = 0.5866018727468167  
Weight = 0.147

### 高溫

In [28]:
# Target to predict.
Y_feature = '最高氣溫'
# Every ensemble member uses only the forecast temperature columns; each
# model gets its own list object.
model_xcols = {
    label: ['溫度']
    for label in ('LinearRegression', 'FCN', 'RandomForest', 'XGBoost', 'SVR', 'NuSVR', 'LightGBM')
}

flow_control(Y_feature, model_xcols, forecast_obs_df, speed_test=speed_test)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 21.60it/s]


LinearRegression
Best R2 = 0.5534044274964922


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [28:55<00:00, 57.85s/it]


FCN
Best L2_factor = 0.0012580849117066048
Best dropout_factor = 0.17731620058252304
Best R2 = 0.5839900603099359


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:49<00:00,  5.64s/it]


RandomForest
Best max_depth = 4
Best n_estimators = 120
Best R2 = 0.5409705773728722


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:58<00:00,  3.95s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 22
Best R2 = 0.5428448378319348


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:49<00:00,  1.66s/it]


SVR
Best C = 1.7157289141337408
Best kernel = linear
Best R2 = 0.5448564962294009


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:02<00:00,  2.10s/it]


NuSVR
Best C = 0.1539035393298039
Best kernel = linear
Best nu = 0.44010303108936244
Best R2 = 0.5592848657357319


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:45<00:00,  3.52s/it]


LightGBM
Best max_depth = 2
Best n_estimators = 73
Best R2 = 0.5435463799154244
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [09:08<00:00, 10.96s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,0.951746,0.07561,1.045881,0.857144
1,Ensemble,0.954719,0.075508,1.047991,0.858696
2,RandomForest,0.971621,0.074793,1.048283,0.872686
3,SVR,0.973197,0.07882,1.06494,0.872694
4,LightGBM,0.986867,0.075915,1.075697,0.890317
5,NuSVR,0.975101,0.078378,1.077545,0.876928
6,XGBoost,0.979652,0.080775,1.080958,0.869659
7,LinearRegression,0.982843,0.075963,1.081477,0.886479
8,FCN,0.979121,0.084873,1.084537,0.866002


Weights:
LinearRegression: 0.000
FCN: 0.229
RandomForest: 0.000
XGBoost: 0.000
SVR: 0.253
NuSVR: 0.179
LightGBM: 0.339
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.5534044274964922  
Weight = 0.000
##### FCN
Best L2_factor = 0.0012580849117066048  
Best dropout_factor = 0.17731620058252304  
Best R2 = 0.5839900603099359  
Weight = 0.229
##### RandomForest
Best max_depth = 4  
Best n_estimators = 120  
Best R2 = 0.5409705773728722  
Weight = 0.000
##### XGBoost
Best max_depth = 2  
Best n_estimators = 22  
Best R2 = 0.5428448378319348  
Weight = 0.000
##### SVR
Best C = 1.7157289141337408  
Best kernel = linear  
Best R2 = 0.5448564962294009  
Weight = 0.253
##### NuSVR
Best C = 0.1539035393298039  
Best kernel = linear  
Best nu = 0.44010303108936244  
Best R2 = 0.5592848657357319  
Weight = 0.179
##### LightGBM
Best max_depth = 2  
Best n_estimators = 73  
Best R2 = 0.5435463799154244  
Weight = 0.339
 
 


##### LinearRegression
Best R2 = 0.5534044274964922  
Weight = 0.000
##### FCN
Best L2_factor = 0.0012580849117066048  
Best dropout_factor = 0.17731620058252304  
Best R2 = 0.5839900603099359  
Weight = 0.229
##### RandomForest
Best max_depth = 4  
Best n_estimators = 120  
Best R2 = 0.5409705773728722  
Weight = 0.000
##### XGBoost
Best max_depth = 2  
Best n_estimators = 22  
Best R2 = 0.5428448378319348  
Weight = 0.000
##### SVR
Best C = 1.7157289141337408  
Best kernel = linear  
Best R2 = 0.5448564962294009  
Weight = 0.253
##### NuSVR
Best C = 0.1539035393298039  
Best kernel = linear  
Best nu = 0.44010303108936244  
Best R2 = 0.5592848657357319  
Weight = 0.179
##### LightGBM
Best max_depth = 2  
Best n_estimators = 73  
Best R2 = 0.5435463799154244  
Weight = 0.339

### 低溫

In [29]:
# Target to predict.
Y_feature = '最低氣溫'
# Every ensemble member uses only the forecast temperature columns; each
# model gets its own list object.
model_xcols = {
    label: ['溫度']
    for label in ('LinearRegression', 'FCN', 'RandomForest', 'XGBoost', 'SVR', 'NuSVR', 'LightGBM')
}

flow_control(Y_feature, model_xcols, forecast_obs_df, speed_test=speed_test)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 21.76it/s]


LinearRegression
Best R2 = 0.14205229333746497


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [30:25<00:00, 60.85s/it]


FCN
Best L2_factor = 0.0037237461029340944
Best dropout_factor = 0.37515051123242515
Best R2 = 0.1970300802761308


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:23<00:00,  4.78s/it]


RandomForest
Best max_depth = 5
Best n_estimators = 77
Best R2 = 0.25684110582949154


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:05<00:00,  4.17s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 10
Best R2 = 0.22489063953343127


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:14<00:00,  2.10it/s]


SVR
Best C = 0.11744054294023308
Best kernel = rbf
Best R2 = 0.2016527418134913


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:27<00:00,  1.09it/s]


NuSVR
Best C = 0.8638927850606558
Best kernel = rbf
Best nu = 0.42847121164079394
Best R2 = 0.1889467419868108


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:43<00:00,  3.45s/it]


LightGBM
Best max_depth = 10
Best n_estimators = 29
Best R2 = 0.2075151445577908
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [10:12<00:00, 12.24s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,0.832064,0.067129,0.922821,0.750269
1,RandomForest,0.836811,0.068074,0.928667,0.751581
2,XGBoost,0.853135,0.062239,0.935635,0.768678
3,Ensemble,0.850426,0.066059,0.935688,0.770268
4,NuSVR,0.861865,0.064986,0.942894,0.774223
5,SVR,0.871848,0.064665,0.94718,0.796388
6,LightGBM,0.859422,0.071505,0.947609,0.778564
7,FCN,0.884137,0.070493,0.962566,0.786326
8,LinearRegression,0.895719,0.071882,0.98299,0.806366


Weights:
LinearRegression: 0.000
FCN: 0.000
RandomForest: 0.797
XGBoost: 0.000
SVR: 0.203
NuSVR: 0.000
LightGBM: 0.000
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.14205229333746497  
Weight = 0.000
##### FCN
Best L2_factor = 0.0037237461029340944  
Best dropout_factor = 0.37515051123242515  
Best R2 = 0.1970300802761308  
Weight = 0.000
##### RandomForest
Best max_depth = 5  
Best n_estimators = 77  
Best R2 = 0.25684110582949154  
Weight = 0.797
##### XGBoost
Best max_depth = 2  
Best n_estimators = 10  
Best R2 = 0.22489063953343127  
Weight = 0.000
##### SVR
Best C = 0.11744054294023308  
Best kernel = rbf  
Best R2 = 0.2016527418134913  
Weight = 0.203
##### NuSVR
Best C = 0.8638927850606558  
Best kernel = rbf  
Best nu = 0.42847121164079394  
Best R2 = 0.1889467419868108  
Weight = 0.000
##### LightGBM
Best max_depth = 10  
Best n_estimators = 29  
Best R2 = 0.2075151445577908  
Weight = 0.000
 
 


##### LinearRegression
Best R2 = 0.14205229333746497  
Weight = 0.000
##### FCN
Best L2_factor = 0.0037237461029340944  
Best dropout_factor = 0.37515051123242515  
Best R2 = 0.1970300802761308  
Weight = 0.000
##### RandomForest
Best max_depth = 5  
Best n_estimators = 77  
Best R2 = 0.25684110582949154  
Weight = 0.797
##### XGBoost
Best max_depth = 2  
Best n_estimators = 10  
Best R2 = 0.22489063953343127  
Weight = 0.000
##### SVR
Best C = 0.11744054294023308  
Best kernel = rbf  
Best R2 = 0.2016527418134913  
Weight = 0.203
##### NuSVR
Best C = 0.8638927850606558  
Best kernel = rbf  
Best nu = 0.42847121164079394  
Best R2 = 0.1889467419868108  
Weight = 0.000
##### LightGBM
Best max_depth = 10  
Best n_estimators = 29  
Best R2 = 0.2075151445577908  
Weight = 0.000

### 平均溫

In [30]:
# Target column for this run: (mean) air temperature.
Y_feature = '氣溫'

# All models share the same single input column. The list literal is
# evaluated once per model, so no list object is shared between entries.
model_xcols = {
    model_name: ['溫度']
    for model_name in (
        'LinearRegression',
        'FCN',
        'RandomForest',
        'XGBoost',
        'SVR',
        'NuSVR',
        'LightGBM',
    )
}

# flow_control is defined earlier in the notebook; per the recorded output it
# tunes hyperparameters and then determines the ensemble weights.
flow_control(
    Y_feature,
    model_xcols,
    forecast_obs_df,
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 21.28it/s]


LinearRegression
Best R2 = 0.412985625467958


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [27:43<00:00, 55.44s/it]


FCN
Best L2_factor = 0.0010283389751594885
Best dropout_factor = 0.3059333281801748
Best R2 = 0.4410662110913644


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [03:29<00:00,  6.99s/it]


RandomForest
Best max_depth = 4
Best n_estimators = 165
Best R2 = 0.44955960316903565


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:27<00:00,  4.90s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 74
Best R2 = 0.4151441540062713


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:13<00:00,  2.29it/s]


SVR
Best C = 0.008473094131137855
Best kernel = linear
Best R2 = 0.4264848817494777


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:39<00:00,  5.31s/it]


NuSVR
Best C = 184.29333614114873
Best kernel = linear
Best nu = 0.3423328614196417
Best R2 = 0.4249491691133574


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:49<00:00,  3.65s/it]


LightGBM
Best max_depth = 10
Best n_estimators = 25
Best R2 = 0.4268000484226063
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [09:57<00:00, 11.95s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,RandomForest,0.701252,0.056935,0.757835,0.629828
1,Weighted_Ensemble,0.694058,0.057037,0.768866,0.623698
2,Ensemble,0.700654,0.058551,0.783428,0.631756
3,NuSVR,0.724657,0.061396,0.785201,0.643895
4,LinearRegression,0.72807,0.05995,0.792448,0.663396
5,SVR,0.71217,0.061733,0.792775,0.629024
6,XGBoost,0.72522,0.058009,0.796913,0.64291
7,LightGBM,0.717611,0.06109,0.807781,0.639743
8,FCN,0.731574,0.065135,0.819495,0.653128


Weights:
LinearRegression: 0.000
FCN: 0.000
RandomForest: 0.495
XGBoost: 0.094
SVR: 0.279
NuSVR: 0.131
LightGBM: 0.002
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.412985625467958  
Weight = 0.000
##### FCN
Best L2_factor = 0.0010283389751594885  
Best dropout_factor = 0.3059333281801748  
Best R2 = 0.4410662110913644  
Weight = 0.000
##### RandomForest
Best max_depth = 4  
Best n_estimators = 165  
Best R2 = 0.44955960316903565  
Weight = 0.495
##### XGBoost
Best max_depth = 2  
Best n_estimators = 74  
Best R2 = 0.4151441540062713  
Weight = 0.094
##### SVR
Best C = 0.008473094131137855  
Best kernel = linear  
Best R2 = 0.4264848817494777  
Weight = 0.279
##### NuSVR
Best C = 184.29333614114873  
Best kernel = linear  
Best nu = 0.3423328614196417  
Best R2 = 0.4249491691133574  
Weight = 0.131
##### LightGBM
Best max_depth = 10  
Best n_estimators = 25  
Best R2 = 0.4268000484226063  
Weight = 0.002
 
 


##### LinearRegression
Best R2 = 0.412985625467958  
Weight = 0.000
##### FCN
Best L2_factor = 0.0010283389751594885  
Best dropout_factor = 0.3059333281801748  
Best R2 = 0.4410662110913644  
Weight = 0.000
##### RandomForest
Best max_depth = 4  
Best n_estimators = 165  
Best R2 = 0.44955960316903565  
Weight = 0.495
##### XGBoost
Best max_depth = 2  
Best n_estimators = 74  
Best R2 = 0.4151441540062713  
Weight = 0.094
##### SVR
Best C = 0.008473094131137855  
Best kernel = linear  
Best R2 = 0.4264848817494777  
Weight = 0.279
##### NuSVR
Best C = 184.29333614114873  
Best kernel = linear  
Best nu = 0.3423328614196417  
Best R2 = 0.4249491691133574  
Weight = 0.131
##### LightGBM
Best max_depth = 10  
Best n_estimators = 25  
Best R2 = 0.4268000484226063  
Weight = 0.002

### 風速

In [31]:
# Target column for this run: wind speed.
Y_feature = '風速'

# Default inputs: wind speed, the two wind components and temperature.
# The list literal is evaluated per model, so each entry owns its list.
model_xcols = {
    model_name: ['風速', '東西風', '南北風', '溫度']
    for model_name in ('FCN', 'RandomForest', 'XGBoost', 'LightGBM', 'SVR', 'NuSVR')
}

# RandomForest is the only model given the wider column set (the
# weather-description columns and relative humidity in addition to the
# defaults). Re-assigning an existing key keeps its position in the dict.
model_xcols['RandomForest'] = [
    '風速', '東西風', '南北風',
    '晴', '多雲', '陰', '短暫陣雨', '短暫陣雨或雷雨', '午後短暫雷陣雨', '陣雨或雷雨',
    '相對溼度', '溫度',
]

# flow_control is defined earlier in the notebook; per the recorded output it
# tunes hyperparameters and then determines the ensemble weights.
flow_control(
    Y_feature,
    model_xcols,
    forecast_obs_df,
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [27:25<00:00, 54.86s/it]


FCN
Best L2_factor = 0.05592638704213049
Best dropout_factor = 0.07847548007604593
Best R2 = 0.5813076958470627


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [07:14<00:00, 14.48s/it]


RandomForest
Best max_depth = 11
Best n_estimators = 42
Best R2 = 0.5347812133985134


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [03:01<00:00,  6.04s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 129
Best R2 = 0.4174075328171598


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:40<00:00,  3.34s/it]


LightGBM
Best max_depth = 9
Best n_estimators = 14
Best R2 = 0.5009043458667409


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [14:19<00:00, 28.64s/it]


SVR
Best C = 23.778858785551915
Best kernel = linear
Best R2 = 0.5609041655735929


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [29:54<00:00, 59.80s/it]


NuSVR
Best C = 41.50123732942291
Best kernel = linear
Best nu = 0.5195660952664151
Best R2 = 0.5516527526034817
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [09:47<00:00, 11.76s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,0.641149,0.122737,0.7889,0.478972
1,Ensemble,0.651633,0.134014,0.806428,0.484556
2,RandomForest,0.677281,0.121363,0.819995,0.496929
3,SVR,0.694901,0.131119,0.86268,0.554611
4,NuSVR,0.704941,0.134148,0.883298,0.554146
5,FCN,0.722465,0.146154,0.901823,0.51101
6,XGBoost,0.732845,0.150622,0.929725,0.547871
7,LightGBM,0.757101,0.148423,0.947066,0.572576


Weights:
FCN: 0.167
RandomForest: 0.536
XGBoost: 0.000
LightGBM: 0.000
SVR: 0.297
NuSVR: 0.000
 
 
**Copy and Paste following lines into the next cell.**
##### FCN
Best L2_factor = 0.05592638704213049  
Best dropout_factor = 0.07847548007604593  
Best R2 = 0.5813076958470627  
Weight = 0.167
##### RandomForest
Best max_depth = 11  
Best n_estimators = 42  
Best R2 = 0.5347812133985134  
Weight = 0.536
##### XGBoost
Best max_depth = 2  
Best n_estimators = 129  
Best R2 = 0.4174075328171598  
Weight = 0.000
##### LightGBM
Best max_depth = 9  
Best n_estimators = 14  
Best R2 = 0.5009043458667409  
Weight = 0.000
##### SVR
Best C = 23.778858785551915  
Best kernel = linear  
Best R2 = 0.5609041655735929  
Weight = 0.297
##### NuSVR
Best C = 41.50123732942291  
Best kernel = linear  
Best nu = 0.5195660952664151  
Best R2 = 0.5516527526034817  
Weight = 0.000
 
 


##### FCN
Best L2_factor = 0.05592638704213049  
Best dropout_factor = 0.07847548007604593  
Best R2 = 0.5813076958470627  
Weight = 0.167
##### RandomForest
Best max_depth = 11  
Best n_estimators = 42  
Best R2 = 0.5347812133985134  
Weight = 0.536
##### XGBoost
Best max_depth = 2  
Best n_estimators = 129  
Best R2 = 0.4174075328171598  
Weight = 0.000
##### LightGBM
Best max_depth = 9  
Best n_estimators = 14  
Best R2 = 0.5009043458667409  
Weight = 0.000
##### SVR
Best C = 23.778858785551915  
Best kernel = linear  
Best R2 = 0.5609041655735929  
Weight = 0.297
##### NuSVR
Best C = 41.50123732942291  
Best kernel = linear  
Best nu = 0.5195660952664151  
Best R2 = 0.5516527526034817  
Weight = 0.000

## 預測電力資料

### 風力

In [32]:
# Target column for this run: wind power.
Y_feature = '風力'

# Every model uses the same inputs: wind speed plus the calendar columns.
# The list literal is evaluated per model, so each entry owns its list.
model_xcols = {
    model_name: ['風速', '日期數字', '假日', '週六', '週日', '補班', '1~3月', '11~12月']
    for model_name in (
        'LinearRegression',
        'FCN',
        'RandomForest',
        'XGBoost',
        'LightGBM',
        'SVR',
        'NuSVR',
    )
}

# Unlike the weather cells, this run uses weather_power_df as its data frame.
# flow_control is defined earlier in the notebook.
flow_control(
    Y_feature,
    model_xcols,
    weather_power_df,
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.93it/s]


LinearRegression
Best R2 = 0.6044580733181938


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [36:00<00:00, 72.02s/it]


FCN
Best L2_factor = 0.03002014221111957
Best dropout_factor = 0.1129475402935946
Best R2 = 0.7277066975377966


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [05:05<00:00, 10.18s/it]


RandomForest
Best max_depth = 7
Best n_estimators = 188
Best R2 = 0.739926308023377


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:57<00:00,  5.91s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 82
Best R2 = 0.7433508737111479


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [04:17<00:00,  8.58s/it]


LightGBM
Best max_depth = 10
Best n_estimators = 123
Best R2 = 0.7397801530188338


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:19<00:00,  1.57it/s]


SVR
Best C = 43.4883715636823
Best kernel = rbf
Best R2 = 0.7397300609763093


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:15<00:00,  1.97it/s]


NuSVR
Best C = 56.8991555460406
Best kernel = rbf
Best nu = 0.42159107656237693
Best R2 = 0.7347554525194468
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [11:21<00:00, 13.64s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,23.689793,2.447451,26.569012,20.047862
1,Ensemble,24.357148,2.600217,26.971047,20.640952
2,LightGBM,24.426788,2.657557,27.661451,21.10219
3,XGBoost,24.606834,2.54131,27.936836,21.117119
4,SVR,25.890909,2.35579,28.518512,22.982049
5,NuSVR,26.200458,2.399453,28.646622,23.128377
6,RandomForest,25.065664,2.884473,28.941073,21.261278
7,FCN,26.444157,2.956582,30.539933,22.437135
8,LinearRegression,33.072311,2.906614,36.625307,29.226393


Weights:
LinearRegression: 0.000
FCN: 0.062
RandomForest: 0.000
XGBoost: 0.307
LightGBM: 0.260
SVR: 0.201
NuSVR: 0.170
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.6044580733181938  
Weight = 0.000
##### FCN
Best L2_factor = 0.03002014221111957  
Best dropout_factor = 0.1129475402935946  
Best R2 = 0.7277066975377966  
Weight = 0.062
##### RandomForest
Best max_depth = 7  
Best n_estimators = 188  
Best R2 = 0.739926308023377  
Weight = 0.000
##### XGBoost
Best max_depth = 2  
Best n_estimators = 82  
Best R2 = 0.7433508737111479  
Weight = 0.307
##### LightGBM
Best max_depth = 10  
Best n_estimators = 123  
Best R2 = 0.7397801530188338  
Weight = 0.260
##### SVR
Best C = 43.4883715636823  
Best kernel = rbf  
Best R2 = 0.7397300609763093  
Weight = 0.201
##### NuSVR
Best C = 56.8991555460406  
Best kernel = rbf  
Best nu = 0.42159107656237693  
Best R2 = 0.7347554525194468  
Weight = 0.170
 
 


##### LinearRegression
Best R2 = 0.6044580733181938  
Weight = 0.000
##### FCN
Best L2_factor = 0.03002014221111957  
Best dropout_factor = 0.1129475402935946  
Best R2 = 0.7277066975377966  
Weight = 0.062
##### RandomForest
Best max_depth = 7  
Best n_estimators = 188  
Best R2 = 0.739926308023377  
Weight = 0.000
##### XGBoost
Best max_depth = 2  
Best n_estimators = 82  
Best R2 = 0.7433508737111479  
Weight = 0.307
##### LightGBM
Best max_depth = 10  
Best n_estimators = 123  
Best R2 = 0.7397801530188338  
Weight = 0.260
##### SVR
Best C = 43.4883715636823  
Best kernel = rbf  
Best R2 = 0.7397300609763093  
Weight = 0.201
##### NuSVR
Best C = 56.8991555460406  
Best kernel = rbf  
Best nu = 0.42159107656237693  
Best R2 = 0.7347554525194468  
Weight = 0.170

### 太陽能


In [33]:
# Target column for this run: solar power.
Y_feature = '太陽能'

# Every model uses the same inputs: the three temperature columns, the
# global solar radiation column, the calendar columns and the daylight-length
# column. The list literal is evaluated per model, so no list is shared.
model_xcols = {
    model_name: [
        '氣溫', '最高氣溫', '最低氣溫', '全天空日射量',
        '日期數字', '假日', '週六', '週日', '補班', '白日長度',
    ]
    for model_name in (
        'LinearRegression',
        'FCN',
        'RandomForest',
        'XGBoost',
        'LightGBM',
        'SVR',
        'NuSVR',
    )
}

# This run uses weather_power_df; flow_control is defined earlier in the
# notebook.
flow_control(
    Y_feature,
    model_xcols,
    weather_power_df,
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.47it/s]


LinearRegression
Best R2 = 0.6893254685091934


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [35:02<00:00, 70.07s/it]


FCN
Best L2_factor = 0.16993690927931668
Best dropout_factor = 0.26311208913533524
Best R2 = 0.7702212180762896


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [10:50<00:00, 21.69s/it]


RandomForest
Best max_depth = 12
Best n_estimators = 134
Best R2 = 0.7258615014540506


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [04:57<00:00,  9.92s/it]


XGBoost
Best max_depth = 3
Best n_estimators = 80
Best R2 = 0.7180991262122053


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:53<00:00,  5.80s/it]


LightGBM
Best max_depth = 5
Best n_estimators = 56
Best R2 = 0.6956657014942508


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:23<00:00,  1.26it/s]


SVR
Best C = 6.694986116951681
Best kernel = linear
Best R2 = 0.6946904903081259


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:19<00:00,  1.56it/s]


NuSVR
Best C = 177.2132188450082
Best kernel = rbf
Best nu = 0.895376762270945
Best R2 = 0.7579144014682827
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [13:34<00:00, 16.29s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,82.8138,9.173639,93.198381,68.877181
1,NuSVR,83.89398,9.767768,94.697571,68.986657
2,Ensemble,89.487452,8.740659,99.07154,76.701945
3,FCN,91.121343,11.023916,103.026805,75.769183
4,RandomForest,92.96844,8.750425,103.866028,80.900226
5,XGBoost,98.244006,10.239459,112.106997,85.663249
6,LightGBM,101.745927,8.926734,114.475242,91.353746
7,SVR,111.455983,8.329078,119.238427,100.167045
8,LinearRegression,112.675842,8.966943,122.465924,98.758392


Weights:
LinearRegression: 0.000
FCN: 0.286
RandomForest: 0.000
XGBoost: 0.110
LightGBM: 0.096
SVR: 0.000
NuSVR: 0.508
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.6893254685091934  
Weight = 0.000
##### FCN
Best L2_factor = 0.16993690927931668  
Best dropout_factor = 0.26311208913533524  
Best R2 = 0.7702212180762896  
Weight = 0.286
##### RandomForest
Best max_depth = 12  
Best n_estimators = 134  
Best R2 = 0.7258615014540506  
Weight = 0.000
##### XGBoost
Best max_depth = 3  
Best n_estimators = 80  
Best R2 = 0.7180991262122053  
Weight = 0.110
##### LightGBM
Best max_depth = 5  
Best n_estimators = 56  
Best R2 = 0.6956657014942508  
Weight = 0.096
##### SVR
Best C = 6.694986116951681  
Best kernel = linear  
Best R2 = 0.6946904903081259  
Weight = 0.000
##### NuSVR
Best C = 177.2132188450082  
Best kernel = rbf  
Best nu = 0.895376762270945  
Best R2 = 0.7579144014682827  
Weight = 0.508
 
 


##### LinearRegression
Best R2 = 0.6893254685091934  
Weight = 0.000
##### FCN
Best L2_factor = 0.16993690927931668  
Best dropout_factor = 0.26311208913533524  
Best R2 = 0.7702212180762896  
Weight = 0.286
##### RandomForest
Best max_depth = 12  
Best n_estimators = 134  
Best R2 = 0.7258615014540506  
Weight = 0.000
##### XGBoost
Best max_depth = 3  
Best n_estimators = 80  
Best R2 = 0.7180991262122053  
Weight = 0.110
##### LightGBM
Best max_depth = 5  
Best n_estimators = 56  
Best R2 = 0.6956657014942508  
Weight = 0.096
##### SVR
Best C = 6.694986116951681  
Best kernel = linear  
Best R2 = 0.6946904903081259  
Weight = 0.000
##### NuSVR
Best C = 177.2132188450082  
Best kernel = rbf  
Best nu = 0.895376762270945  
Best R2 = 0.7579144014682827  
Weight = 0.508

### 尖峰負載

In [34]:
# Target column for this run: peak load.
Y_feature = '尖峰負載'

# Every model uses the same inputs: the three temperature columns plus the
# calendar and season-indicator columns. The list literal is evaluated per
# model, so each entry owns its list.
model_xcols = {
    model_name: [
        '氣溫', '最高氣溫', '最低氣溫',
        '日期數字', '假日', '週六', '週日', '補班', '1~3月', '11~12月',
    ]
    for model_name in (
        'LinearRegression',
        'FCN',
        'RandomForest',
        'XGBoost',
        'LightGBM',
        'SVR',
        'NuSVR',
    )
}

# This run uses weather_power_df; flow_control is defined earlier in the
# notebook.
flow_control(
    Y_feature,
    model_xcols,
    weather_power_df,
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.73it/s]


LinearRegression
Best R2 = 0.8890289107063775


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [35:44<00:00, 71.50s/it]


FCN
Best L2_factor = 0.029438708713891412
Best dropout_factor = 0.03116915624902694
Best R2 = 0.9577894981452781


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [09:22<00:00, 18.76s/it]


RandomForest
Best max_depth = 13
Best n_estimators = 184
Best R2 = 0.9323579004467799


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [03:56<00:00,  7.87s/it]


XGBoost
Best max_depth = 2
Best n_estimators = 104
Best R2 = 0.9421413919017114


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [04:42<00:00,  9.43s/it]


LightGBM
Best max_depth = 7
Best n_estimators = 195
Best R2 = 0.902646535842623


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:18<00:00,  1.61it/s]


SVR
Best C = 37.19208540364481
Best kernel = linear
Best R2 = 0.8797513372128146


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:18<00:00,  1.66it/s]


NuSVR
Best C = 34.75381072717423
Best kernel = rbf
Best nu = 0.42062297329040543
Best R2 = 0.8956020285392687
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [12:23<00:00, 14.86s/it]


Unnamed: 0,Model,Avg MAE,Std MAE,90th percentile,10th percentile
0,Weighted_Ensemble,58.238429,7.51743,67.310282,49.154207
1,Ensemble,67.949573,7.56316,78.427612,57.839259
2,XGBoost,71.687087,7.331282,82.414173,61.136055
3,RandomForest,72.634203,8.286729,83.898491,62.559788
4,FCN,65.97828,10.87674,84.176985,54.346568
5,LightGBM,90.91449,9.900384,104.885466,78.953365
6,NuSVR,100.614051,9.578762,111.643131,88.209442
7,LinearRegression,111.827804,8.094048,123.230984,101.959491
8,SVR,111.714345,9.350355,125.866888,98.88368


Weights:
LinearRegression: 0.000
FCN: 0.549
RandomForest: 0.013
XGBoost: 0.202
LightGBM: 0.181
SVR: 0.000
NuSVR: 0.055
 
 
**Copy and Paste following lines into the next cell.**
##### LinearRegression
Best R2 = 0.8890289107063775  
Weight = 0.000
##### FCN
Best L2_factor = 0.029438708713891412  
Best dropout_factor = 0.03116915624902694  
Best R2 = 0.9577894981452781  
Weight = 0.549
##### RandomForest
Best max_depth = 13  
Best n_estimators = 184  
Best R2 = 0.9323579004467799  
Weight = 0.013
##### XGBoost
Best max_depth = 2  
Best n_estimators = 104  
Best R2 = 0.9421413919017114  
Weight = 0.202
##### LightGBM
Best max_depth = 7  
Best n_estimators = 195  
Best R2 = 0.902646535842623  
Weight = 0.181
##### SVR
Best C = 37.19208540364481  
Best kernel = linear  
Best R2 = 0.8797513372128146  
Weight = 0.000
##### NuSVR
Best C = 34.75381072717423  
Best kernel = rbf  
Best nu = 0.42062297329040543  
Best R2 = 0.8956020285392687  
Weight = 0.055
 
 


##### LinearRegression
Best R2 = 0.8890289107063775  
Weight = 0.000
##### FCN
Best L2_factor = 0.029438708713891412  
Best dropout_factor = 0.03116915624902694  
Best R2 = 0.9577894981452781  
Weight = 0.549
##### RandomForest
Best max_depth = 13  
Best n_estimators = 184  
Best R2 = 0.9323579004467799  
Weight = 0.013
##### XGBoost
Best max_depth = 2  
Best n_estimators = 104  
Best R2 = 0.9421413919017114  
Weight = 0.202
##### LightGBM
Best max_depth = 7  
Best n_estimators = 195  
Best R2 = 0.902646535842623  
Weight = 0.181
##### SVR
Best C = 37.19208540364481  
Best kernel = linear  
Best R2 = 0.8797513372128146  
Weight = 0.000
##### NuSVR
Best C = 34.75381072717423  
Best kernel = rbf  
Best nu = 0.42062297329040543  
Best R2 = 0.8956020285392687  
Weight = 0.055

### 夜尖峰

In [35]:
# Target column for this run: night peak. It is handled as a classification
# problem (see mode='classifier' below), so the model set is FCN,
# LogisticRegression, RandomForest, XGBoost, LightGBM and SVC.
Y_feature = '夜尖峰'

# Every classifier uses the same inputs: the three temperature columns, the
# calendar columns and the daylight-length column. The list literal is
# evaluated per model, so each entry owns its list.
model_xcols = {
    model_name: [
        '氣溫', '最高氣溫', '最低氣溫',
        '日期數字', '假日', '週六', '週日', '補班', '白日長度',
    ]
    for model_name in (
        'FCN',
        'LogisticRegression',
        'RandomForest',
        'XGBoost',
        'LightGBM',
        'SVC',
    )
}

# The recorded output for this run reports F1 scores rather than R2.
# flow_control is defined earlier in the notebook.
flow_control(
    Y_feature,
    model_xcols,
    weather_power_df,
    mode='classifier',
    speed_test=speed_test,
)

Start to tune hyperparameters


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [41:15<00:00, 82.51s/it]


FCN
Best L2_factor = 0.002576271940664411
Best dropout_factor = 0.1252443332428825
Best F1 = 0.8748909576667637


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:13<00:00,  2.25it/s]


LogisticRegression
Best C = 0.7975026697912178
Best solver = sag
Best F1 = 0.8541996823354088


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [04:12<00:00,  8.42s/it]


RandomForest
Best max_depth = 14
Best n_estimators = 95
Best F1 = 0.8186294971282218


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:19<00:00,  4.63s/it]


XGBoost
Best max_depth = 15
Best n_estimators = 33
Best F1 = 0.8624582788638403


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [03:12<00:00,  6.40s/it]


LightGBM
Best max_depth = 2
Best n_estimators = 56
Best F1 = 0.8285599799076798


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:17<00:00,  1.72it/s]


SVC
Best C = 1.2964550960648644
Best kernel = rbf
Best F1 = 0.8649222578257927
Start to determine Ensemble weights.


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [13:37<00:00, 16.35s/it]


Unnamed: 0,Model,Avg F1,Std F1,90th percentile,10th percentile
0,Ensemble,0.884325,0.054831,0.947368,0.814815
1,XGBoost,0.877427,0.04621,0.933333,0.810811
2,LogisticRegression,0.871987,0.054327,0.928571,0.8
3,Weighted_Ensemble,0.877506,0.053421,0.930233,0.8
4,SVC,0.869981,0.05805,0.930233,0.769231
5,LightGBM,0.83204,0.057751,0.904762,0.765957
6,RandomForest,0.836314,0.057493,0.9,0.758621
7,FCN,0.83814,0.064904,0.918919,0.75


Weights:
FCN: 0.153
LogisticRegression: 0.331
RandomForest: 0.208
XGBoost: 0.291
LightGBM: 0.000
SVC: 0.016
 
 
**Copy and Paste following lines into the next cell.**
##### FCN
Best L2_factor = 0.002576271940664411  
Best dropout_factor = 0.1252443332428825  
Best R2 = 0.8748909576667637  
Weight = 0.153
##### LogisticRegression
Best C = 0.7975026697912178  
Best solver = sag  
Best R2 = 0.8541996823354088  
Weight = 0.331
##### RandomForest
Best max_depth = 14  
Best n_estimators = 95  
Best R2 = 0.8186294971282218  
Weight = 0.208
##### XGBoost
Best max_depth = 15  
Best n_estimators = 33  
Best R2 = 0.8624582788638403  
Weight = 0.291
##### LightGBM
Best max_depth = 2  
Best n_estimators = 56  
Best R2 = 0.8285599799076798  
Weight = 0.000
##### SVC
Best C = 1.2964550960648644  
Best kernel = rbf  
Best R2 = 0.8649222578257927  
Weight = 0.016
 
 


##### FCN
Best L2_factor = 0.002576271940664411  
Best dropout_factor = 0.1252443332428825  
Best F1 = 0.8748909576667637  
Weight = 0.153
##### LogisticRegression
Best C = 0.7975026697912178  
Best solver = sag  
Best F1 = 0.8541996823354088  
Weight = 0.331
##### RandomForest
Best max_depth = 14  
Best n_estimators = 95  
Best F1 = 0.8186294971282218  
Weight = 0.208
##### XGBoost
Best max_depth = 15  
Best n_estimators = 33  
Best F1 = 0.8624582788638403  
Weight = 0.291
##### LightGBM
Best max_depth = 2  
Best n_estimators = 56  
Best F1 = 0.8285599799076798  
Weight = 0.000
##### SVC
Best C = 1.2964550960648644  
Best kernel = rbf  
Best F1 = 0.8649222578257927  
Weight = 0.016