# Test 3 - Cross Validation: Dataset-tmf

In [1]:
from brbmdl import brbm, dataset_loading, process_dataset
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import KFold
import pickle

In [2]:
# Input column names handed to the data loader and to the BRB model.
# The last five entries are the lag columns 'before1day' .. 'before5day'.
features = [
    'macd', 'rsi', 'k', 'd', 'r', 'close',
    *(f'before{lag}day' for lag in range(1, 6)),
]
# Name of the column the model is trained to predict.
target = 'dailyreturn'

In [3]:
# Training CSVs, passed to dataset_loading in this order.
train_csv_paths = (
    './data/tsmc_train_data.csv',
    './data/foxconn_train_data.csv',
    './data/mediatek_train_data.csv',
)

# dataset_loading receives the feature/target names followed by the CSV paths
# and returns the feature frame together with the raw target values.
X_train, y_train = dataset_loading(features, target, *train_csv_paths)

# Labels used for scoring predictions in the cross-validation cell.
# NOTE(review): presumably the up/down class derived from the raw target —
# confirm against brbmdl.process_dataset.
y_train_label = process_dataset(y_train, target)

In [None]:
# 5-fold cross-validation: every fold builds a fresh BRB model on the training
# rows and scores its predictions against the labels of the held-out rows.
# NOTE(review): rows are shuffled before splitting; if the data is ordered in
# time (the 'before*day' columns suggest it may be), confirm that mixing past
# and future rows across folds is intended.
val_df = pd.concat([X_train, y_train_label], axis=1)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# One score per fold for each metric.
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

for train_rows, val_rows in kf.split(val_df):
    # Held-out fold: inputs plus the labels the predictions are scored against.
    X_val_part = X_train.iloc[val_rows]
    y_val_part = y_train_label.iloc[val_rows]

    # Training fold: inputs, raw target values, and the matching labels.
    X_train_part = X_train.iloc[train_rows]
    y_train_part = y_train.iloc[train_rows]
    # Only needed by the alternative fuzzy-range setup mentioned below.
    y_train_label_part = y_train_label.iloc[train_rows]

    # A new model per fold so nothing carries over between folds.
    brb_model = brbm(features, target)
    brb_model.set_ante_labels(['superlow', 'low', 'medium', 'high', 'superhigh'])
    brb_model.set_cons_labels(['down', 'up'])

    # Antecedent fuzzy ranges come from a previously saved decision tree.
    # Alternative: derive them from this fold's data with
    #   set_all_ante_fuzzy_ranges(X_train_part, y_train_label_part, features)
    # NOTE(review): if the saved tree was fitted on the full training set, it
    # has seen this fold's validation rows — confirm.
    brb_model.set_all_ante_fuzzy_ranges('./dtree_models/decision_tree_model3.pkl', features)

    # One four-point range per consequent label, in the order 'down', 'up'.
    cons_fuzzy_range = [
        [-1.0, -0.7, -0.25, 0.0],
        [0.0, 0.25, 0.7, 1.0]
    ]
    brb_model.set_cons_fuzzy_range(cons_fuzzy_range)

    # Build the data/rule bases from the training fold, then optimise.
    brb_model.inital_database(X_train_part, y_train_part)
    brb_model.inital_rulebase()
    brb_model.preprocess_database()
    brb_model.optimize(X_train_part, y_train_part, features, target)

    y_val_pred = brb_model.predict(X_val_part)

    # Compute all four scores first so a failure in any metric leaves every
    # list untouched for this fold.
    fold_scores = (
        accuracy_score(y_val_part, y_val_pred),
        precision_score(y_val_part, y_val_pred, average='macro'),
        recall_score(y_val_part, y_val_pred, average='macro'),
        f1_score(y_val_part, y_val_pred, average='macro'),
    )
    score_lists = (accuracy_list, precision_list, recall_list, f1_list)
    for score_list, score in zip(score_lists, fold_scores):
        score_list.append(score)

# Report the mean of each metric over the five folds, as a percentage.
print(f"驗證集 5 折平均 Accuracy: {np.mean(accuracy_list) * 100:.2f}%")
print(f"驗證集 5 折平均 Precision: {np.mean(precision_list) * 100:.2f}%")
print(f"驗證集 5 折平均 Recall: {np.mean(recall_list) * 100:.2f}%")
print(f"驗證集 5 折平均 F1 Score: {np.mean(f1_list) * 100:.2f}%")

Optimize: 100%|██████████| 4052/4052 [08:51<00:00,  7.63it/s]
Optimize: 100%|██████████| 4052/4052 [08:48<00:00,  7.67it/s]
Optimize: 100%|██████████| 4052/4052 [08:45<00:00,  7.71it/s]
Optimize: 100%|██████████| 4052/4052 [08:49<00:00,  7.66it/s]
Optimize: 100%|██████████| 4052/4052 [08:45<00:00,  7.72it/s]


驗證集 5 折平均 Accuracy: 79.57%
驗證集 5 折平均 Precision: 79.53%
驗證集 5 折平均 Recall: 79.60%
驗證集 5 折平均 F1 Score: 79.53%
