In [None]:
!pip install --upgrade pip
!pip install --upgrade scikit-learn
!pip install --upgrade nltk
!pip install --upgrade seaborn
!pip install --upgrade matplotlib  pandas



In [None]:
import nltk

nltk.download("punkt")
nltk.download("stopwords")
nltk.download("averaged_perceptron_tagger")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /Users/samhsiao/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/samhsiao/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/samhsiao/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/samhsiao/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd


# 混淆矩陣可視化函數
def plot_confusion_matrix(test_labels, target_predicted):
    # 計算混淆矩陣
    matrix = confusion_matrix(test_labels, target_predicted)
    # 將混淆矩陣轉為 DataFrame 以便於繪圖
    df_confusion = pd.DataFrame(matrix)
    # 設定顏色板
    colormap = sns.color_palette("BrBG", 10)
    # 使用 seaborn 的 heatmap 繪製混淆矩陣
    sns.heatmap(df_confusion, annot=True, fmt=".2f", cbar=None, cmap=colormap)
    plt.title("Confusion Matrix")  # 設置標題
    plt.tight_layout()  # 自動調整佈局
    plt.ylabel("True Class")  # 設置 y 軸標籤
    plt.xlabel("Predicted Class")  # 設置 x 軸標籤
    plt.show()  # 顯示圖表


# 評估指標輸出函數
def print_metrics(test_labels, target_predicted_binary):
    # 計算混淆矩陣，並解構為四個部分
    TN, FP, FN, TP = confusion_matrix(test_labels, target_predicted_binary).ravel()

    # 各種評估指標計算
    Sensitivity = float(TP) / (TP + FN) * 100  # 敏感度 (TPR)
    Specificity = float(TN) / (TN + FP) * 100  # 特異性 (TNR)
    Precision = float(TP) / (TP + FP) * 100  # 精確度
    NPV = float(TN) / (TN + FN) * 100  # 負預測值
    FPR = float(FP) / (FP + TN) * 100  # 假陽性率
    FNR = float(FN) / (TP + FN) * 100  # 假陰性率
    FDR = float(FP) / (TP + FP) * 100  # 假發現率
    ACC = float(TP + TN) / (TP + FP + FN + TN) * 100  # 總體準確率

    # 輸出各項指標
    print(f"Sensitivity or TPR: {Sensitivity}%")
    print(f"Specificity or TNR: {Specificity}%")
    print(f"Precision: {Precision}%")
    print(f"Negative Predictive Value: {NPV}%")
    print(f"False Positive Rate: {FPR}%")
    print(f"False Negative Rate: {FNR}%")
    print(f"False Discovery Rate: {FDR}%")
    print(f"Accuracy: {ACC}%")

In [None]:
df = pd.read_csv("data/imdb.csv", header=0)

In [6]:
df[:5]

Unnamed: 0,text,label
0,What I hoped for (or even expected) was the we...,0
1,Garden State must rate amongst the most contri...,0
2,There is a lot wrong with this film. I will no...,1
3,"To qualify my use of ""realistic"" in the summar...",1
4,Dirty War is absolutely one of the best politi...,1


In [None]:
df["label"].value_counts()

label
0    25000
1    25000
Name: count, dtype: int64

In [8]:
df.isna().sum()

text     0
label    0
dtype: int64

In [9]:
import nltk, re
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.tokenize import word_tokenize

In [10]:
from sklearn.model_selection import train_test_split

def split_data(df):

    train, test_and_validate = train_test_split(
        df, 
        test_size=0.2, 
        shuffle=True, 
        random_state=324
    )
    test, validate = train_test_split(
        test_and_validate, 
        test_size=0.5, 
        shuffle=True, 
        random_state=324
    )
    return train, validate, test

In [11]:
train, validate, test = split_data(df)
print(train.shape)
print(test.shape)
print(validate.shape)

(40000, 2)
(5000, 2)
(5000, 2)


In [12]:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

text_features = ['text']
model_target = 'label'

text_processor_0 = Pipeline([
    ('text_vect_0', CountVectorizer(max_features=500))
])

data_preprocessor = ColumnTransformer([
    ('text_pre_0', text_processor_0, text_features[0])
])

print('Datasets shapes before processing: ', train.shape, validate.shape, test.shape)
train_matrix = data_preprocessor.fit_transform(train)
test_matrix = data_preprocessor.transform(test)
validate_matrix = data_preprocessor.transform(validate)
print('Datasets shapes after processing: ', train_matrix.shape, validate_matrix.shape, test_matrix.shape)

Datasets shapes before processing:  (40000, 2) (5000, 2) (5000, 2)
Datasets shapes after processing:  (40000, 500) (5000, 500) (5000, 500)


In [None]:
import io
import os
import numpy as np

# 將數據儲存到本地 CSV 文件，而非上傳至 S3
def save_to_local_csv(filename, folder, X_train, y_train, is_test=False):
    # 建立資料夾（若不存在）
    if not os.path.exists(folder):
        os.makedirs(folder)
    
    # 使用 StringIO 作為緩衝區，準備將數據寫入 CSV
    csv_buffer = io.StringIO()
    
    # 將特徵矩陣轉為 float32 格式並展開為列表
    features = [
        t.toarray().astype('float32').flatten().tolist() for t in X_train
    ]
    
    # 如果是測試數據，僅包含特徵
    if is_test:
        temp_list = features
    else:
        # 非測試數據則在最前列插入標籤
        temp_list = np.insert(features, 0, y_train['label'], axis=1)
    
    # 將數據寫入緩衝區並指定分隔符為逗號
    np.savetxt(csv_buffer, temp_list, delimiter=',')
    
    # 將緩衝區的內容寫入指定的本地文件
    file_path = os.path.join(folder, filename)
    with open(file_path, 'w') as file:
        file.write(csv_buffer.getvalue())
    
    print(f"文件已儲存至本地：{file_path}")

# 使用範例
save_to_local_csv(
    'batch-in.csv', 
    './batch-in', 
    X_train=test_matrix, 
    y_train=test, 
    is_test=True
)

文件已儲存至本地：./batch-in/batch-in.csv


In [14]:
# 設定本地路徑和文件名稱
# 使用本地資料夾作為儲存目標
bucket = './local_data'
prefix = 'lab41'
train_file = 'train-pass1.csv'
validate_file = 'validate-pass1.csv'
test_file = 'test-pass1.csv'

# 完整的文件路徑範例
train_path = os.path.join(bucket, prefix, train_file)
validate_path = os.path.join(bucket, prefix, validate_file)
test_path = os.path.join(bucket, prefix, test_file)

# 確保資料夾存在
os.makedirs(os.path.join(bucket, prefix), exist_ok=True)


In [15]:
# 調用本地存儲函數來保存數據集
save_to_local_csv(train_file, 'train', train_matrix, train)
save_to_local_csv(validate_file, 'validate', validate_matrix, validate)
save_to_local_csv(test_file, 'test', test_matrix, test, is_test=True)

文件已儲存至本地：train/train-pass1.csv
文件已儲存至本地：validate/validate-pass1.csv
文件已儲存至本地：test/test-pass1.csv


In [19]:
import xgboost as xgb
import os

# 設置本地輸出路徑
output_dir = os.path.join(os.getcwd(), 'output')

# 檢查 output_dir 是否存在，如果存在則刪除該目錄或文件
if os.path.exists(output_dir):
    # 如果存在，且是文件，則刪除該文件
    if os.path.isfile(output_dir):
        print(f"檢測到 '{output_dir}' 存在為文件，刪除該文件...")
        os.remove(output_dir)
    # 如果存在，且是目錄，則刪除該目錄及其內容
    elif os.path.isdir(output_dir):
        print(f"檢測到 '{output_dir}' 存在為目錄，刪除該目錄及其內容...")
        import shutil
        shutil.rmtree(output_dir)

# 建立 output 目錄
os.makedirs(output_dir, exist_ok=True)
print(f"目錄 '{output_dir}' 已成功建立，準備儲存文件。")

# 設定超參數
hyperparams = {
    "num_round": 42,
    "eval_metric": "error",
    "objective": "binary:logistic",
    "silent": 1
}

# 準備訓練數據和驗證數據
# 假設 train_matrix, validate_matrix 和 train, validate 中包含了訓練和驗證數據
dtrain = xgb.DMatrix(train_matrix, label=train['label'])
dvalidate = xgb.DMatrix(validate_matrix, label=validate['label'])

# 執行 XGBoost 訓練並使用早停法則
evals = [(dtrain, 'train'), (dvalidate, 'validate')]
xgb_model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=hyperparams["num_round"],
    evals=evals,
    early_stopping_rounds=10
)

# 儲存模型到本地
model_path = os.path.join(output_dir, 'xgboost_model.bin')
xgb_model.save_model(model_path)
print(f"模型已儲存至 {model_path}")

檢測到 '/Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/output' 存在為文件，刪除該文件...
目錄 '/Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/output' 已成功建立，準備儲存文件。
[0]	train-error:0.28790	validate-error:0.28940
[1]	train-error:0.26698	validate-error:0.27240
[2]	train-error:0.25250	validate-error:0.26080
[3]	train-error:0.24463	validate-error:0.25120
[4]	train-error:0.23950	validate-error:0.24480
[5]	train-error:0.22737	validate-error:0.23740
[6]	train-error:0.22428	validate-error:0.23580
[7]	train-error:0.21752	validate-error:0.23060
[8]	train-error:0.20877	validate-error:0.22200
[9]	train-error:0.20563	validate-error:0.22080
[10]	train-error:0.20128	validate-error:0.21760
[11]	train-error:0.19578	validate-error:0.21600
[12]	train-error:0.19207	validate-error:0.21120
[13]	train-error:0.18707	validate-error:0.20660
[14]	train-error:0.18495	validate-error:0.20540
[15]	train-error

Parameters: { "num_round", "silent" } are not used.



[25]	train-error:0.15302	validate-error:0.18960
[26]	train-error:0.15093	validate-error:0.18780
[27]	train-error:0.14868	validate-error:0.18620
[28]	train-error:0.14647	validate-error:0.18620
[29]	train-error:0.14402	validate-error:0.18740
[30]	train-error:0.14142	validate-error:0.18880
[31]	train-error:0.14030	validate-error:0.18580
[32]	train-error:0.13905	validate-error:0.18580
[33]	train-error:0.13787	validate-error:0.18620
[34]	train-error:0.13650	validate-error:0.18680
[35]	train-error:0.13448	validate-error:0.18540
[36]	train-error:0.13230	validate-error:0.18560
[37]	train-error:0.13065	validate-error:0.18420
[38]	train-error:0.12905	validate-error:0.18440
[39]	train-error:0.12850	validate-error:0.18340
[40]	train-error:0.12773	validate-error:0.18260
[41]	train-error:0.12652	validate-error:0.18220
模型已儲存至 /Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/output/xgboost_model.bin




In [20]:
import os

# 設定本地數據路徑
local_data_dir = os.path.join(os.getcwd(), 'data')

# 確認路徑中訓練和驗證數據是否存在
train_file_path = os.path.join(local_data_dir, 'train', train_file)
validate_file_path = os.path.join(local_data_dir, 'validate', validate_file)

# 建立訓練和驗證數據通道的字典，指向本地路徑
data_channels = {
    'train': train_file_path,
    'validation': validate_file_path
}

print(f"本地訓練數據路徑: {train_file_path}")
print(f"本地驗證數據路徑: {validate_file_path}")


本地訓練數據路徑: /Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/data/train/train-pass1.csv
本地驗證數據路徑: /Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/data/validate/validate-pass1.csv


In [23]:
import pandas as pd
import os

def save_to_csv_local(filename, data_matrix, labels, output_dir='output'):
    """
    儲存數據矩陣和標籤至本地的 CSV 文件。
    """
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    # 將數據和標籤合併成 DataFrame
    data_df = pd.DataFrame(data_matrix)
    data_df['label'] = labels
    # 將 DataFrame 存儲為 CSV
    data_df.to_csv(filepath, index=False)
    print(f"數據已儲存至 {filepath}")

# 使用上述函數保存數據
save_to_csv_local('train-pass1.csv', train_matrix, train['label'], output_dir)
save_to_csv_local('validate-pass1.csv', validate_matrix, validate['label'], output_dir)


數據已儲存至 /Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/output/train-pass1.csv
數據已儲存至 /Volumes/SSD_01/00_課程講義專用/AWS_2024/01_課程筆記/00_Lab/94020/Lab 4.1_Implementing Sentiment Analysis/02_exLab41_完整/output/validate-pass1.csv


In [26]:
from datetime import datetime
import xgboost as xgb
import pandas as pd
import os

# 設定文件路徑
output_dir = 'output'
train_file = 'train-pass1.csv'
validate_file = 'validate-pass1.csv'

# 設定超參數
hyperparams = {
    "num_round": 42,
    "eval_metric": "error",
    "objective": "binary:logistic",
    "silent": 1
}

# 讀取數據
train_df = pd.read_csv(os.path.join(output_dir, train_file))
validate_df = pd.read_csv(os.path.join(output_dir, validate_file))

# 確保所有特徵欄位都是數值型
train_df = train_df.apply(pd.to_numeric, errors='coerce').fillna(0)
validate_df = validate_df.apply(pd.to_numeric, errors='coerce').fillna(0)

# 將數據轉換為 DMatrix 格式
dtrain = xgb.DMatrix(train_df.drop('label', axis=1), label=train_df['label'])
dvalidate = xgb.DMatrix(validate_df.drop('label', axis=1), label=validate_df['label'])

# 設置評估數據
evals = [(dtrain, 'train'), (dvalidate, 'validate')]

# 執行訓練並使用早停法則
print("開始訓練...")
xgb_model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=hyperparams["num_round"],
    evals=evals,
    early_stopping_rounds=10
)
print("訓練完成")

# 保存模型至本地
model_path = os.path.join(output_dir, f'xgboost_model_{datetime.now().strftime("%m-%d-%Y-%H-%M-%S")}.bin')
xgb_model.save_model(model_path)
print(f"模型已儲存至 {model_path}")


開始訓練...
[0]	train-error:0.40187	validate-error:0.05220
[1]	train-error:0.40187	validate-error:0.05220
[2]	train-error:0.40187	validate-error:0.05220
[3]	train-error:0.40187	validate-error:0.05220
[4]	train-error:0.40187	validate-error:0.05220
[5]	train-error:0.40187	validate-error:0.05220
[6]	train-error:0.40187	validate-error:0.05220
[7]	train-error:0.40187	validate-error:0.05220
[8]	train-error:0.40187	validate-error:0.05220
[9]	train-error:0.40187	validate-error:0.05220
訓練完成
模型已儲存至 output/xgboost_model_11-14-2024-17-41-03.bin


Parameters: { "num_round", "silent" } are not used.



In [27]:
from datetime import datetime
import time
import xgboost as xgb
import pandas as pd
import os

# 設定文件路徑
output_dir = 'output'
train_file = 'train-pass1.csv'
validate_file = 'validate-pass1.csv'

# 設定超參數
hyperparams = {
    "num_round": 42,
    "eval_metric": "error",
    "objective": "binary:logistic",
    "silent": 1
}

# 讀取數據
train_df = pd.read_csv(os.path.join(output_dir, train_file))
validate_df = pd.read_csv(os.path.join(output_dir, validate_file))

# 確保所有特徵欄位都是數值型
train_df = train_df.apply(pd.to_numeric, errors='coerce').fillna(0)
validate_df = validate_df.apply(pd.to_numeric, errors='coerce').fillna(0)

# 將數據轉換為 DMatrix 格式
dtrain = xgb.DMatrix(train_df.drop('label', axis=1), label=train_df['label'])
dvalidate = xgb.DMatrix(validate_df.drop('label', axis=1), label=validate_df['label'])

# 設置評估數據
evals = [(dtrain, 'train'), (dvalidate, 'validate')]

# 執行訓練並使用早停法則，並計時
print("開始訓練...")
start_time = time.time()
xgb_model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=hyperparams["num_round"],
    evals=evals,
    early_stopping_rounds=10
)
end_time = time.time()
print("訓練完成")

# 計算和顯示訓練時間
elapsed_time = end_time - start_time
print(f"訓練時間: {elapsed_time:.2f} 秒")

# 保存模型至本地
model_path = os.path.join(output_dir, f'xgboost_model_{datetime.now().strftime("%m-%d-%Y-%H-%M-%S")}.bin')
xgb_model.save_model(model_path)
print(f"模型已儲存至 {model_path}")


開始訓練...
[0]	train-error:0.40187	validate-error:0.05220
[1]	train-error:0.40187	validate-error:0.05220
[2]	train-error:0.40187	validate-error:0.05220
[3]	train-error:0.40187	validate-error:0.05220
[4]	train-error:0.40187	validate-error:0.05220
[5]	train-error:0.40187	validate-error:0.05220
[6]	train-error:0.40187	validate-error:0.05220
[7]	train-error:0.40187	validate-error:0.05220
[8]	train-error:0.40187	validate-error:0.05220
[9]	train-error:0.40187	validate-error:0.05220
[10]	train-error:0.40187	validate-error:0.05220
訓練完成
訓練時間: 0.01 秒
模型已儲存至 output/xgboost_model_11-14-2024-17-42-37.bin


Parameters: { "num_round", "silent" } are not used.



In [31]:
import pandas as pd
from datetime import datetime
import xgboost as xgb
import os

# 設定文件路徑
output_dir = 'output'
train_file = 'train-pass1.csv'
validate_file = 'validate-pass1.csv'

# 設定超參數
hyperparams = {
    "eval_metric": "error",
    "objective": "binary:logistic",
    "silent": 1
}
num_boost_round = 42  # 設定迭代次數

# 讀取數據
train_df = pd.read_csv(os.path.join(output_dir, train_file))
validate_df = pd.read_csv(os.path.join(output_dir, validate_file))

# 確保所有特徵欄位都是數值型
train_df = train_df.apply(pd.to_numeric, errors='coerce').fillna(0)
validate_df = validate_df.apply(pd.to_numeric, errors='coerce').fillna(0)

# 將數據轉換為 DMatrix 格式
dtrain = xgb.DMatrix(train_df.drop('label', axis=1), label=train_df['label'])
dvalidate = xgb.DMatrix(validate_df.drop('label', axis=1), label=validate_df['label'])

# 設置評估數據
evals = [(dtrain, 'train'), (dvalidate, 'validate')]

# 訓練過程中的評估結果將儲存在 `eval_result` 字典中
eval_result = {}

# 執行訓練並使用早停法則
print("開始訓練...")
xgb_model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=num_boost_round,
    evals=evals,
    early_stopping_rounds=10,
    evals_result=eval_result
)
print("訓練完成")

# 提取訓練和驗證的錯誤率數據
train_error = eval_result['train']['error']
validate_error = eval_result['validate']['error']

# 調整 timestamp 和 metric_name 列以匹配數據長度
df_train = pd.DataFrame({
    'timestamp': range(len(train_error)),
    'metric_name': ['train:error'] * len(train_error),
    'value': train_error
})

df_validate = pd.DataFrame({
    'timestamp': range(len(validate_error)),
    'metric_name': ['validation:error'] * len(validate_error),
    'value': validate_error
})

# 合併訓練和驗證的數據
df_analytics = pd.concat([df_train, df_validate], ignore_index=True)

# 顯示 DataFrame 中的錯誤率，方便進行分析
print("訓練和驗證錯誤率分析：")
print(df_analytics)


開始訓練...
[0]	train-error:0.40187	validate-error:0.05220
[1]	train-error:0.40187	validate-error:0.05220
[2]	train-error:0.40187	validate-error:0.05220
[3]	train-error:0.40187	validate-error:0.05220
[4]	train-error:0.40187	validate-error:0.05220
[5]	train-error:0.40187	validate-error:0.05220
[6]	train-error:0.40187	validate-error:0.05220
[7]	train-error:0.40187	validate-error:0.05220
[8]	train-error:0.40187	validate-error:0.05220
[9]	train-error:0.40187	validate-error:0.05220
[10]	train-error:0.40187	validate-error:0.05220
訓練完成
訓練和驗證錯誤率分析：
    timestamp       metric_name     value
0           0       train:error  0.401875
1           1       train:error  0.401875
2           2       train:error  0.401875
3           3       train:error  0.401875
4           4       train:error  0.401875
5           5       train:error  0.401875
6           6       train:error  0.401875
7           7       train:error  0.401875
8           8       train:error  0.401875
9           9       train:error  0.40

Parameters: { "silent" } are not used.



In [33]:
import pandas as pd
from datetime import datetime
import xgboost as xgb
import os

# 設定文件路徑
output_dir = 'output'
train_file = 'train-pass1.csv'
validate_file = 'validate-pass1.csv'

# 設定超參數
hyperparams = {
    "eval_metric": "error",
    "objective": "binary:logistic",
    "silent": 1
}
num_boost_round = 42  # 設定迭代次數

# 讀取數據
train_df = pd.read_csv(os.path.join(output_dir, train_file))
validate_df = pd.read_csv(os.path.join(output_dir, validate_file))

# 確保所有特徵欄位都是數值型
train_df = train_df.apply(pd.to_numeric, errors='coerce').fillna(0)
validate_df = validate_df.apply(pd.to_numeric, errors='coerce').fillna(0)

# 將數據轉換為 DMatrix 格式
dtrain = xgb.DMatrix(train_df.drop('label', axis=1), label=train_df['label'])
dvalidate = xgb.DMatrix(validate_df.drop('label', axis=1), label=validate_df['label'])

# 設置評估數據
evals = [(dtrain, 'train'), (dvalidate, 'validate')]

# 訓練過程中的評估結果將儲存在 `eval_result` 字典中
eval_result = {}

# 執行訓練並使用早停法則
print("開始訓練...")
xgb_model = xgb.train(
    params=hyperparams,
    dtrain=dtrain,
    num_boost_round=num_boost_round,
    evals=evals,
    early_stopping_rounds=10,
    evals_result=eval_result
)
print("訓練完成")

# 提取最終的訓練和驗證錯誤率數據
final_train_error = eval_result['train']['error'][-1]
final_validate_error = eval_result['validate']['error'][-1]

# 建立 DataFrame 以符合期望的格式
df_analytics = pd.DataFrame({
    'timestamp': [0.0, 0.0],
    'metric_name': ['train:error', 'validation:error'],
    'value': [final_train_error, final_validate_error]
})

# 顯示 DataFrame 中的最終錯誤率，方便進行分析
print("最終訓練和驗證錯誤率分析：")
print(df_analytics)


開始訓練...
[0]	train-error:0.40187	validate-error:0.05220
[1]	train-error:0.40187	validate-error:0.05220
[2]	train-error:0.40187	validate-error:0.05220
[3]	train-error:0.40187	validate-error:0.05220
[4]	train-error:0.40187	validate-error:0.05220
[5]	train-error:0.40187	validate-error:0.05220
[6]	train-error:0.40187	validate-error:0.05220
[7]	train-error:0.40187	validate-error:0.05220
[8]	train-error:0.40187	validate-error:0.05220
[9]	train-error:0.40187	validate-error:0.05220
[10]	train-error:0.40187	validate-error:0.05220
訓練完成
最終訓練和驗證錯誤率分析：
   timestamp       metric_name     value
0        0.0       train:error  0.401875
1        0.0  validation:error  0.052200


Parameters: { "silent" } are not used.

