In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib
from sklearn.impute import SimpleImputer


In [None]:
# 讀取上傳的文件
'''
north_data = pd.read_csv('D:/DS_Prediction/Weather/north_weekly_averages.csv')
south_data = pd.read_csv('D:/DS_Prediction/Weather/south_weekly_averages.csv')
central_data = pd.read_csv('D:/DS_Prediction/Weather/central_weekly_averages.csv')
east_data = pd.read_csv('D:/DS_Prediction/Weather/east_weekly_averages.csv')
fuel_prices = pd.read_csv('D:/DS_Prediction/fuel_prices.csv')
cabbage_prices = pd.read_csv('D:/DS_Prediction/Domestic_Cabbage.csv')
'''

north_data = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/weather-csv/north_weekly_averages.csv')
south_data = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/weather-csv/south_weekly_averages.csv')
central_data = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/weather-csv/central_weekly_averages.csv')
east_data = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/weather-csv/east_weekly_averages.csv')
fuel_prices = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/fuel_prices.csv')
cabbage_prices = pd.read_csv('C:/Users/$EKH000-V5FHVTC5DRPM/DS_Prediction/vegetable-csv/Domestic_Cabbage.csv')


In [None]:
## 
''' 
Data Processing

'''
## 

# Merge regional data into a single DataFrame
regional_data_1 = pd.concat([north_data, south_data, central_data, east_data], ignore_index=True)


# Check and rename date columns if necessary
def ensure_date_column(df, possible_names):
    for col in df.columns:
        if col in possible_names:
            df.rename(columns={col: 'date'}, inplace=True)
            break
    return df

# Rename the date columns where applicable
regional_data = ensure_date_column(regional_data_1, ['週', 'date'])
fuel_prices = ensure_date_column(fuel_prices, ['Date', 'date', '週', '日期'])
cabbage_prices = ensure_date_column(cabbage_prices, ['週', 'date'])

# Convert date columns to datetime
def parse_date(df, column_name):
    if column_name in df.columns:
        df[column_name] = pd.to_datetime(df[column_name], errors='coerce', utc=True)
    return df

regional_data = parse_date(regional_data, 'date')
fuel_prices = parse_date(fuel_prices, 'date')
cabbage_prices = parse_date(cabbage_prices, 'date')

# Drop rows with missing or invalid 'date' values
for df in [regional_data, fuel_prices, cabbage_prices]:
    if 'date' in df.columns:
        df.dropna(subset=['date'], inplace=True)

# Ensure 'date' columns are consistent and datetimelike
for df in [regional_data, fuel_prices, cabbage_prices]:
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date']).dt.normalize()

# Extract additional features from date
def extract_date_features(df, date_column):
    df['year'] = df[date_column].dt.year
    df['month'] = df[date_column].dt.month
    df['week'] = df[date_column].dt.day // 7
    return df

cabbage_prices = extract_date_features(cabbage_prices, 'date')
fuel_prices = extract_date_features(fuel_prices, 'date')

# Merge all the data into a single DataFrame
try:
    merged_data = pd.merge(cabbage_prices, regional_data, on='date', how='left')
    merged_data = pd.merge(merged_data, fuel_prices, on=['year', 'month', 'week'], how='left')
except KeyError as e:
    raise KeyError(f"Error during merging: {e}. Please check that all dataframes contain a 'date' column.")

# 根據 Group 分組計算均值並填補缺失值
merged_data['Fuel_92'] = merged_data.groupby('month')['Fuel_92'].transform(lambda x: x.fillna(x.mean()))
merged_data['Fuel_95'] = merged_data.groupby('month')['Fuel_95'].transform(lambda x: x.fillna(x.mean()))
merged_data['Fuel_High'] = merged_data.groupby('month')['Fuel_High'].transform(lambda x: x.fillna(x.mean()))

# Handle missing values
merged_data.fillna(method='ffill', inplace=True)

# Prepare features and target variable
y = merged_data[['平均價', '交易量']]
X = merged_data.drop(columns=['date_x', '平均價', '交易量', 'year', 'month', 'week', 'date_y'])

# Save the merged DataFrame to a CSV file
output_file = "All_X.csv"
X.to_csv(output_file, index=False, encoding='utf-8-sig')

print(f"Merged data saved to {output_file}")

print(merged_data)

Merged data saved to Train_X.csv
                       date_x    平均價       交易量  year  month  week  平均氣壓(hPa)  \
0   2019-01-01 00:00:00+00:00  22.22   4487.67  2019      1     0    1019.18   
1   2019-01-01 00:00:00+00:00  22.22   4487.67  2019      1     0    1019.90   
2   2019-01-01 00:00:00+00:00  22.22   4487.67  2019      1     0    1020.22   
3   2019-01-01 00:00:00+00:00  22.22   4487.67  2019      1     0    1017.62   
4   2019-01-08 00:00:00+00:00  20.40   4323.00  2019      1     1    1016.19   
..                        ...    ...       ...   ...    ...   ...        ...   
922 2024-11-12 00:00:00+00:00  47.13  14259.17  2024     11     1    1006.90   
923 2024-11-25 00:00:00+00:00  40.70  17676.00  2024     11     3    1015.60   
924 2024-11-25 00:00:00+00:00  40.70  17676.00  2024     11     3    1014.70   
925 2024-11-25 00:00:00+00:00  40.70  17676.00  2024     11     3    1016.55   
926 2024-11-25 00:00:00+00:00  40.70  17676.00  2024     11     3    1013.20   

     平

  df[column_name] = pd.to_datetime(df[column_name], errors='coerce', utc=True)
  merged_data.fillna(method='ffill', inplace=True)


# XGBOOST

In [None]:
## 
''' 
Build and Train the Model

'''
## 
# Split the data into training and testing sets
X_train = pd.read_csv('train.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_train = pd.read_csv('train.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]
print(X_train.head(2))
X_test = pd.read_csv('test.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_test = pd.read_csv('test.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]
# Set up the parameter grid for GridSearchCV
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'n_estimators': [100, 200, 300],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Set up the XGBoost model
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)

   north_平均氣壓(hPa)  north_平均氣溫(℃)  north_平均相對溼度( %)  north_平均風速(m/s)  \
0          1002.21          27.77             79.43             2.24   
1          1013.04          20.47             89.71             5.01   

   north_累計雨量(mm)  north_累積日射量(MJ/m2)  south_平均氣壓(hPa)  south_平均氣溫(℃)  \
0            3.36               20.65          1001.39          27.34   
1            1.43                9.22          1013.21          22.69   

   south_平均相對溼度( %)  south_平均風速(m/s)  ...  east_平均相對溼度( %)  east_平均風速(m/s)  \
0             80.50             1.16  ...            85.86            1.47   
1             82.86             1.24  ...            82.14            1.74   

   east_累計雨量(mm)  east_累積日射量(MJ/m2)    Fuel_92    Fuel_95  Fuel_High  \
0          14.93              19.62  26.858286  28.358286  24.541714   
1           0.00              15.61  29.100000  30.600000  26.900000   

   cabbage_交易量  caulifower_交易量  chinese_cabbage_交易量  
0      5017.00         8161.25             10080.50  
1  

## Cabbage

In [17]:
## 
''' 
Prediction and Evaluation

'''
## 
# Fit the model using GridSearchCV
X_train_C = X_train.copy()
X_test_C = X_test.copy()
grid_search.fit(X_train_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']), y_train['cabbage_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cabbage_平均價'], y_pred)
r2 = r2_score(y_test['cabbage_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 100, 'subsample': 1.0}
Root Mean Squared Error: 10.38609171147781
Mean Absolute Error: 8.218775018964497
R2 Score: 0.6889750735839266




## Cauliflower

In [None]:
## 
''' 
Prediction and Evaluation

'''
## 
# Fit the model using GridSearchCV
X_train_Cau = X_train.copy()
X_test_Cau = X_test.copy()
grid_search.fit(X_train_Cau.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']), y_train['cauliflower_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_Cau.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['cauliflower_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cauliflower_平均價'], y_pred)
r2 = r2_score(y_test['cauliflower_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Root Mean Squared Error: 8.58890521141602
Mean Absolute Error: 5.994199945359004
R2 Score: 0.7396820586313532




## Chinese_Cabbage

In [22]:
## 
''' 
Prediction and Evaluation

'''
## 
# Fit the model using GridSearchCV
X_train_Chinese = X_train.copy()
X_test_Chinese = X_test.copy()
grid_search.fit(X_train_Chinese.drop(columns=['cabbage_交易量', 'caulifower_交易量']), y_train['chinese_cabbage_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_Chinese.drop(columns=['cabbage_交易量', 'caulifower_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['chinese_cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['chinese_cabbage_平均價'], y_pred)
r2 = r2_score(y_test['chinese_cabbage_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'subsample': 0.8}
Root Mean Squared Error: 9.670579204993857
Mean Absolute Error: 7.3700291097731805
R2 Score: 0.4662623479577098




# SVR

In [30]:
## 
''' 
Build and Train the Model

'''
## 
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer
import joblib

# Split the data into training and testing sets
X_train = pd.read_csv('train.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_train = pd.read_csv('train.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]
print(X_train.head(2))
X_test = pd.read_csv('test.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_test = pd.read_csv('test.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]

param_grid = {
    'C': [0.01, 0.1, 1, 10, 100, 1000],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    'kernel': ['rbf'],
    'degree': [2, 3, 4],  # Only applicable for 'poly' kernel
    'epsilon': [0.01, 0.1, 0.2, 0.5, 1.0]
}


# Set up the SVR model
svr_model = SVR()

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=svr_model, param_grid=param_grid, cv=3, scoring='neg_mean_squared_error', verbose=0, n_jobs=-1)

   north_平均氣壓(hPa)  north_平均氣溫(℃)  north_平均相對溼度( %)  north_平均風速(m/s)  \
0          1002.21          27.77             79.43             2.24   
1          1013.04          20.47             89.71             5.01   

   north_累計雨量(mm)  north_累積日射量(MJ/m2)  south_平均氣壓(hPa)  south_平均氣溫(℃)  \
0            3.36               20.65          1001.39          27.34   
1            1.43                9.22          1013.21          22.69   

   south_平均相對溼度( %)  south_平均風速(m/s)  ...  east_平均相對溼度( %)  east_平均風速(m/s)  \
0             80.50             1.16  ...            85.86            1.47   
1             82.86             1.24  ...            82.14            1.74   

   east_累計雨量(mm)  east_累積日射量(MJ/m2)    Fuel_92    Fuel_95  Fuel_High  \
0          14.93              19.62  26.858286  28.358286  24.541714   
1           0.00              15.61  29.100000  30.600000  26.900000   

   cabbage_交易量  caulifower_交易量  chinese_cabbage_交易量  
0      5017.00         8161.25             10080.50  
1  

## Cabbage

In [31]:
# Fit the model using GridSearchCV
X_train_C = X_train.copy()
X_test_C = X_test.copy()
grid_search.fit(X_train_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']), y_train['cabbage_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cabbage_平均價'], y_pred)
r2 = r2_score(y_test['cabbage_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'C': 100, 'degree': 2, 'epsilon': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Root Mean Squared Error: 15.02186305769243
Mean Absolute Error: 10.479030938722607
R2 Score: 0.34936340465699645




## Cauliflower

In [32]:
# Fit the model using GridSearchCV
X_train_Cau = X_train.copy()
X_test_Cau = X_test.copy()
grid_search.fit(X_train_Cau.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']), y_train['cauliflower_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_Cau.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['cauliflower_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cauliflower_平均價'], y_pred)
r2 = r2_score(y_test['cauliflower_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'C': 1000, 'degree': 2, 'epsilon': 1.0, 'gamma': 'scale', 'kernel': 'rbf'}
Root Mean Squared Error: 9.046500145611084
Mean Absolute Error: 7.599290887598884
R2 Score: 0.7112049995673857




## Chinese_Cabbage

In [33]:
# Fit the model using GridSearchCV
X_train_Cau = X_train.copy()
X_test_Cau = X_test.copy()
grid_search.fit(X_train_Cau.drop(columns=['cabbage_交易量', 'caulifower_交易量']), y_train['chinese_cabbage_平均價'])

# Get the best estimator and parameters
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Make predictions
y_pred = best_model.predict(X_test_Cau.drop(columns=['cabbage_交易量', 'caulifower_交易量']))

# Evaluate the model
rmse = mean_squared_error(y_test['chinese_cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['chinese_cabbage_平均價'], y_pred)
r2 = r2_score(y_test['chinese_cabbage_平均價'], y_pred)
print(f'Root Mean Squared Error: {rmse}')
print(f'Mean Absolute Error: {mae}')
print(f'R2 Score: {r2}')

Best Parameters: {'C': 10, 'degree': 2, 'epsilon': 0.01, 'gamma': 'scale', 'kernel': 'rbf'}
Root Mean Squared Error: 9.984689047648873
Mean Absolute Error: 7.633159698941175
R2 Score: 0.4310266085939495


  _data = np.array(data, dtype=dtype, copy=copy,


# CNN+Transfermor

In [5]:
## 
''' 
Build and Train the Model

'''
## 
import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.impute import SimpleImputer
import joblib
from tensorflow.keras import layers, models, optimizers
import numpy as np

# Split the data into training and testing sets
X_train = pd.read_csv('train.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_train = pd.read_csv('train.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]
X_test = pd.read_csv('test.csv').drop(columns=['date', 'cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價'])
y_test = pd.read_csv('test.csv')[['cabbage_平均價', 'cauliflower_平均價', 'chinese_cabbage_平均價']]

def build_simplified_model(input_shape):
    inputs = layers.Input(shape=input_shape)
    
    # CNN 层
    x = layers.Conv1D(64, 5, activation='relu', padding='same')(inputs)
    x = layers.MaxPooling1D(pool_size=2)(x)  # 池化操作
    x = layers.Conv1D(64, 5, activation='relu', strides=2, padding='same')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)  # 池化操作
    x = layers.Conv1D(128, 3, activation='relu', strides=2, padding='same')(x)
    x = layers.MaxPooling1D(pool_size=2)(x)
    
    # Transformer-like Dense Layer
    transformer_dim = 128
    x = layers.TimeDistributed(layers.Dense(transformer_dim))(x)
    
    # Flatten 层：将卷积输出展平成一维
    x = layers.Flatten()(x)
    
    # 全连接层
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dense(1)(x)  # 输出预测值

    # 创建模型
    model = models.Model(inputs=inputs, outputs=x)
    
    return model

## Cabbage

In [12]:
X_train_C = X_train.copy()
X_train_C = X_train_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']).values
X_test_C = X_test.copy()
X_test_C = X_test_C.drop(columns=['caulifower_交易量', 'chinese_cabbage_交易量']).values

# 重塑输入形状以适应CNN
X_train_C = X_train_C.reshape(-1, X_train_C.shape[1], 1)
X_test_C = X_test_C.reshape(-1, X_test_C.shape[1], 1)

# X_train_C = X_train_C.reshape(-1, 28, 1)
# X_test_C = X_test_C.reshape(-1, 28, 1)

# 创建模型并编译
model = build_simplified_model((X_train_C.shape[1], 1))
# model = build_simplified_model((28, 1))
optimizer = optimizers.Adam(learning_rate=1e-3)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# 打印模型概述
model.summary()

# 训练模型
model.fit(X_train_C, y_train['cabbage_平均價'], epochs=200, batch_size=32, validation_data=(X_test_C, y_test['cabbage_平均價']))

# 预测
y_pred = model.predict(X_test_C)

# 评估模型
rmse = mean_squared_error(y_test['cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cabbage_平均價'], y_pred)
r2 = r2_score(y_test['cabbage_平均價'], y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")


Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 4457.7427 - val_loss: 1046.3912
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 867.6656 - val_loss: 344.7148
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 300.2794 - val_loss: 250.4181
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 227.3802 - val_loss: 215.6487
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 187.2579 - val_loss: 233.8371
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 150.1849 - val_loss: 203.4561
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 158.9400 - val_loss: 215.3537
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 124.9519 - val_loss: 195.7888
Epoch 9/200
[1m6/6[0



## Cauliflower

In [14]:
X_train_C = X_train.copy()
X_train_C = X_train_C.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']).values
X_test_C = X_test.copy()
X_test_C = X_test_C.drop(columns=['cabbage_交易量', 'chinese_cabbage_交易量']).values

# 重塑输入形状以适应CNN
X_train_C = X_train_C.reshape(-1, X_train_C.shape[1], 1)
X_test_C = X_test_C.reshape(-1, X_test_C.shape[1], 1)

# 创建模型并编译
model = build_simplified_model((X_train_C.shape[1], 1))
optimizer = optimizers.Adam(learning_rate=1e-3)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# 打印模型概述
model.summary()

# 训练模型
model.fit(X_train_C, y_train['cauliflower_平均價'], epochs=200, batch_size=32, validation_data=(X_test_C, y_test['cauliflower_平均價']))

# 预测
y_pred = model.predict(X_test_C)

# 评估模型
rmse = mean_squared_error(y_test['cauliflower_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['cauliflower_平均價'], y_pred)
r2 = r2_score(y_test['cauliflower_平均價'], y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")


Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 45ms/step - loss: 60126.2695 - val_loss: 5785.7510
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 6832.5044 - val_loss: 297.2374
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 1652.7461 - val_loss: 1403.8584
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1273.9196 - val_loss: 92.8739
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 323.8263 - val_loss: 342.9808
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 417.0980 - val_loss: 77.7903
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 168.9726 - val_loss: 120.2205
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 146.1339 - val_loss: 89.6451
Epoch 9/200
[1m6/6



## Chinese_Cabbage

In [15]:
X_train_C = X_train.copy()
X_train_C = X_train_C.drop(columns=['cabbage_交易量', 'caulifower_交易量']).values
X_test_C = X_test.copy()
X_test_C = X_test_C.drop(columns=['cabbage_交易量', 'caulifower_交易量']).values

# 重塑输入形状以适应CNN
X_train_C = X_train_C.reshape(-1, X_train_C.shape[1], 1)
X_test_C = X_test_C.reshape(-1, X_test_C.shape[1], 1)

# 创建模型并编译
model = build_simplified_model((X_train_C.shape[1], 1))
optimizer = optimizers.Adam(learning_rate=1e-3)

model.compile(optimizer=optimizer, loss='mean_squared_error')

# 打印模型概述
model.summary()

# 训练模型
model.fit(X_train_C, y_train['chinese_cabbage_平均價'], epochs=200, batch_size=32, validation_data=(X_test_C, y_test['chinese_cabbage_平均價']))

# 预测
y_pred = model.predict(X_test_C)

# 评估模型
rmse = mean_squared_error(y_test['chinese_cabbage_平均價'], y_pred, squared=False)
mae = mean_absolute_error(y_test['chinese_cabbage_平均價'], y_pred)
r2 = r2_score(y_test['chinese_cabbage_平均價'], y_pred)

print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R2 Score: {r2}")


Epoch 1/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 44ms/step - loss: 11989.4980 - val_loss: 2349.9470
Epoch 2/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 1475.7764 - val_loss: 427.8645
Epoch 3/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 339.9246 - val_loss: 312.7151
Epoch 4/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 214.2870 - val_loss: 242.5308
Epoch 5/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 191.9510 - val_loss: 174.5446
Epoch 6/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 151.9243 - val_loss: 135.0159
Epoch 7/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 118.7592 - val_loss: 141.5480
Epoch 8/200
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 119.5449 - val_loss: 103.8513
Epoch 9/200
[1m6/6

