# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from lightgbm import LGBMClassifier
from sklearn.metrics import f1_score

# Step 1: load the data and split it into train / validation sets.
# A forward slash is used as the separator: the previous 'data\s...' literal
# contained the invalid escape sequence '\s' and only resolved on Windows.
# NOTE(review): the training frame is read from a file named '*_test.csv',
# the same base name as the test set below -- confirm this is the intended
# (labelled) file and not a copy/paste slip.
train_val_data = pd.read_csv('data/spaceship_titanic_fe_test.csv')
test_data = pd.read_csv('spaceship_titanic_fe_test.csv')

# The last column is assumed to be the target variable; everything before it
# is treated as a feature.
X = train_val_data.iloc[:, :-1]
y = train_val_data.iloc[:, -1]

# Hold out 20% of the rows for validation; the seed keeps the split
# reproducible between runs.
X_train, X_validation, y_train, y_validation = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: hyper-parameter search.
# NOTE(review): for small max_depth values the larger num_leaves settings are
# presumably capped by the depth limit, so part of this grid may be redundant
# work -- verify against the LightGBM parameter docs before trimming it.
params = {
    'n_estimators': [30, 50, 100, 200, 300, 400, 500, 600, 700],
    'max_depth': [3, 4, 5, 6, 7, 8],
    'colsample_bytree': [0.7, 0.8, 0.9, 1],
    'num_leaves': [21, 31, 41, 51, 61, 71, 81],
}

lgbm = LGBMClassifier()
# refit=True (the default, spelled out here) makes the search retrain the
# best configuration on all of X_train once cross-validation has finished.
grid = GridSearchCV(lgbm, params, cv=5, scoring='f1', refit=True)
grid.fit(X_train, y_train)

# Step 3: final model.
# The search has already refit the winning configuration on X_train, so that
# estimator is reused instead of fitting an identical model a second time.
best_params = grid.best_params_
model = grid.best_estimator_

# Model evaluation on the held-out validation split.
predictions = model.predict(X_validation)
f1 = f1_score(y_validation, predictions)
print(f'F1 Score: {f1}')
