In [None]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from catboost import Pool


import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, entry) for entry in filenames):
        print(full_path)

In [None]:
# Read the training and test tables from the competition's input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/playground-series-s5e1/train.csv',
        '/kaggle/input/playground-series-s5e1/test.csv',
    )
)
# Print the first two training rows as a quick sanity check of the load.
print(train.head(2))

In [None]:
def _as_category_frame(frame):
    """Cast every column of ``frame`` to string, then to pandas' category dtype."""
    return frame.astype('str').astype("category")


# Keep only training rows that have no missing values.
train = train.dropna()

# Target column, and the feature frame (everything except the target and the row id).
y = train['num_sold']
X = _as_category_frame(train.drop(columns=['num_sold', 'id']))

# Test features get the same treatment, minus the row id.
x_test = _as_category_frame(test.drop(columns=['id']))

# Bundle features and labels for CatBoost; every feature column is declared categorical.
train_pool = Pool(
    data=X,
    label=y,
    cat_features=X.columns.values,
)

In [None]:
import optuna
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# One seed for both the hold-out split and the Optuna sampler, so that a
# fresh "Restart & Run All" repeats the same search.
SEED = 42

# Hold out 20% of the training rows for scoring the trials. Scoring a trial on
# the very rows it was fitted on only measures how well the model memorised
# them, which pushes the search toward the deepest / longest-trained
# configuration instead of the one that generalises best.
X_fit, X_valid, y_fit, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)
fit_pool = Pool(data=X_fit, label=y_fit, cat_features=X.columns.values)
valid_pool = Pool(data=X_valid, label=y_valid, cat_features=X.columns.values)


def objective(trial):
    """Train one CatBoost configuration and score it on held-out rows.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``depth``, ``learning_rate``,
        ``l2_leaf_reg`` and ``iterations``.

    Returns
    -------
    float
        Mean squared error on the validation split (lower is better).
    """
    param = {
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
        'iterations': trial.suggest_int('iterations', 500, 5000),
        # Silence CatBoost's per-iteration log during the search.
        'verbose': 0
    }
    model = CatBoostRegressor(**param)
    # Fit on the fitting split only; the validation rows stay unseen.
    model.fit(fit_pool)
    preds = model.predict(valid_pool)
    return mean_squared_error(y_valid, preds)


# Minimise validation MSE over 50 trials with a seeded sampler.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=50)
print(f"Best Param: {study.best_params}")


In [None]:
# Take the hyperparameters of the best Optuna trial and build a new,
# not-yet-fitted regressor from them.
best_param = study.best_params
model = CatBoostRegressor(**best_param)

In [None]:
# Fit the regressor built from the best hyperparameters on the training pool.
model.fit(train_pool)

In [None]:
# Wrap the test features in a CatBoost pool, declaring every column categorical.
test_pool = Pool(
    data=x_test,
    cat_features=x_test.columns.values,
)

In [None]:
# Predict the target for every row in the test pool.
prediction = model.predict(test_pool)

In [None]:
# Pair each test row id with the value predicted for it.
submission_columns = {'id': test['id'], 'num_sold': prediction}
df_results = pd.DataFrame(submission_columns)

# Save as a CSV file, leaving out the DataFrame index.
df_results.to_csv('submission.csv', index=False)