# LightGBM approach

In [4]:
import lightgbm as lgb
from lightgbm.callback import early_stopping
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Binary-classification demo: train a LightGBM booster on two of the three
# Iris classes with early stopping, then report hold-out accuracy.

# Fetch Iris and keep only the rows whose label is not 2, so two classes remain.
iris = load_iris()
keep_rows = iris.target != 2
X = iris.data[keep_rows]
y = iris.target[keep_rows]

# Reserve 20% of the rows for evaluation; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap both splits as LightGBM Datasets; the hold-out set is tied to the
# training set through ``reference``.
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Booster configuration: gradient-boosted trees, binary log-loss, GPU device.
params = dict(
    boosting_type="gbdt",
    objective="binary",
    metric="binary_logloss",
    device="gpu",
)

# Early stopping is supplied as a callback with a patience of 10 rounds.
callbacks = [early_stopping(stopping_rounds=10)]

# Fit for at most 100 rounds, evaluating on both the training and hold-out sets.
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[train_data, test_data],
    callbacks=callbacks,
)

# The booster outputs probabilities; round them to obtain 0/1 labels.
y_pred = model.predict(X_test)
y_pred_binary = y_pred.round()

# Score the hard predictions against the true hold-out labels.
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Accuracy: {accuracy:.4f}")

# CatBoost GPU

In [8]:
import seaborn as sns
import pandas as pd
from catboost import CatBoostRegressor, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Regression demo: predict ``total_bill`` from the Seaborn tips dataset with a
# GPU-trained CatBoostRegressor and report the hold-out RMSE.

# Load the Seaborn tips dataset
tips = sns.load_dataset('tips')

# Columns handed to CatBoost as categorical features.
categorical_features = ['sex', 'smoker', 'day', 'time']

# Convert categorical columns to string for CatBoost compatibility
for column in categorical_features:
    tips[column] = tips[column].astype(str)

# Define features and target
X = tips.drop(columns=['total_bill'])
y = tips['total_bill']

# Split the dataset (20% hold-out, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create Pool objects for CatBoost
train_pool = Pool(data=X_train, label=y_train, cat_features=categorical_features)
test_pool = Pool(data=X_test, label=y_test, cat_features=categorical_features)

# Initialize the CatBoostRegressor with GPU settings
model = CatBoostRegressor(
    iterations=500,
    learning_rate=0.1,
    depth=6,
    loss_function='RMSE',
    task_type="GPU",  # Use GPU
    devices='0'       # Specify GPU device
)

# Train the model, logging every 50 iterations and stopping after 10 rounds
# without improvement on the evaluation pool.
# NOTE: the same hold-out split drives early stopping and the final score
# below, so the reported RMSE is optimistic.
model.fit(train_pool, eval_set=test_pool, verbose=50, early_stopping_rounds=10)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model performance.
# The ``squared=False`` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE gives
# the same RMSE on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"RMSE: {rmse:.4f}")

0:	learn: 8.3682067	test: 9.1091529	best: 9.1091529 (0)	total: 4.27s	remaining: 35m 30s
50:	learn: 5.0242698	test: 6.2593988	best: 6.2593988 (50)	total: 4.85s	remaining: 42.7s
bestTest = 6.175403441
bestIteration = 71
Shrink model to first 72 iterations.
RMSE: 6.1754


# XGBoost GPU

In [14]:
import xgboost as xgb
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

# Regression demo: predict ``total_bill`` from the Seaborn tips dataset with a
# GPU-trained XGBoost booster and report the hold-out RMSE.

# Load the Seaborn tips dataset
tips = sns.load_dataset('tips')

# Preprocess the dataset
# Convert categorical variables into dummy/one-hot encoding
tips = pd.get_dummies(tips, columns=['sex', 'smoker', 'day', 'time'], drop_first=True)

# Define features and target
X = tips.drop(columns=['total_bill'])
y = tips['total_bill']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data into DMatrix format, which is optimized for XGBoost
train_data = xgb.DMatrix(X_train, label=y_train)
test_data = xgb.DMatrix(X_test, label=y_test)

# Define GPU parameters for XGBoost
params = {
    "objective": "reg:squarederror",  # Regression objective
    "eval_metric": "rmse",            # Evaluation metric
    "tree_method": "hist",            # Histogram-based tree construction
    "device": "cuda",                 # Run training on the CUDA GPU
    "learning_rate": 0.1,
    "max_depth": 6
}

# Train the XGBoost model: up to 200 rounds, logging every 10 rounds and
# stopping after 10 rounds without improvement on the last eval set ("test").
model = xgb.train(
    params,
    train_data,
    num_boost_round=200,
    evals=[(train_data, "train"), (test_data, "test")],
    early_stopping_rounds=10,
    verbose_eval=10
)

# Make predictions
y_pred = model.predict(test_data)

# Evaluate model performance.
# The ``squared=False`` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE gives
# the same RMSE on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f"RMSE: {rmse:.4f}")

[0]	train-rmse:8.16107	test-rmse:9.02373
[10]	train-rmse:4.75365	test-rmse:7.12777
[20]	train-rmse:3.46133	test-rmse:6.63339
[30]	train-rmse:2.83794	test-rmse:6.62786
[37]	train-rmse:2.62125	test-rmse:6.60553
RMSE: 6.6142
