In [2]:
import os

import lightgbm as lgb
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the complete dataset from the project's assets folder.
df = pd.read_csv('../assets/dataset.csv')

# Columns of `df` used as model inputs.
# NOTE(review): the *_type columns look categorical -- confirm they are
# already numerically encoded (or `category` dtype) in the CSV.
features = [
    'surface_type', 'liquid_type', 'diameter',
    'height', 'fall_point_type', 'time',
]

# Target columns; the training loop below fits one model per entry.
labels = [
    'area',
    'circumstance',
    'circularity',
    'finger_num',
]

# Train one LightGBM regressor per target column and save each model to disk.
#
# Everything that does not depend on the target is built once, before the
# loop: the feature matrix, the hyper-parameters and the output directory.
X = df[features]

params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.1,
    'num_leaves': 31,
    'max_depth': -1,
    'verbose': -1
}

# Create the output directory up front so saving the first model cannot fail
# just because the folder is missing on a fresh checkout.
model_dir = '../model/baseline'
os.makedirs(model_dir, exist_ok=True)

for label in labels:
    y = df[label]

    # Same test_size and random_state on every iteration, so every target is
    # evaluated on the same 10% hold-out rows.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, random_state=42
    )

    train_data = lgb.Dataset(X_train, label=y_train)
    # `reference` ties the hold-out set to the training Dataset.
    test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

    # With 'verbose': -1 the per-round metrics are not printed, so record them
    # explicitly; otherwise the validation pass is computed and thrown away.
    history = {}
    model = lgb.train(
        params,
        train_data,
        num_boost_round=100,
        valid_sets=[train_data, test_data],
        valid_names=['train', 'test'],
        callbacks=[lgb.record_evaluation(history)],
    )

    # Report the RMSE after the final boosting round for both splits.
    train_rmse = history['train']['rmse'][-1]
    test_rmse = history['test']['rmse'][-1]
    print(f'{label}: train rmse={train_rmse:.4f}, test rmse={test_rmse:.4f}')

    model.save_model(f'{model_dir}/model_{label}.txt')