In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import root_mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna
import lightgbm as lgb
import xgboost as xgb
import catboost as cb
from optuna.trial import Trial
from sklearn.model_selection import KFold
import time, pickle, os


# Notebook-wide configuration: silence warnings and set display/plot defaults.
warnings.filterwarnings('ignore')  # suppresses every warning category for the whole session
pd.set_option('display.max_columns', None)  # None = never truncate columns when displaying frames

# Apply the three matplotlib overrides in one call, before selecting the style sheet.
plt.rcParams.update({
    'axes.unicode_minus': False,
    'figure.figsize': (21, 9),
    'figure.dpi': 300,
})
plt.style.use('dark_background')

In [2]:
# English feature names, in order. Other cells assign this list positionally
# to the loaded frames' columns, so the order here must match the CSV layout.
COLUMNS = [
    'manufacturer', 'model', 'vehicle_condition',
    'battery_capacity', 'drivetrain', 'mileage',
    'warranty_period', 'accident_history', 'year_of_manufacture',
]

# Random seed constant for the notebook.
SEED = 42

In [3]:
# Training data: drop the ID column, rename the remaining columns positionally
# (features followed by the target 'y'), and rewrite the '사' marker in
# manufacturer names as '_corp'.
train = (
    pd.read_csv('./data/train.csv')
    .drop(columns=['ID'])
    .set_axis(COLUMNS + ['y'], axis=1)
    .assign(manufacturer=lambda frame: frame.manufacturer.str.replace('사', '_corp'))
)

# Test data: same preparation, minus the target column.
test = (
    pd.read_csv('./data/test.csv')
    .drop(columns=['ID'])
    .set_axis(COLUMNS, axis=1)
    .assign(manufacturer=lambda frame: frame.manufacturer.str.replace('사', '_corp'))
)
# test_X is a second name for the same DataFrame object, not a copy.
test_X = test

In [4]:
# AutoGluon is imported in this cell rather than alongside the other imports.
import autogluon
from autogluon.tabular import TabularPredictor

# Hyperparameter-search settings handed to fit(): random searcher,
# 7 trials, local scheduler.
tune_kwargs = dict(searcher='random', num_trials=7, scheduler='local')

# Predict column 'y'; models are scored with RMSE.
predictor = TabularPredictor(label='y', eval_metric='rmse')
# fit() returns the predictor itself, so rebinding leaves `predictor`
# pointing at the same (now fitted) object.
predictor = predictor.fit(
    train_data=train,
    presets=["best_quality"],
    hyperparameter_tune_kwargs=tune_kwargs,
    num_bag_folds=20,
    refit_full=True,
    time_limit=43200,  # seconds, i.e. a 12-hour budget
)

2025-01-28 00:25:44,721	INFO timeout.py:54 -- Reached timeout of 292.83759806553525 seconds. Stopping all trials.
2025-01-28 00:25:44,753	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/gus47/Desktop/workspace/dacon/ev-price-prediction/AutogluonModels/ag-20250127_103007/models/NeuralNetTorch_r89_BAG_L1' in 0.0180s.
- 4b3c4_00000: FileNotFoundError('Could not fetch metrics for 4b3c4_00000: both result.json and progress.csv were not found at C:/Users/gus47/Desktop/workspace/dacon/ev-price-prediction/AutogluonModels/ag-20250127_103007/models/NeuralNetTorch_r89_BAG_L1/4b3c4_00000')
- 4b3c4_00001: FileNotFoundError('Could not fetch metrics for 4b3c4_00001: both result.json and progress.csv were not found at C:/Users/gus47/Desktop/workspace/dacon/ev-price-prediction/AutogluonModels/ag-20250127_103007/models/NeuralNetTorch_r89_BAG_L1/4b3c4_00001')
- 4b3c4_00002: FileNotFoundError('Could not fetch metrics for 4b3c4_00002: both result.json and

In [7]:
# Display the fitted predictor's predictions for the prepared test frame.
predictor.predict(test)

0      130.498093
1       80.094032
2       64.828552
3       34.806995
4       47.993629
          ...    
841    151.511444
842     38.948040
843     38.960117
844     58.820107
845     22.489162
Name: y, Length: 846, dtype: float32

In [8]:
# Build the submission file: start from the sample submission and overwrite
# its '가격(백만원)' column with predictions for the test frame.
submission = pd.read_csv('./data/sample_submission.csv')
# .values drops the prediction Series' index, so rows are matched by position.
# (The original line assigned the same column twice in one chained
# assignment; a single assignment is sufficient.)
submission['가격(백만원)'] = predictor.predict(test).values
submission.to_csv('submission.csv', index=False)

In [10]:
# Display the per-model leaderboard, scored against `train`.
# NOTE(review): `train` is the same frame that was passed to fit() as
# train_data, so the score_test column is an in-sample score, not a held-out
# estimate; score_val is the column to compare models on. Confirm this is intended.
predictor.leaderboard(train, silent=True)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees_r197_BAG_L1_FULL,-0.538237,,root_mean_squared_error,0.300999,0.265000,0.618446,0.300999,0.265000,0.618446,1,True,249
1,ExtraTreesMSE_BAG_L1_FULL,-0.538237,,root_mean_squared_error,0.314001,0.318998,0.601003,0.314001,0.318998,0.601003,1,True,154
2,ExtraTreesMSE_BAG_L1,-0.538237,-1.441919,root_mean_squared_error,0.319999,0.318998,1.133000,0.319999,0.318998,1.133000,1,True,19
3,ExtraTrees_r197_BAG_L1,-0.538237,-1.441919,root_mean_squared_error,0.321999,0.265000,1.071452,0.321999,0.265000,1.071452,1,True,114
4,ExtraTrees_r42_BAG_L1_FULL,-0.539318,,root_mean_squared_error,0.334003,0.312998,0.600002,0.334003,0.312998,0.600002,1,True,178
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265,NeuralNetFastAI_r102_BAG_L1_FULL,-8.404357,,root_mean_squared_error,0.054998,,1.158562,0.054998,,1.158562,1,True,180
266,NeuralNetFastAI_r100_BAG_L1_FULL,-9.021433,,root_mean_squared_error,0.056998,,4.387404,0.056998,,4.387404,1,True,259
267,NeuralNetFastAI_r100_BAG_L1,-13.248940,-13.547300,root_mean_squared_error,0.263999,0.133988,17.180936,0.263999,0.133988,17.180936,1,True,124
268,KNeighborsUnif_BAG_L1_FULL,-31.971832,,root_mean_squared_error,0.035002,0.026001,0.009000,0.035002,0.026001,0.009000,1,True,136
