In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold

import torch
from pytorch_tabnet.pretraining import TabNetPretrainer
from pytorch_tabnet.tab_model import TabNetRegressor

In [2]:
# Load the training set, the test set and the submission template in one pass.
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/train.csv',
        '../data/test.csv',
        '../data/sample_submission.csv',
    )
)

In [3]:
# Remove the ID column from both frames; it is an identifier, not a feature.
train, test = (frame.drop(columns=['ID']) for frame in (train, test))

In [20]:
# Quick visual check: display the first rows of `train`.
train.head()

Unnamed: 0,설립연도,국가,분야,투자단계,직원 수,인수여부,상장여부,고객수(백만명),총 투자금(억원),연매출(억원),SNS 팔로워 수(백만명),기업가치(백억원),성공확률
0,2009,CT005,이커머스,Series A,4126.0,No,No,56.0,3365.0,4764.0,4.71,,0.3
1,2023,CT006,핀테크,Seed,4167.0,Yes,No,80.0,4069.0,279.0,1.0,2500-3500,0.8
2,2018,CT007,기술,Series A,3132.0,Yes,Yes,54.0,6453.0,12141.0,4.0,3500-4500,0.5
3,2016,CT006,,Seed,3245.0,Yes,Yes,,665.0,10547.0,2.97,,0.7
4,2020,CT002,에듀테크,Seed,1969.0,No,Yes,94.0,829.0,9810.0,1.0,1500-2500,0.1


In [4]:
import re

def clean_valuation(val):
    """Convert a raw valuation label into a single number.

    Supported inputs:
      * missing values (None / NaN)        -> None
      * open-ended labels, e.g. '6000이상'  -> 6000 (the digits in the label)
      * ranges, e.g. '2500-3500'           -> 3000.0 (midpoint of the range)
      * plain numbers or numeric strings   -> float(value)

    Anything that cannot be parsed yields None instead of raising, so the
    function is safe to use with ``Series.apply`` on dirty columns.

    Args:
        val: Raw cell value (string, number, None or NaN).

    Returns:
        int | float | None: The parsed valuation, or None when the value is
        missing or unparseable.
    """
    if pd.isna(val):
        return None

    # Work on a string so that already-numeric cells do not break the
    # substring checks below.
    text = str(val).strip()

    if '이상' in text:
        # '6000이상' -> 6000; a label without any digit is unparseable.
        digits = re.sub('[^0-9]', '', text)
        return int(digits) if digits else None

    if '-' in text:
        # '2500-3500' -> midpoint. If this is not a clean "low-high" pair,
        # fall through and let float() have a go (e.g. '1e-3').
        parts = text.split('-')
        if len(parts) == 2:
            try:
                low, high = (int(part) for part in parts)
                return (low + high) / 2
            except ValueError:
                pass

    try:
        return float(text)
    except ValueError:
        return None


In [None]:
# Convert the founding year into company age (years elapsed up to current_year).
current_year = 2025

train['연차'] = current_year - train['설립연도']
test['연차'] = current_year - test['설립연도']

# The founding year is now redundant. Reassign instead of mutating in place.
train = train.drop(columns=['설립연도'])
test = test.drop(columns=['설립연도'])

category_features = ['국가','분야']
numeric_features = ['연차', '투자단계', '직원 수','고객수(백만명)','총 투자금(억원)','연매출(억원)','SNS 팔로워 수(백만명)', '기업가치(백억원)']
bool_features = ['인수여부','상장여부']

# Ordinal encoding of the investment stage; -1 marks a missing stage.
investment_stage_map = {
    'Seed': 0,
    'Series A': 1,
    'Series B': 2,
    'Series C': 3,
    'IPO': 4,
    'Missing': -1
}

# Fill missing stages first, then map. A label that is not in the map becomes
# NaN here and is imputed with the train median in the numeric loop below.
train['투자단계'] = train['투자단계'].fillna('Missing').map(investment_stage_map)
test['투자단계'] = test['투자단계'].fillna('Missing').map(investment_stage_map)

# Parse the valuation labels (ranges / open-ended labels) into numbers.
train['기업가치(백억원)'] = train['기업가치(백억원)'].apply(clean_valuation)
test['기업가치(백억원)'] = test['기업가치(백억원)'].apply(clean_valuation)

# One LabelEncoder per categorical feature, kept for later reuse.
encoders = {}

# Encode the categorical features.
for feature in category_features:
    train[feature] = train[feature].fillna('Missing')
    test[feature] = test[feature].fillna('Missing')

    encoder = LabelEncoder()
    # Always register 'Missing' as a class so it can act as the fallback
    # bucket even when train itself has no missing value in this column.
    encoder.fit(list(train[feature].unique()) + ['Missing'])

    # LabelEncoder.transform raises ValueError on labels it was not fitted on,
    # so route test categories never seen in train to 'Missing'.
    test[feature] = test[feature].where(test[feature].isin(encoder.classes_), 'Missing')

    train[feature] = encoder.transform(train[feature])
    test[feature] = encoder.transform(test[feature])
    encoders[feature] = encoder

# Convert the Yes/No flags to 1/0.
bool_map = {'Yes': 1, 'No': 0}

for feature in bool_features:
    train[feature] = train[feature].map(bool_map)
    test[feature] = test[feature].map(bool_map)

    # Missing or unexpected flags come out of map() as NaN. Impute them with
    # the most frequent train value so that no NaN reaches the network.
    observed_modes = train[feature].mode()
    fallback_flag = observed_modes.iloc[0] if not observed_modes.empty else 0
    train[feature] = train[feature].fillna(fallback_flag).astype('int64')
    test[feature] = test[feature].fillna(fallback_flag).astype('int64')

# Impute missing numeric values with the train median (also applied to test,
# so no statistic is computed from the test set).
for feature in numeric_features:
    median_value = train[feature].median()
    train[feature] = train[feature].fillna(median_value)
    test[feature] = test[feature].fillna(median_value)

# Categorical column positions (cat_idxs) and cardinalities (cat_dims) for TabNet.
features = [col for col in train.columns if col != '성공확률']
cat_idxs = [features.index(col) for col in category_features]
# Use the encoder's class count rather than the largest code observed in train,
# so every code the encoder can emit has an embedding row.
cat_dims = [len(encoders[col].classes_) for col in category_features]

In [11]:
# Quick visual check: display the first rows of `train` in its current state.
train.head()

Unnamed: 0,국가,분야,투자단계,직원 수,인수여부,상장여부,고객수(백만명),총 투자금(억원),연매출(억원),SNS 팔로워 수(백만명),기업가치(백억원),성공확률,연차
0,4,7,1,4126.0,0,0,56.0,3365.0,4764.0,4.71,4049.984157,0.3,16
1,5,9,0,4167.0,1,0,80.0,4069.0,279.0,1.0,3000.0,0.8,2
2,6,3,1,3132.0,1,1,54.0,6453.0,12141.0,4.0,4000.0,0.5,7
3,5,1,0,3245.0,1,1,49.214332,665.0,10547.0,2.97,4049.984157,0.7,9
4,1,6,0,1969.0,0,1,94.0,829.0,9810.0,1.0,2000.0,0.1,5


In [6]:
# Target column and feature matrix.
target = train['성공확률']
X = train[features]
y = target

# K-fold configuration (shuffled, fixed seed for reproducible splits).
N_FOLDS = 10
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

models = []      # one fine-tuned regressor per fold, reused for test prediction
cv_scores = []   # best validation score reached by each fold's regressor

for fold, (train_idx, valid_idx) in enumerate(kf.split(X)):
    print(f"\n🔁 Fold {fold+1}/{N_FOLDS}")

    # TabNet works on numpy arrays; the regressor needs a 2-D target.
    X_train = X.iloc[train_idx].values
    y_train = y.iloc[train_idx].values.reshape(-1, 1)

    X_valid = X.iloc[valid_idx].values
    y_valid = y.iloc[valid_idx].values.reshape(-1, 1)

    # Unsupervised pretraining on the fold's training features only.
    print("▶ Pretraining...")

    pretrainer = TabNetPretrainer(
        cat_idxs=cat_idxs,
        cat_dims=cat_dims,
        seed=42,
        verbose=0
    )

    pretrainer.fit(
        X_train=X_train,
        max_epochs=100,
        batch_size=512,
        virtual_batch_size=64
    )

    # Supervised fine-tuning, initialised from the pretrained weights.
    print("▶ Fine-tuning...")
    model = TabNetRegressor(
        cat_idxs=cat_idxs,
        cat_dims=cat_dims,
        seed=42,
        verbose=1,
        optimizer_fn=torch.optim.AdamW,
    )

    model.fit(
        X_train=X_train, y_train=y_train,
        eval_set=[(X_valid, y_valid)],
        from_unsupervised=pretrainer,
        eval_metric=['mae'],
        max_epochs=100,
        patience=10,
    )

    # Keep the fitted model in memory and record its best validation score.
    models.append(model)
    cv_scores.append(model.best_cost)

print("\n✅ 모든 fold 모델 학습 완료!")

# Summarise the per-fold scores so the run yields a single CV estimate.
# NOTE(review): best_cost is expected to be the best validation MAE, since
# 'mae' is the only eval metric passed to fit() — confirm against pytorch-tabnet.
print(f"CV MAE over {len(cv_scores)} folds: {np.mean(cv_scores):.5f} ± {np.std(cv_scores):.5f}")


🔁 Fold 1/10
▶ Pretraining...




▶ Fine-tuning...
epoch 0  | loss: 9.75046 | val_0_mae: 1.88839 |  0:00:00s
epoch 1  | loss: 1.61759 | val_0_mae: 2.18384 |  0:00:00s
epoch 2  | loss: 0.70409 | val_0_mae: 1.40964 |  0:00:00s




epoch 3  | loss: 0.31501 | val_0_mae: 0.94061 |  0:00:00s
epoch 4  | loss: 0.19432 | val_0_mae: 1.16273 |  0:00:00s
epoch 5  | loss: 0.12828 | val_0_mae: 1.22528 |  0:00:00s
epoch 6  | loss: 0.10042 | val_0_mae: 0.93654 |  0:00:00s
epoch 7  | loss: 0.09095 | val_0_mae: 0.84326 |  0:00:00s
epoch 8  | loss: 0.07859 | val_0_mae: 0.74689 |  0:00:00s
epoch 9  | loss: 0.0702  | val_0_mae: 0.64778 |  0:00:00s
epoch 10 | loss: 0.06966 | val_0_mae: 0.61959 |  0:00:00s
epoch 11 | loss: 0.06756 | val_0_mae: 0.53817 |  0:00:00s
epoch 12 | loss: 0.06546 | val_0_mae: 0.44731 |  0:00:00s
epoch 13 | loss: 0.06788 | val_0_mae: 0.44165 |  0:00:00s
epoch 14 | loss: 0.06518 | val_0_mae: 0.41787 |  0:00:00s
epoch 15 | loss: 0.06392 | val_0_mae: 0.38005 |  0:00:00s
epoch 16 | loss: 0.06155 | val_0_mae: 0.36391 |  0:00:00s
epoch 17 | loss: 0.06265 | val_0_mae: 0.34463 |  0:00:00s
epoch 18 | loss: 0.06145 | val_0_mae: 0.33018 |  0:00:00s
epoch 19 | loss: 0.06156 | val_0_mae: 0.30239 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 4.18775 | val_0_mae: 0.98624 |  0:00:00s
epoch 1  | loss: 0.82364 | val_0_mae: 0.67322 |  0:00:00s
epoch 2  | loss: 0.38889 | val_0_mae: 0.76104 |  0:00:00s




epoch 3  | loss: 0.16747 | val_0_mae: 0.39123 |  0:00:00s
epoch 4  | loss: 0.12381 | val_0_mae: 0.43226 |  0:00:00s
epoch 5  | loss: 0.10231 | val_0_mae: 0.30735 |  0:00:00s
epoch 6  | loss: 0.08949 | val_0_mae: 0.30989 |  0:00:00s
epoch 7  | loss: 0.07885 | val_0_mae: 0.27362 |  0:00:00s
epoch 8  | loss: 0.07322 | val_0_mae: 0.24294 |  0:00:00s
epoch 9  | loss: 0.06925 | val_0_mae: 0.23566 |  0:00:00s
epoch 10 | loss: 0.06934 | val_0_mae: 0.23672 |  0:00:00s
epoch 11 | loss: 0.06684 | val_0_mae: 0.23535 |  0:00:00s
epoch 12 | loss: 0.06552 | val_0_mae: 0.23731 |  0:00:00s
epoch 13 | loss: 0.06328 | val_0_mae: 0.21843 |  0:00:00s
epoch 14 | loss: 0.0639  | val_0_mae: 0.21734 |  0:00:00s
epoch 15 | loss: 0.06229 | val_0_mae: 0.22649 |  0:00:00s
epoch 16 | loss: 0.06276 | val_0_mae: 0.21554 |  0:00:00s
epoch 17 | loss: 0.06201 | val_0_mae: 0.21416 |  0:00:00s
epoch 18 | loss: 0.06348 | val_0_mae: 0.21856 |  0:00:00s
epoch 19 | loss: 0.05984 | val_0_mae: 0.22202 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 11.10798| val_0_mae: 1.25535 |  0:00:00s
epoch 1  | loss: 1.65661 | val_0_mae: 0.58924 |  0:00:00s
epoch 2  | loss: 0.43875 | val_0_mae: 0.62683 |  0:00:00s
epoch 3  | loss: 0.22724 | val_0_mae: 0.79719 |  0:00:00s




epoch 4  | loss: 0.14166 | val_0_mae: 0.59279 |  0:00:00s
epoch 5  | loss: 0.10119 | val_0_mae: 0.48227 |  0:00:00s
epoch 6  | loss: 0.09552 | val_0_mae: 0.34663 |  0:00:00s
epoch 7  | loss: 0.07253 | val_0_mae: 0.28092 |  0:00:00s
epoch 8  | loss: 0.07659 | val_0_mae: 0.27806 |  0:00:00s
epoch 9  | loss: 0.06976 | val_0_mae: 0.23746 |  0:00:00s
epoch 10 | loss: 0.06929 | val_0_mae: 0.2537  |  0:00:00s
epoch 11 | loss: 0.06581 | val_0_mae: 0.22823 |  0:00:00s
epoch 12 | loss: 0.06478 | val_0_mae: 0.23972 |  0:00:00s
epoch 13 | loss: 0.06292 | val_0_mae: 0.22152 |  0:00:00s
epoch 14 | loss: 0.06051 | val_0_mae: 0.23276 |  0:00:00s
epoch 15 | loss: 0.06177 | val_0_mae: 0.21741 |  0:00:00s
epoch 16 | loss: 0.06063 | val_0_mae: 0.22927 |  0:00:00s
epoch 17 | loss: 0.06196 | val_0_mae: 0.21485 |  0:00:00s
epoch 18 | loss: 0.06055 | val_0_mae: 0.22251 |  0:00:00s
epoch 19 | loss: 0.0611  | val_0_mae: 0.21204 |  0:00:00s
epoch 20 | loss: 0.05966 | val_0_mae: 0.21603 |  0:00:00s
epoch 21 | los



▶ Fine-tuning...
epoch 0  | loss: 4.54647 | val_0_mae: 0.6813  |  0:00:00s
epoch 1  | loss: 0.63129 | val_0_mae: 0.48261 |  0:00:00s
epoch 2  | loss: 0.23344 | val_0_mae: 0.90957 |  0:00:00s




epoch 3  | loss: 0.14892 | val_0_mae: 0.61017 |  0:00:00s
epoch 4  | loss: 0.10334 | val_0_mae: 0.445   |  0:00:00s
epoch 5  | loss: 0.09102 | val_0_mae: 0.36818 |  0:00:00s
epoch 6  | loss: 0.08382 | val_0_mae: 0.46324 |  0:00:00s
epoch 7  | loss: 0.07508 | val_0_mae: 0.38998 |  0:00:00s
epoch 8  | loss: 0.07185 | val_0_mae: 0.28595 |  0:00:00s
epoch 9  | loss: 0.06895 | val_0_mae: 0.25508 |  0:00:00s
epoch 10 | loss: 0.06794 | val_0_mae: 0.22236 |  0:00:00s
epoch 11 | loss: 0.0691  | val_0_mae: 0.22382 |  0:00:00s
epoch 12 | loss: 0.06603 | val_0_mae: 0.24949 |  0:00:00s
epoch 13 | loss: 0.06621 | val_0_mae: 0.26609 |  0:00:00s
epoch 14 | loss: 0.06416 | val_0_mae: 0.2389  |  0:00:00s
epoch 15 | loss: 0.0649  | val_0_mae: 0.23658 |  0:00:00s
epoch 16 | loss: 0.0643  | val_0_mae: 0.22636 |  0:00:00s
epoch 17 | loss: 0.06344 | val_0_mae: 0.22872 |  0:00:00s
epoch 18 | loss: 0.06179 | val_0_mae: 0.23797 |  0:00:00s
epoch 19 | loss: 0.06206 | val_0_mae: 0.22592 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 10.56116| val_0_mae: 1.33075 |  0:00:00s
epoch 1  | loss: 1.03649 | val_0_mae: 1.04295 |  0:00:00s
epoch 2  | loss: 0.39883 | val_0_mae: 1.04113 |  0:00:00s




epoch 3  | loss: 0.20555 | val_0_mae: 0.73774 |  0:00:00s
epoch 4  | loss: 0.13324 | val_0_mae: 0.45456 |  0:00:00s
epoch 5  | loss: 0.11787 | val_0_mae: 0.49885 |  0:00:00s
epoch 6  | loss: 0.08715 | val_0_mae: 0.3789  |  0:00:00s
epoch 7  | loss: 0.07998 | val_0_mae: 0.37336 |  0:00:00s
epoch 8  | loss: 0.07251 | val_0_mae: 0.38126 |  0:00:00s
epoch 9  | loss: 0.07005 | val_0_mae: 0.29965 |  0:00:00s
epoch 10 | loss: 0.06683 | val_0_mae: 0.27402 |  0:00:00s
epoch 11 | loss: 0.0648  | val_0_mae: 0.23675 |  0:00:00s
epoch 12 | loss: 0.064   | val_0_mae: 0.2266  |  0:00:00s
epoch 13 | loss: 0.06322 | val_0_mae: 0.23718 |  0:00:00s
epoch 14 | loss: 0.06436 | val_0_mae: 0.22618 |  0:00:00s
epoch 15 | loss: 0.06265 | val_0_mae: 0.22964 |  0:00:00s
epoch 16 | loss: 0.06115 | val_0_mae: 0.23788 |  0:00:00s
epoch 17 | loss: 0.06196 | val_0_mae: 0.22802 |  0:00:00s
epoch 18 | loss: 0.05978 | val_0_mae: 0.22858 |  0:00:00s
epoch 19 | loss: 0.06183 | val_0_mae: 0.21601 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 5.78926 | val_0_mae: 1.0516  |  0:00:00s
epoch 1  | loss: 0.60912 | val_0_mae: 0.54361 |  0:00:00s
epoch 2  | loss: 0.23803 | val_0_mae: 0.50783 |  0:00:00s




epoch 3  | loss: 0.13172 | val_0_mae: 0.34487 |  0:00:00s
epoch 4  | loss: 0.09817 | val_0_mae: 0.24505 |  0:00:00s
epoch 5  | loss: 0.08438 | val_0_mae: 0.28956 |  0:00:00s
epoch 6  | loss: 0.07669 | val_0_mae: 0.33858 |  0:00:00s
epoch 7  | loss: 0.07249 | val_0_mae: 0.3186  |  0:00:00s
epoch 8  | loss: 0.06981 | val_0_mae: 0.26283 |  0:00:00s
epoch 9  | loss: 0.0673  | val_0_mae: 0.21953 |  0:00:00s
epoch 10 | loss: 0.06612 | val_0_mae: 0.2103  |  0:00:00s
epoch 11 | loss: 0.06582 | val_0_mae: 0.21738 |  0:00:00s
epoch 12 | loss: 0.06245 | val_0_mae: 0.22444 |  0:00:00s
epoch 13 | loss: 0.06339 | val_0_mae: 0.22816 |  0:00:00s
epoch 14 | loss: 0.0619  | val_0_mae: 0.21869 |  0:00:00s
epoch 15 | loss: 0.06036 | val_0_mae: 0.20863 |  0:00:00s
epoch 16 | loss: 0.06112 | val_0_mae: 0.20516 |  0:00:00s
epoch 17 | loss: 0.06044 | val_0_mae: 0.20392 |  0:00:00s
epoch 18 | loss: 0.05977 | val_0_mae: 0.204   |  0:00:00s
epoch 19 | loss: 0.06033 | val_0_mae: 0.20499 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 10.64314| val_0_mae: 1.44936 |  0:00:00s
epoch 1  | loss: 1.70525 | val_0_mae: 2.17105 |  0:00:00s
epoch 2  | loss: 0.64996 | val_0_mae: 2.03985 |  0:00:00s




epoch 3  | loss: 0.35942 | val_0_mae: 1.17896 |  0:00:00s
epoch 4  | loss: 0.18008 | val_0_mae: 0.88454 |  0:00:00s
epoch 5  | loss: 0.14692 | val_0_mae: 0.78485 |  0:00:00s
epoch 6  | loss: 0.10794 | val_0_mae: 0.65372 |  0:00:00s
epoch 7  | loss: 0.10042 | val_0_mae: 0.62188 |  0:00:00s
epoch 8  | loss: 0.0879  | val_0_mae: 0.60838 |  0:00:00s
epoch 9  | loss: 0.08276 | val_0_mae: 0.53846 |  0:00:00s
epoch 10 | loss: 0.08057 | val_0_mae: 0.49264 |  0:00:00s
epoch 11 | loss: 0.07249 | val_0_mae: 0.45871 |  0:00:00s
epoch 12 | loss: 0.07449 | val_0_mae: 0.37725 |  0:00:00s
epoch 13 | loss: 0.07075 | val_0_mae: 0.39097 |  0:00:00s
epoch 14 | loss: 0.06576 | val_0_mae: 0.39674 |  0:00:00s
epoch 15 | loss: 0.06821 | val_0_mae: 0.36741 |  0:00:00s
epoch 16 | loss: 0.06582 | val_0_mae: 0.33916 |  0:00:00s
epoch 17 | loss: 0.06456 | val_0_mae: 0.32681 |  0:00:00s
epoch 18 | loss: 0.06638 | val_0_mae: 0.30052 |  0:00:00s
epoch 19 | loss: 0.06479 | val_0_mae: 0.30613 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 21.63481| val_0_mae: 1.0781  |  0:00:00s
epoch 1  | loss: 3.09279 | val_0_mae: 0.77832 |  0:00:00s
epoch 2  | loss: 0.96579 | val_0_mae: 0.94559 |  0:00:00s




epoch 3  | loss: 0.52167 | val_0_mae: 0.75642 |  0:00:00s
epoch 4  | loss: 0.23327 | val_0_mae: 0.6464  |  0:00:00s
epoch 5  | loss: 0.1796  | val_0_mae: 0.56044 |  0:00:00s
epoch 6  | loss: 0.13187 | val_0_mae: 0.39991 |  0:00:00s
epoch 7  | loss: 0.11129 | val_0_mae: 0.43814 |  0:00:00s
epoch 8  | loss: 0.09356 | val_0_mae: 0.29762 |  0:00:00s
epoch 9  | loss: 0.08548 | val_0_mae: 0.30705 |  0:00:00s
epoch 10 | loss: 0.07912 | val_0_mae: 0.27592 |  0:00:00s
epoch 11 | loss: 0.07355 | val_0_mae: 0.27796 |  0:00:00s
epoch 12 | loss: 0.07419 | val_0_mae: 0.22442 |  0:00:00s
epoch 13 | loss: 0.07264 | val_0_mae: 0.24702 |  0:00:00s
epoch 14 | loss: 0.06857 | val_0_mae: 0.23935 |  0:00:00s
epoch 15 | loss: 0.0684  | val_0_mae: 0.22726 |  0:00:00s
epoch 16 | loss: 0.06633 | val_0_mae: 0.22324 |  0:00:00s
epoch 17 | loss: 0.06619 | val_0_mae: 0.22367 |  0:00:00s
epoch 18 | loss: 0.06401 | val_0_mae: 0.22105 |  0:00:00s
epoch 19 | loss: 0.06501 | val_0_mae: 0.22392 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 15.7881 | val_0_mae: 1.90869 |  0:00:00s
epoch 1  | loss: 2.05157 | val_0_mae: 0.89672 |  0:00:00s
epoch 2  | loss: 0.55127 | val_0_mae: 0.66361 |  0:00:00s




epoch 3  | loss: 0.27287 | val_0_mae: 0.50429 |  0:00:00s
epoch 4  | loss: 0.14112 | val_0_mae: 0.44656 |  0:00:00s
epoch 5  | loss: 0.10665 | val_0_mae: 0.34357 |  0:00:00s
epoch 6  | loss: 0.08628 | val_0_mae: 0.23959 |  0:00:00s
epoch 7  | loss: 0.07728 | val_0_mae: 0.25752 |  0:00:00s
epoch 8  | loss: 0.07184 | val_0_mae: 0.24558 |  0:00:00s
epoch 9  | loss: 0.07168 | val_0_mae: 0.22289 |  0:00:00s
epoch 10 | loss: 0.06634 | val_0_mae: 0.2394  |  0:00:00s
epoch 11 | loss: 0.06364 | val_0_mae: 0.24462 |  0:00:00s
epoch 12 | loss: 0.06418 | val_0_mae: 0.22706 |  0:00:00s
epoch 13 | loss: 0.06242 | val_0_mae: 0.21251 |  0:00:00s
epoch 14 | loss: 0.06142 | val_0_mae: 0.21476 |  0:00:00s
epoch 15 | loss: 0.06157 | val_0_mae: 0.21294 |  0:00:00s
epoch 16 | loss: 0.06135 | val_0_mae: 0.21452 |  0:00:00s
epoch 17 | loss: 0.06097 | val_0_mae: 0.2132  |  0:00:00s
epoch 18 | loss: 0.06089 | val_0_mae: 0.20859 |  0:00:00s
epoch 19 | loss: 0.06202 | val_0_mae: 0.21147 |  0:00:00s
epoch 20 | los



▶ Fine-tuning...
epoch 0  | loss: 3.45799 | val_0_mae: 0.52669 |  0:00:00s
epoch 1  | loss: 0.41193 | val_0_mae: 0.43101 |  0:00:00s
epoch 2  | loss: 0.20684 | val_0_mae: 0.47479 |  0:00:00s




epoch 3  | loss: 0.12299 | val_0_mae: 0.28763 |  0:00:00s
epoch 4  | loss: 0.10577 | val_0_mae: 0.25542 |  0:00:00s
epoch 5  | loss: 0.08835 | val_0_mae: 0.24428 |  0:00:00s
epoch 6  | loss: 0.07895 | val_0_mae: 0.23209 |  0:00:00s
epoch 7  | loss: 0.07471 | val_0_mae: 0.22482 |  0:00:00s
epoch 8  | loss: 0.06727 | val_0_mae: 0.21613 |  0:00:00s
epoch 9  | loss: 0.06767 | val_0_mae: 0.22197 |  0:00:00s
epoch 10 | loss: 0.06579 | val_0_mae: 0.22368 |  0:00:00s
epoch 11 | loss: 0.06378 | val_0_mae: 0.21551 |  0:00:00s
epoch 12 | loss: 0.06067 | val_0_mae: 0.21389 |  0:00:00s
epoch 13 | loss: 0.06246 | val_0_mae: 0.21541 |  0:00:00s
epoch 14 | loss: 0.06144 | val_0_mae: 0.2126  |  0:00:00s
epoch 15 | loss: 0.06078 | val_0_mae: 0.21452 |  0:00:00s
epoch 16 | loss: 0.06194 | val_0_mae: 0.21255 |  0:00:00s
epoch 17 | loss: 0.05932 | val_0_mae: 0.21313 |  0:00:00s
epoch 18 | loss: 0.0602  | val_0_mae: 0.21003 |  0:00:00s
epoch 19 | loss: 0.06151 | val_0_mae: 0.21084 |  0:00:00s
epoch 20 | los



In [7]:
# Predict the test set with every stored fold model.
test_matrix = test[features].values  # identical for every fold, so build it once
predictions_list = []

for fold, fold_model in enumerate(models):
    print(f"Predict with fold {fold+1}")
    predictions_list.append(fold_model.predict(test_matrix))

# Ensemble: element-wise average of the per-fold predictions.
final_predictions = np.mean(predictions_list, axis=0)

Predict with fold 1
Predict with fold 2
Predict with fold 3
Predict with fold 4
Predict with fold 5
Predict with fold 6
Predict with fold 7
Predict with fold 8
Predict with fold 9
Predict with fold 10


In [8]:
# The regressors are trained on a 2-D (n, 1) target, so the averaged predictions
# are expected to be 2-D as well. Flatten them explicitly instead of relying on
# pandas accepting an (n, 1) array for a single column.
flat_predictions = np.asarray(final_predictions).reshape(-1)
assert len(flat_predictions) == len(sample_submission), (
    "number of predictions does not match the number of submission rows"
)

sample_submission['성공확률'] = flat_predictions
# utf-8-sig writes a BOM at the start of the file.
sample_submission.to_csv('./baseline_submission5.csv', index = False, encoding = 'utf-8-sig')

In [35]:
# Feature importances of a single model (the first fold), used as an example.
importances = models[0].feature_importances_

# Pair each feature name with its importance and rank from most to least important.
feature_importance_df = (
    pd.DataFrame({'Feature': features, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Print the ranked table.
print(feature_importance_df)


           Feature  Importance
5             상장여부    0.516250
4             인수여부    0.244577
11              연차    0.136009
3             직원 수    0.022742
1               분야    0.021302
10       기업가치(백억원)    0.020834
9   SNS 팔로워 수(백만명)    0.016876
8          연매출(억원)    0.014887
2             투자단계    0.003031
6         고객수(백만명)    0.001531
7        총 투자금(억원)    0.000988
0               국가    0.000975
