## 코드

# Import

In [13]:
import pandas as pd
import random
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(42) # Seed 고정

# Data Load

In [14]:
train_df = pd.read_csv('train.csv')

train_x = train_df.filter(regex='X') # Input : X Featrue
train_y = train_df.filter(regex='Y') # Output : Y Feature

# Regression & Inference

In [46]:
HR = HistGradientBoostingRegressor(random_state=42,learning_rate = 0.1, max_depth = 7)
preds = []
test_x = pd.read_csv('./test.csv').drop(columns=['ID'])
for i in range(1,15):
    if i<10:
        train_y = train_df.filter(regex='Y_0'+str(i))
        HR.fit(train_x, train_y.values.ravel())
        arr = HR.predict(test_x)
        preds.append(arr)
    else :
        train_y = train_df.filter(regex='Y_'+str(i))
        HR.fit(train_x, train_y.values.ravel())
        arr = HR.predict(test_x)
        preds.append(arr)

# Evaluation

In [47]:
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, train_size=0.8, test_size=0.2, random_state=42)

y_pred = HR.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

r2 = r2_score(y_test,y_pred)

print(cross_val_score(HR, train_x, train_y.values.ravel(), cv=3))
print(f'Test MSE: ${mse:,.0f}')
print(f'R2 Score: {r2:,.4f}\n')

[-0.04959872  0.06764595  0.06276484]
Test MSE: $0
R2 Score: 0.1835



# Submit

In [48]:
submit = pd.read_csv('./sample_submission.csv')

In [49]:
for idx, col in enumerate(submit.columns):
    if col=='ID':
        continue
    submit[col] = preds[:][idx-1]
print('Done.')

Done.


In [50]:
submit.to_csv('./submit_Hist.csv', index=False)