## 코드

# Import

In [1]:
import pandas as pd
import random
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(42) # Seed 고정

# Data Load

In [2]:
train_df = pd.read_csv('train.csv')

train_x = train_df.filter(regex='X') # Input : X Featrue
train_y = train_df.filter(regex='Y') # Output : Y Feature

# Regression & Inference

In [4]:
FR = RandomForestRegressor(max_depth=2, random_state=42)
FR.fit(train_x, train_y)

RandomForestRegressor(max_depth=2, random_state=42)

In [5]:
test_x = pd.read_csv('./test.csv').drop(columns=['ID'])
preds = FR.predict(test_x)
preds

array([[  1.36807091,   1.08921468,   1.02141383, ..., -26.16794281,
        -26.16349399, -26.17733352],
       [  1.36795283,   1.0886605 ,   1.02148679, ..., -26.16977317,
        -26.16534062, -26.17912012],
       [  1.39458788,   1.07238221,   0.98944761, ..., -25.83402913,
        -25.82582706, -25.84149071],
       ...,
       [  1.33129234,   1.01718132,   1.00717878, ..., -26.36466787,
        -26.36182815, -26.37118334],
       [  1.33129234,   1.01718132,   1.00717878, ..., -26.36466787,
        -26.36182815, -26.37118334],
       [  1.33129234,   1.01718132,   1.00717878, ..., -26.36466787,
        -26.36182815, -26.37118334]])

# Evaluation

In [7]:
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, train_size=0.8, test_size=0.2, random_state=42)

y_pred = FR.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

r2 = r2_score(y_test,y_pred)
print(f'Test MSE: ${mse:,.0f}')
print(f'R2 Score: {r2:,.4f}\n')

Test MSE: $1
R2 Score: 0.0228



# Submit

In [8]:
submit = pd.read_csv('./sample_submission.csv')

In [10]:
for idx, col in enumerate(submit.columns):
    if col=='ID':
        continue
    submit[col] = preds[:,idx-1]
print('Done.')

Done.


In [11]:
submit.to_csv('./submit_RandomForest.csv', index=False)