## 코드

# Import

In [33]:
import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(42) # Seed 고정

# Data Load

In [34]:
train_df = pd.read_csv('train.csv')

train_x = train_df.filter(regex='X') # Input : X Featrue
train_y = train_df.filter(regex='Y') # Output : Y Feature

# Regression & Inference

In [35]:
preds = []
test_x = pd.read_csv('./test.csv').drop(columns=['ID'])
for i in range(1,15):
    if i<10:
        train_y = train_df.filter(regex='Y_0'+str(i))
        SR = make_pipeline(StandardScaler(),SGDRegressor(max_iter=1000, tol=1e-3))
        SR.fit(train_x, train_y.values.ravel())
        arr = SR.predict(test_x)
        preds.append(arr)
    else :
        train_y = train_df.filter(regex='Y_'+str(i))
        SR = make_pipeline(StandardScaler(),SGDRegressor(max_iter=1000, tol=1e-3))
        SR.fit(train_x, train_y.values.ravel())
        arr = SR.predict(test_x)
        preds.append(arr)
    

In [36]:
preds

[array([1.44345741, 1.57457528, 1.32480386, ..., 1.24771913, 1.27172852,
        1.38218959]),
 array([1.2576785 , 1.1629118 , 1.13842339, ..., 0.96031803, 0.99527808,
        0.99892523]),
 array([1.05595553, 1.18702934, 0.947257  , ..., 0.95619116, 1.01987343,
        1.03802634]),
 array([12.80204271, 13.54413303, 14.50307678, ..., 14.1727126 ,
        13.37551225, 13.54925948]),
 array([30.86025811, 31.00757802, 31.87987967, ..., 32.30045824,
        31.67258646, 31.63702079]),
 array([16.13998403, 16.551481  , 16.05579274, ..., 17.14385128,
        17.15660718, 16.89974107]),
 array([3.17388731, 3.20024581, 2.95225441, ..., 3.11022713, 3.13433289,
        3.12857869]),
 array([-26.11228254, -25.98280612, -26.08991736, ..., -26.57514968,
        -26.55235671, -26.52832296]),
 array([-26.12237262, -26.19340473, -25.93681306, ..., -26.48439353,
        -26.49306265, -26.41619555]),
 array([-22.11699697, -22.19914951, -22.45836762, ..., -22.56077207,
        -22.59579568, -22.59533826

# Evaluation

In [37]:
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, train_size=0.8, test_size=0.2, random_state=42)

y_pred = SR.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

r2 = r2_score(y_test,y_pred)
print(f'Test MSE: ${mse:,.0f}')
print(f'R2 Score: {r2:,.4f}\n')

Test MSE: $0
R2 Score: 0.0370



# Submit

In [38]:
submit = pd.read_csv('./sample_submission.csv')

In [39]:
for idx, col in enumerate(submit.columns):
    if col=='ID':
        continue
    submit[col] = preds[:][idx-1]
print('Done.')

Done.


In [40]:
submit.to_csv('./submit_SGD.csv', index=False)