## 코드

# Import

In [3]:
import pandas as pd
import random
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, r2_score

def seed_everything(seed):
    """Seed the random number sources this notebook touches.

    Seeds Python's ``random`` module and NumPy's legacy global RNG, and
    exports ``PYTHONHASHSEED`` into the process environment.

    Args:
        seed: Integer seed applied to every source.

    Note:
        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so setting
        it here only influences child processes launched afterwards, not the
        hash randomization of the already-running kernel.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(42)  # fix the seed for the whole notebook

# Data Load

In [4]:
# Load the training table; the cells below slice their inputs and targets from it.
train_df = pd.read_csv('train.csv')

# Split columns by name: regex='Y' keeps columns whose name contains "Y"
# (outputs), regex='X' keeps columns whose name contains "X" (inputs).
train_y = train_df.filter(regex='Y')  # Output: Y features
train_x = train_df.filter(regex='X')  # Input: X features


# Regression & Inference

## Y1

In [5]:
# Fit a linear regression for Y_01 on the feature subset selected for it.
target_y = train_df['Y_01']
selected_x = train_df.loc[:, [
    'X_48', 'X_22', 'X_19', 'X_18', 'X_52', 'X_17', 'X_45', 'X_13',
    'X_49', 'X_51', 'X_40', 'X_46', 'X_04', 'X_23', 'X_47', 'X_03',
    'X_06', 'X_20', 'X_05', 'X_27', 'X_24', 'X_07', 'X_21', 'X_15',
    'X_43', 'X_42', 'X_33', 'X_41', 'X_56', 'X_30', 'X_31', 'X_25',
    'X_39',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [6]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Start the per-target prediction list; the following cells append to it and
# the submit cell reads it by position, so the order of entries matters.
preds = [arr]
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283])]

## Y2

In [7]:
# Fit a linear regression for Y_02 on the feature subset selected for it.
target_y = train_df['Y_02']
selected_x = train_df.loc[:, [
    'X_48', 'X_22', 'X_18', 'X_43', 'X_17', 'X_51', 'X_42', 'X_49',
    'X_40', 'X_05', 'X_52', 'X_06', 'X_45', 'X_27', 'X_24', 'X_25',
    'X_56', 'X_09', 'X_19', 'X_20', 'X_46', 'X_04', 'X_23', 'X_47',
    'X_16', 'X_15', 'X_41', 'X_28', 'X_39',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [8]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575])]

## Y3

In [9]:
# Fit a linear regression for Y_03 on the feature subset selected for it.
target_y = train_df['Y_03']
selected_x = train_df.loc[:, [
    'X_48', 'X_18', 'X_17', 'X_43', 'X_22', 'X_19', 'X_49', 'X_51',
    'X_05', 'X_13', 'X_42', 'X_24', 'X_25', 'X_52', 'X_06', 'X_40',
    'X_46', 'X_45', 'X_56', 'X_04', 'X_23', 'X_47', 'X_32', 'X_03',
    'X_27', 'X_33', 'X_01', 'X_20', 'X_21', 'X_12', 'X_10', 'X_39',
    'X_28', 'X_16', 'X_41',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [10]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684])]

## Y4

In [11]:
# Fit a linear regression for Y_04 on the feature subset selected for it.
target_y = train_df['Y_04']
selected_x = train_df.loc[:, [
    'X_48', 'X_04', 'X_23', 'X_47', 'X_30', 'X_21', 'X_32', 'X_16',
    'X_17', 'X_12', 'X_19', 'X_03', 'X_24', 'X_54', 'X_50', 'X_52',
    'X_56', 'X_22', 'X_09', 'X_25', 'X_06', 'X_40', 'X_44', 'X_42',
    'X_51', 'X_55', 'X_18', 'X_20', 'X_08', 'X_26', 'X_10', 'X_53',
    'X_37', 'X_35', 'X_15',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [12]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686])]

## Y5

In [13]:
# Fit a linear regression for Y_05 on the feature subset selected for it.
target_y = train_df['Y_05']
selected_x = train_df.loc[:, [
    'X_48', 'X_56', 'X_32', 'X_12', 'X_15', 'X_18', 'X_17', 'X_04',
    'X_23', 'X_47', 'X_33', 'X_29', 'X_09', 'X_40', 'X_54', 'X_50',
    'X_44', 'X_49', 'X_51', 'X_55', 'X_03', 'X_30', 'X_19', 'X_28',
    'X_53', 'X_13', 'X_35', 'X_01', 'X_06', 'X_52', 'X_37', 'X_31',
    'X_45', 'X_26', 'X_10',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [14]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ])]

## Y6

In [15]:
# Fit a linear regression for Y_06 on the feature subset selected for it.
target_y = train_df['Y_06']
selected_x = train_df.loc[:, [
    'X_48', 'X_19', 'X_30', 'X_56', 'X_05', 'X_06', 'X_26', 'X_49',
    'X_17', 'X_18', 'X_16', 'X_44', 'X_13', 'X_43', 'X_51', 'X_55',
    'X_50', 'X_09', 'X_04', 'X_23', 'X_47', 'X_03', 'X_22',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [16]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803])]

## Y7

In [17]:
# Fit a linear regression for Y_07 on the feature subset selected for it.
target_y = train_df['Y_07']
selected_x = train_df.loc[:, [
    'X_48', 'X_04', 'X_23', 'X_47', 'X_19', 'X_32', 'X_21', 'X_15',
    'X_18', 'X_03', 'X_17', 'X_50', 'X_54', 'X_41', 'X_24', 'X_12',
    'X_14', 'X_38', 'X_31', 'X_09', 'X_56', 'X_52', 'X_06', 'X_05',
    'X_22', 'X_33', 'X_29', 'X_55',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [18]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018])]

## Y8

In [19]:
# Fit a linear regression for Y_08 on the feature subset selected for it.
target_y = train_df['Y_08']
selected_x = train_df.loc[:, [
    'X_48', 'X_20', 'X_32', 'X_30', 'X_03', 'X_42', 'X_22', 'X_52',
    'X_40', 'X_17', 'X_18', 'X_15', 'X_10', 'X_51', 'X_55', 'X_04',
    'X_23', 'X_47', 'X_09', 'X_27', 'X_56', 'X_41', 'X_14', 'X_54',
    'X_53', 'X_35', 'X_44',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [20]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159])]

## Y9

In [21]:
# Fit a linear regression for Y_09 on the feature subset selected for it.
target_y = train_df['Y_09']
selected_x = train_df.loc[:, [
    'X_48', 'X_20', 'X_30', 'X_42', 'X_03', 'X_32', 'X_22', 'X_52',
    'X_40', 'X_17', 'X_18', 'X_15', 'X_04', 'X_23', 'X_47', 'X_51',
    'X_55', 'X_09', 'X_12', 'X_41', 'X_10', 'X_14', 'X_56', 'X_54',
    'X_53', 'X_35',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [22]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239])]

## Y10

In [23]:
# Fit a linear regression for Y_10 on the feature subset selected for it.
target_y = train_df['Y_10']
selected_x = train_df.loc[:, [
    'X_48', 'X_04', 'X_23', 'X_47', 'X_18', 'X_15', 'X_22', 'X_42',
    'X_30', 'X_56', 'X_49', 'X_03', 'X_09', 'X_51', 'X_16', 'X_27',
    'X_24', 'X_41', 'X_32', 'X_45', 'X_17', 'X_21', 'X_01', 'X_40',
    'X_55', 'X_52', 'X_54', 'X_53', 'X_29', 'X_10', 'X_08', 'X_44',
    'X_31', 'X_50', 'X_07',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [24]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239]),
 array([-22.15837973, -22.23240709, -22.29680389, ..., -22.46786237,
        -22.55224234, -22.57647085

## Y11

In [25]:
# Fit a linear regression for Y_11 on the feature subset selected for it.
target_y = train_df['Y_11']
selected_x = train_df.loc[:, [
    'X_48', 'X_17', 'X_18', 'X_15', 'X_30', 'X_51', 'X_42', 'X_03',
    'X_01', 'X_56', 'X_05', 'X_54', 'X_31', 'X_21', 'X_52', 'X_55',
    'X_10', 'X_50', 'X_04', 'X_23', 'X_47', 'X_32', 'X_09', 'X_33',
    'X_35',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [26]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239]),
 array([-22.15837973, -22.23240709, -22.29680389, ..., -22.46786237,
        -22.55224234, -22.57647085

## Y12

In [27]:
# Fit a linear regression for Y_12 on the feature subset selected for it.
target_y = train_df['Y_12']
selected_x = train_df.loc[:, [
    'X_48', 'X_20', 'X_30', 'X_42', 'X_03', 'X_32', 'X_22', 'X_56',
    'X_40', 'X_18', 'X_15', 'X_51', 'X_04', 'X_23', 'X_47', 'X_09',
    'X_13', 'X_10', 'X_41', 'X_55', 'X_52', 'X_12', 'X_54', 'X_53',
    'X_35',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [28]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239]),
 array([-22.15837973, -22.23240709, -22.29680389, ..., -22.46786237,
        -22.55224234, -22.57647085

## Y13

In [29]:
# Fit a linear regression for Y_13 on the feature subset selected for it.
target_y = train_df['Y_13']
selected_x = train_df.loc[:, [
    'X_48', 'X_20', 'X_30', 'X_42', 'X_03', 'X_32', 'X_22', 'X_52',
    'X_40', 'X_17', 'X_18', 'X_15', 'X_51', 'X_04', 'X_23', 'X_47',
    'X_09', 'X_55', 'X_10', 'X_56', 'X_41', 'X_14', 'X_12', 'X_54',
    'X_53', 'X_35',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [30]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239]),
 array([-22.15837973, -22.23240709, -22.29680389, ..., -22.46786237,
        -22.55224234, -22.57647085

## Y14

In [31]:
# Fit a linear regression for Y_14 on the feature subset selected for it.
target_y = train_df['Y_14']
selected_x = train_df.loc[:, [
    'X_48', 'X_20', 'X_32', 'X_30', 'X_03', 'X_42', 'X_22', 'X_52',
    'X_40', 'X_18', 'X_15', 'X_04', 'X_23', 'X_47', 'X_51', 'X_55',
    'X_09', 'X_27', 'X_10', 'X_56', 'X_54', 'X_41', 'X_13', 'X_53',
    'X_12', 'X_35',
]]  # selected input: X features
LR = LinearRegression().fit(X=selected_x, y=target_y)

In [32]:
# Take the test columns from the training frame's column index instead of a
# second hand-typed list, so the inference features (and their order) always
# match what LR was fitted on.
test_x = pd.read_csv('./test.csv')[list(selected_x.columns)]
arr = LR.predict(test_x)
# Appended in target order; the submit cell reads preds by position.
preds.append(arr)
preds

[array([1.43825721, 1.49550256, 1.39457171, ..., 1.33692933, 1.33677015,
        1.38760283]),
 array([1.18404935, 1.20696829, 1.07569915, ..., 0.99489271, 0.99088465,
        1.05550575]),
 array([1.14295867, 1.13984526, 1.02780117, ..., 1.00547766, 1.01756548,
        1.05106684]),
 array([12.91491699, 13.30749512, 14.66195679, ..., 13.98266602,
        13.34503174, 13.30877686]),
 array([30.99674723, 30.93767465, 31.84795645, ..., 31.90803251,
        31.48614506, 31.2803425 ]),
 array([16.24667417, 16.53302224, 16.24556479, ..., 17.04870175,
        16.9939059 , 16.84667803]),
 array([3.2002669 , 3.15272131, 3.03455064, ..., 3.1268497 , 3.20310534,
        3.17225018]),
 array([-26.16796276, -26.10731453, -26.06937105, ..., -26.51638648,
        -26.52334178, -26.48255159]),
 array([-26.16547444, -26.11496667, -26.09683401, ..., -26.52674927,
        -26.51939356, -26.48810239]),
 array([-22.15837973, -22.23240709, -22.29680389, ..., -22.46786237,
        -22.55224234, -22.57647085

# Evaluation

# Submit

In [None]:
# Read the submission template; its non-ID columns are overwritten with predictions below.
submit = pd.read_csv(filepath_or_buffer='./sample_submission.csv')

In [None]:
# Pair predictions with the submission's non-ID columns in order, so the
# mapping does not depend on where the 'ID' column sits in the template.
target_cols = [col for col in submit.columns if col != 'ID']

# Guard against a stale or duplicated preds list (e.g. a prediction cell that
# was run twice), which would otherwise silently shift every later target.
if len(preds) != len(target_cols):
    raise ValueError(
        'expected {} prediction arrays, got {}'.format(len(target_cols), len(preds))
    )

for col, pred in zip(target_cols, preds):
    submit[col] = pred
print('Done.')

In [None]:
# Persist the filled-in submission without the DataFrame's row index.
submit.to_csv(path_or_buf='./submit_LR_selected.csv', index=False)