## 코드

# Import

In [14]:
import pandas as pd
import random
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(42) # Seed 고정

# Data Load

In [10]:
train_df = pd.read_csv('train.csv')

train_x = train_df.filter(regex='X') # Input : X Featrue
train_y = train_df.filter(regex='Y') # Output : Y Feature

# Regression & Inference

In [11]:
LR = Lasso(alpha = 0.1)
LR.fit(train_x, train_y)

Lasso(alpha=0.1)

In [12]:
test_x = pd.read_csv('./test.csv').drop(columns=['ID'])
preds = LR.predict(test_x)
preds

array([[  1.35536118,   1.0688313 ,   1.01330494, ..., -26.15388049,
        -26.1488201 , -26.1638172 ],
       [  1.35451017,   1.06002917,   1.01259143, ..., -26.23667233,
        -26.23318878, -26.24544109],
       [  1.34393652,   1.05565546,   1.01116418, ..., -26.26220175,
        -26.25918617, -26.27459888],
       ...,
       [  1.43240923,   1.11652035,   1.09656683, ..., -26.26091248,
        -26.25694684, -26.27178366],
       [  1.42848328,   1.11478641,   1.09557787, ..., -26.27981383,
        -26.27654266, -26.29080643],
       [  1.42908491,   1.12006571,   1.09438557, ..., -26.26066103,
        -26.25774668, -26.26976724]])

# Evaluation

In [17]:
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, train_size=0.8, test_size=0.2, random_state=42)

y_pred = LR.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

r2 = r2_score(y_test,y_pred)
lasso = Lasso()
print(cross_val_score(lasso, train_x, train_y, cv=3))
print(f'Test MSE: ${mse:,.0f}')
print(f'R2 Score: {r2:,.4f}\n')

[-0.0464871  -0.01068483 -0.01472037]
Test MSE: $1
R2 Score: 0.0084



# Submit

In [6]:
submit = pd.read_csv('./sample_submission.csv')

In [7]:
for idx, col in enumerate(submit.columns):
    if col=='ID':
        continue
    submit[col] = preds[:,idx-1]
print('Done.')

Done.


In [8]:
submit.to_csv('./submit_Lasso.csv', index=False)