In [1]:
import pandas as pd

# Read the training and test tables from the data/preprocess directory.
train, test = (
    pd.read_csv('./data/preprocess/train_0918.csv'),
    pd.read_csv('./data/preprocess/test_0918.csv'),
)

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows of `train`; the fixed random_state keeps the split
# reproducible. Note that `test_set` is this hold-out partition of `train`,
# not the separately loaded `test` frame.
train_set, test_set = train_test_split(train, test_size=0.3, random_state=2045)

# Show the shapes of both partitions as the cell output.
(train_set.shape, test_set.shape)

((1022, 58), (438, 58))

In [3]:
from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score
import numpy as np

# RMSLE definition
def rmsle(origin, pred):
    """Return the root mean squared logarithmic error (RMSLE).

    Args:
        origin: Ground-truth target values.
        pred: Predicted target values.

    Returns:
        The square root of ``mean_squared_log_error(origin, pred)``.
    """
    msle = mean_squared_log_error(origin, pred)
    return np.sqrt(msle)

# Ridge Regression

## Validation

In [None]:
from sklearn.linear_model import Ridge

# Features and target taken from the training partition of the hold-out split.
y = train_set['SalePrice']
X = train_set.drop(columns=['SalePrice'])

# Ridge regression with a Cholesky solver.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older release — confirm the pinned version.
RG = Ridge(alpha=0.3, solver='cholesky', normalize=True)

RG.fit(X, y)

In [None]:
# Predict on the hold-out rows and score with RMSLE. Both the targets and the
# predictions go through expm1 first; presumably `SalePrice` is stored on a
# log1p scale — confirm against the preprocessing step.
X_holdout = test_set.drop(columns=['SalePrice'])
y_hat = RG.predict(X_holdout)
rmsle(np.expm1(test_set['SalePrice']), np.expm1(y_hat))

## Submission

In [None]:
from sklearn.base import clone

# Features and target from the full training table.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# Fit an unfitted copy of RG (same hyper-parameters) on all training rows.
# Refitting RG itself would overwrite the validation model, so re-running the
# validation-score cell afterwards would evaluate a model that has already seen
# the hold-out rows and report an optimistic RMSLE.
RG_full = clone(RG).fit(X, y)

# Predictions for the competition test table, on the same scale as `SalePrice`.
pred = RG_full.predict(test)

In [None]:
# Build the submission file from the sample submission.
# The expm1 back-transform is written straight into the frame instead of being
# assigned back to `pred`, so re-running this cell cannot apply the transform a
# second time and corrupt the submitted values.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)
sub.to_csv('./submission/ridge_1.csv', index=False)

# Lasso Regression

## Validation

In [None]:
from sklearn.linear_model import Lasso

# Features and target taken from the training partition of the hold-out split.
y = train_set['SalePrice']
X = train_set.drop(columns=['SalePrice'])

# Lasso regression.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older release — confirm the pinned version.
LS = Lasso(alpha=0.2, normalize=True)

LS.fit(X, y)

In [None]:
# Predict on the hold-out rows and score with RMSLE. Both the targets and the
# predictions go through expm1 first; presumably `SalePrice` is stored on a
# log1p scale — confirm against the preprocessing step.
X_holdout = test_set.drop(columns=['SalePrice'])
y_hat = LS.predict(X_holdout)
rmsle(np.expm1(test_set['SalePrice']), np.expm1(y_hat))

# ElasticNet Regression

## Validation

In [4]:
from sklearn.linear_model import ElasticNet

# Features and target taken from the training partition of the hold-out split.
y = train_set['SalePrice']
X = train_set.drop(columns=['SalePrice'])

# Elastic-net regression with a 0.7 L1 mixing ratio.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older release — confirm the pinned version.
EN = ElasticNet(alpha=0.0001, l1_ratio=0.7, normalize=True)

EN.fit(X, y)

ElasticNet(alpha=0.0001, l1_ratio=0.7, normalize=True)

In [5]:
# Predict on the hold-out rows and score with RMSLE. Both the targets and the
# predictions go through expm1 first; presumably `SalePrice` is stored on a
# log1p scale — confirm against the preprocessing step.
X_holdout = test_set.drop(columns=['SalePrice'])
y_hat = EN.predict(X_holdout)
rmsle(np.expm1(test_set['SalePrice']), np.expm1(y_hat))

0.1251141288280315

## Submission

In [6]:
from sklearn.base import clone

# Features and target from the full training table.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# Fit an unfitted copy of EN (same hyper-parameters) on all training rows.
# Refitting EN itself would overwrite the validation model, so re-running the
# validation-score cell afterwards would evaluate a model that has already seen
# the hold-out rows and report an optimistic RMSLE.
EN_full = clone(EN).fit(X, y)

# Predictions for the competition test table, on the same scale as `SalePrice`.
pred = EN_full.predict(test)

In [7]:
# Build the submission file from the sample submission.
# The expm1 back-transform is written straight into the frame instead of being
# assigned back to `pred`, so re-running this cell cannot apply the transform a
# second time and corrupt the submitted values.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)
sub.to_csv('./submission/EN_1.csv', index=False)

# Adaptive Boosting Regressor

In [20]:
from sklearn.ensemble import AdaBoostRegressor

# Features and target taken from the training partition of the hold-out split.
y = train_set['SalePrice']
X = train_set.drop(columns=['SalePrice'])

# AdaBoost regressor: 500 estimators, square loss, a very small learning rate,
# and a fixed seed so the fit is reproducible.
ABR = AdaBoostRegressor(
    n_estimators=500,
    learning_rate=0.0001,
    loss='square',
    random_state=2045,
)

ABR.fit(X, y)

AdaBoostRegressor(learning_rate=0.0001, loss='square', n_estimators=500,
                  random_state=2045)

In [21]:
# Predict on the hold-out rows and score with RMSLE. Both the targets and the
# predictions go through expm1 first; presumably `SalePrice` is stored on a
# log1p scale — confirm against the preprocessing step.
X_holdout = test_set.drop(columns=['SalePrice'])
y_hat = ABR.predict(X_holdout)
rmsle(np.expm1(test_set['SalePrice']), np.expm1(y_hat))

0.1898079637273237

#The End#