In [16]:
import pandas as pd

# Read the train and test tables from ./data/preprocess.
train = pd.read_csv('./data/preprocess/train_0926.csv')
test = pd.read_csv('./data/preprocess/test_0926.csv')

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows of `train` for validation; the fixed seed keeps
# the partition reproducible across runs.
train_set, test_set = train_test_split(train, test_size=0.3, random_state=2045)

# Bare expression so the notebook displays both shapes.
train_set.shape, test_set.shape

((1022, 58), (438, 58))

In [18]:
from sklearn.metrics import mean_squared_error, mean_squared_log_error, r2_score
import numpy as np

# RMSLE definition
def rmsle(origin, pred):
    """Return the root mean squared logarithmic error of `pred` against `origin`.

    Computed as the square root of
    ``sklearn.metrics.mean_squared_log_error(origin, pred)``.

    Parameters
    ----------
    origin : array-like
        Ground-truth target values.
    pred : array-like
        Predicted target values.
    """
    msle = mean_squared_log_error(origin, pred)
    return np.sqrt(msle)

# Ridge Regression

## Validation

In [4]:
from sklearn.linear_model import Ridge

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the environment pins a release that still accepts it.
RG = Ridge(alpha=0.3, normalize=True, solver='cholesky')

# Bare last expression so the notebook echoes the fitted estimator.
RG.fit(X, y)

Ridge(alpha=0.3, normalize=True, solver='cholesky')

In [5]:
# Score Ridge on the held-out split. expm1 is applied to both the stored target
# and the prediction before RMSLE — presumably SalePrice is held as
# log1p(price); confirm against the preprocessing step.
y_hat = RG.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.1261448281058073

## Submission

In [None]:
# Refit Ridge on every row of `train`, then predict on `test`.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# fit() returns the estimator itself, so the refit and the prediction chain.
pred = RG.fit(X, y).predict(test)

In [None]:
# Write the Ridge submission file.
# `pred` is deliberately NOT rebound: the original `pred = np.expm1(pred)` made
# this cell non-idempotent, because re-running it applied expm1 a second time.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)  # back-transform the model output
sub.to_csv('./submission/ridge_1.csv', index=False)

# Lasso Regression

## Validation

In [10]:
from sklearn.linear_model import Lasso

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the environment pins a release that still accepts it.
LS = Lasso(alpha=0.2, normalize=True)

# Bare last expression so the notebook echoes the fitted estimator.
LS.fit(X, y)

Lasso(alpha=0.2, normalize=True)

In [11]:
# Score Lasso on the held-out split (expm1 on both sides before RMSLE).
# NOTE(review): the recorded score for this cell is much worse than the Ridge
# and ElasticNet scores recorded in this notebook; alpha=0.2 may be
# over-regularising — worth revisiting.
y_hat = LS.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.38277372901293605

# ElasticNet Regression

## Validation

In [6]:
from sklearn.linear_model import ElasticNet

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2 — confirm the environment pins a release that still accepts it.
EN = ElasticNet(alpha=0.0001, l1_ratio=0.7, normalize=True)

# Bare last expression so the notebook echoes the fitted estimator.
EN.fit(X, y)

ElasticNet(alpha=0.0001, l1_ratio=0.7, normalize=True)

In [7]:
# Score ElasticNet on the held-out split (expm1 on both sides before RMSLE).
y_hat = EN.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.12484478031715027

## Submission

In [8]:
# Refit ElasticNet on every row of `train`, then predict on `test`.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# fit() returns the estimator itself, so the refit and the prediction chain.
pred = EN.fit(X, y).predict(test)

In [9]:
# Write the ElasticNet submission file.
# `pred` is deliberately NOT rebound: the original `pred = np.expm1(pred)` made
# this cell non-idempotent, because re-running it applied expm1 a second time.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)  # back-transform the model output
sub.to_csv('./submission/EN_2.csv', index=False)

# Adaptive Boosting Regressor

In [19]:
from sklearn.ensemble import AdaBoostRegressor

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# AdaBoost with library defaults; only the seed is fixed.
ABR = AdaBoostRegressor(random_state=0)

# Bare last expression so the notebook echoes the fitted estimator.
ABR.fit(X, y)

AdaBoostRegressor(random_state=0)

In [20]:
# Score AdaBoost on the held-out split (expm1 on both sides before RMSLE).
y_hat = ABR.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.16522581623969151

# Gradient Boosting Machine(GBM) Regressor

## Validation

In [21]:
from sklearn.ensemble import GradientBoostingRegressor

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# Gradient boosting with library defaults; only the seed is fixed.
GBR = GradientBoostingRegressor(random_state=0)

# Bare last expression so the notebook echoes the fitted estimator.
GBR.fit(X, y)

GradientBoostingRegressor(random_state=0)

In [22]:
# Score gradient boosting on the held-out split (expm1 on both sides before RMSLE).
y_hat = GBR.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.12740232742201202

## Submission

In [None]:
# Refit gradient boosting on every row of `train`, then predict on `test`.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# fit() returns the estimator itself, so the refit and the prediction chain.
pred = GBR.fit(X, y).predict(test)

In [None]:
# Write the gradient-boosting submission file.
# `pred` is deliberately NOT rebound: the original `pred = np.expm1(pred)` made
# this cell non-idempotent, because re-running it applied expm1 a second time.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)  # back-transform the model output
sub.to_csv('./submission/GBR_1.csv', index=False)

# LightGBM Regressor

## Validation

In [23]:
from lightgbm import LGBMRegressor

# Features are every column except the target.
X = train_set.drop(columns=['SalePrice'])
y = train_set['SalePrice']

# LightGBM with library defaults; only the seed is fixed.
LGB = LGBMRegressor(random_state=0)

# Bare last expression so the notebook echoes the fitted estimator.
LGB.fit(X, y)

LGBMRegressor(random_state=0)

In [24]:
# Score LightGBM on the held-out split (expm1 on both sides before RMSLE).
y_hat = LGB.predict(test_set.drop(columns=['SalePrice']))
rmsle(origin=np.expm1(test_set['SalePrice']), pred=np.expm1(y_hat))

0.13519916615901967

## Submission

In [None]:
# Refit LightGBM on every row of `train`, then predict on `test`.
X = train.drop(columns=['SalePrice'])
y = train['SalePrice']

# fit() returns the estimator itself, so the refit and the prediction chain.
pred = LGB.fit(X, y).predict(test)

In [None]:
# Write the LightGBM submission file.
# `pred` is deliberately NOT rebound: the original `pred = np.expm1(pred)` made
# this cell non-idempotent, because re-running it applied expm1 a second time.
sub = pd.read_csv('data/sample_submission.csv')
sub['SalePrice'] = np.expm1(pred)  # back-transform the model output
sub.to_csv('./submission/LGB_1.csv', index=False)

#The End#