In [2]:
#import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler

#import data
homeprices = pd.read_csv('trainprices.csv')

In [3]:
y = homeprices['SalePrice']
homeprices.drop(['SalePrice'], axis=1, inplace=True)

In [24]:
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression, Ridge, HuberRegressor, Lasso, ElasticNet, BayesianRidge

In [26]:
cat_var = homeprices.select_dtypes(include=['object']).columns.tolist()
num_var = homeprices.select_dtypes(exclude=['object']).columns.tolist()

In [73]:
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scalar', StandardScaler())])
    
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy="constant", fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse=False))])

preprocessor = ColumnTransformer(
    transformers = [
        ('num', numerical_transformer, num_var),
        ('cat', categorical_transformer, cat_var) ])

In [74]:
pipe_Linear = Pipeline(
    steps = [('preprocessor', preprocessor),
            ('Linear', LinearRegression())])

In [75]:
cleandata = preprocessor.fit_transform(homeprices)

## Lasso

In [43]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV

# Lasso with 5 fold cross-validation
lassomodel = LassoCV(cv=5, random_state=0, max_iter=10000)
lassomodel.fit(cleandata, y)
best_alpha = lassomodel.alpha_

# Fit model with best alpha
lasso_best = Lasso(alpha=best_alpha)
lasso_best.fit(cleandata, y)

Lasso(alpha=166.84490230495553)

In [47]:
homepricestest = pd.read_csv('testprices.csv')
test = pd.DataFrame(homepricestest)
cleantestdata = preprocessor.transform(test)

In [107]:
test_predictions = lasso_best.predict(cleantestdata)
test_predictions
predict_lasso = pd.DataFrame(test_predictions)
predict_lasso['id'] = test['Id']
predict_lasso.columns =['SalePrice', 'Id']
predict_lasso = predict_lasso[['Id', 'SalePrice']]

In [108]:
predict_lasso.to_csv('gaertnerlassoprediction.csv', index=False)

## Ridge

In [98]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV

# Ridge with 5 fold cross-validation
ridgemodel = RidgeCV(cv=5)
ridgemodel.fit(cleandata, y)
best_ridge_alpha = ridgemodel.alpha_

# Fit model with best alpha
ridge_best = Ridge(alpha=best_ridge_alpha)
ridge_best.fit(cleandata, y)

Ridge(alpha=10.0)

In [99]:
ridge_predictions = ridge_best.predict(cleantestdata)
predict_ridge = pd.DataFrame(ridge_predictions)
predict_ridge['id'] = test['Id']
predict_ridge.columns =['SalePrice', 'Id']
predict_ridge = predict_ridge[['Id', 'SalePrice']]

In [101]:
predict_ridge.to_csv('gaertnerridgepredictionbest.csv', index=False)

## ElasticNet

In [104]:
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV

# ElasticNet with 5 fold cross-validation
enmodel = ElasticNetCV(cv=5)
enmodel.fit(cleandata, y)
best_en_alpha = enmodel.alpha_

# Fit model with best alpha
en_best = ElasticNet(alpha=best_en_alpha)
en_best.fit(cleandata, y)

ElasticNet(alpha=125.63206937374352)

In [105]:
en_predictions = en_best.predict(cleantestdata)
predict_en = pd.DataFrame(en_predictions)
predict_en['id'] = test['Id']
predict_en.columns =['SalePrice', 'Id']
predict_en = predict_en[['Id', 'SalePrice']]

In [106]:
predict_en.to_csv('gaertnerelasticnetpredictionbest.csv', index=False)