In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, VarianceThreshold, f_regression
from sklearn.linear_model import ElasticNet

# Widen pandas' display limits so wide/long frames are shown without truncation.
pd.set_option('display.max_columns', 82)
pd.set_option('display.max_rows', 1000)

In [None]:
# Load the cleaned training data and preview its first five rows.
train = pd.read_csv(filepath_or_buffer='./Datasets/train_clean.csv')
train.head(5)

In [None]:
# Load the cleaned hold-out data used for the submission and preview it.
# This previously re-read 'train_clean.csv', so `test` was a copy of `train`.
# NOTE(review): assumes the cleaned test file is named 'test_clean.csv' and
# sits next to the training file -- confirm against the cleaning notebook.
test = pd.read_csv('./Datasets/test_clean.csv')
test.head()

In [None]:
# Setting Features
# TODO: replace the 'asdf' / 'adf' placeholders with real column names.
# All columns whose name starts with 'Neighborhood'.
neighborhoods = [col for col in train.columns if col.startswith('Neighborhood')]
# Concatenate into ONE flat list of labels: a list nested inside the selection
# list (`train[[neighborhoods, ...]]`) is not a valid column key.
X = train[neighborhoods + ['asdf', 'asdf', 'asdf', 'adf', 'asdf']]
# Target column.
y = train['SalePrice_log']

In [None]:
# Split features/target into training and validation parts.
# A fixed random_state makes the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    random_state=42,
)

In [None]:
# Polynomial feature expansion with PolynomialFeatures' default settings.
poly = PolynomialFeatures()
# Fit on the training split only, then apply the same fitted transformer to
# each split. The grid-search cell fits on `X_train_poly`, which was never
# defined before.
X_train_poly = poly.fit_transform(X_train)
X_val_poly = poly.transform(X_val)
# Expansion of the full feature matrix, kept under its original name.
X_poly = poly.transform(X)

In [None]:
# Modelling pipeline, applied in order:
#   1. var_thresh - drop features whose variance is at or below a threshold
#                   (runs before scaling, so it sees the unscaled variances)
#   2. ss         - standardise the remaining features
#   3. kbest      - f_regression scoring; k='all' keeps every feature
#   4. ridge      - ridge regression
pipe = Pipeline(steps=[
    ('var_thresh', VarianceThreshold(threshold=0.05)),
    ('ss', StandardScaler()),
    ('kbest', SelectKBest(score_func=f_regression, k='all')),
    ('ridge', Ridge()),
])

# Search space: 4 variance thresholds x 100 log-spaced alphas in [1, 1000]
# = 400 candidates, each evaluated with 5-fold cross-validation.
pipe_params = {
    'var_thresh__threshold': [0, 0.05, 0.1, 0.25],
    'ridge__alpha': np.logspace(start=0, stop=3, num=100),
}

gs = GridSearchCV(estimator=pipe, param_grid=pipe_params, cv=5, verbose=1)
# Trailing ';' suppresses the estimator repr in the cell output.
gs.fit(X_train_poly, y_train);

In [None]:
# Submission
# Select from the hold-out frame (`test`, not `train`) exactly the columns the
# model was trained on, in the same order.
X_test = test[list(X.columns)]
# Reuse the already-fitted polynomial transformer: transform only, no refit.
X_test_poly = poly.transform(X_test)

# The model was fit on 'SalePrice_log', so predictions are mapped back with exp.
# NOTE(review): assumes the target was built with the natural log -- confirm.
# `assign` builds a new frame instead of adding a column to `test` in place,
# so re-running this cell is idempotent.
Submission = test[['Id']].assign(SalePrice=np.exp(gs.predict(X_test_poly)))

# NOTE(review): this writes to './datasets/' while the inputs are read from
# './Datasets/'; on a case-sensitive filesystem these are different
# directories -- confirm the intended output location.
Submission.to_csv('./datasets/DeepestDeepDish_3.csv', index = False)