In [19]:
# Import all the relevant libraries 
import pandas as pd 
import numpy as no 
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.linear_model import LinearRegression, LassoCV, RidgeCV
from sklearn.preprocessing import PolynomialFeatures, PowerTransformer
from sklearn.metrics import r2_score

%matplotlib inline

In [4]:
def column_cleaner(data):
    """Normalise the column labels of ``data`` in place and return it.

    Every label is lower-cased, stripped of leading/trailing whitespace and
    has its remaining spaces replaced by underscores, in that order.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose ``columns`` are rewritten; labels must be strings.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the new labels.
    """
    # Lower-case first, using the unbound str method so non-string labels
    # are rejected before anything is assigned.
    lowered = [str.lower(label) for label in data.columns]
    data.columns = lowered

    trimmed = data.columns.str.strip()
    data.columns = trimmed.str.replace(" ", "_")
    return data


In [5]:
def fill_na(data):
    """Fill missing values with 0 in a fixed set of columns, in place.

    The columns touched are ``garage_qual``, ``garage_cond`` and
    ``mas_vnr_area``; all other columns are left as they are.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the three columns named above.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    garage_cols = ['garage_qual', 'garage_cond']
    garage_filled = data[garage_cols].fillna(0)
    data[garage_cols] = garage_filled

    data['mas_vnr_area'] = data['mas_vnr_area'].fillna(0)
    return data
    
    

In [6]:
def dataframe_dictionary_input(data):
    """Encode selected string-coded columns as integers, in place.

    The following columns are rewritten with ``Series.replace``; values that
    are not keys of the relevant mapping (including missing values) are left
    unchanged:

    * ``street``: ``Grvl`` -> 0, ``Pave`` -> 1
    * ``bsmt_qual``, ``bsmt_cond``: ``Po``..``Ex`` -> 1..5
    * ``heating_qc``, ``kitchen_qual``, ``exter_qual``, ``exter_cond``:
      ``Po``..``Ex`` -> 0..4

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing all seven columns listed above.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If one of the expected columns is missing.
    """
    street_cleaned = {"Grvl": 0, "Pave": 1}

    # Basement ratings start at 1 rather than 0.
    # NOTE(review): presumably 0 is reserved for "no basement"; this function
    # does not fill missing values itself -- confirm against the caller.
    na_to_ex_rating = {"Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}

    # Zero-based scale shared by the columns without a "not applicable" level.
    poor_to_ex_no_na = {"Po": 0, "Fa": 1, "TA": 2, "Gd": 3, "Ex": 4}

    # Column -> mapping, applied in this order. ``kitchen_qual`` previously
    # referenced an undefined name (``kitchen_qc_cleaned``) and raised
    # NameError; it uses the same zero-based scale as the other columns.
    encodings = {
        "street": street_cleaned,
        "bsmt_qual": na_to_ex_rating,
        "bsmt_cond": na_to_ex_rating,
        "heating_qc": poor_to_ex_no_na,
        "kitchen_qual": poor_to_ex_no_na,
        "exter_qual": poor_to_ex_no_na,
        "exter_cond": poor_to_ex_no_na,
    }
    for column, mapping in encodings.items():
        data[column] = data[column].replace(mapping)
    return data


In [7]:
def feature_interactions(data):
    """Add five pairwise-product columns to ``data`` in place.

    Each new column is the element-wise product of two existing columns:

    * ``garage_comb`` = ``garage_area`` * ``garage_cars``
    * ``deck_*_porch`` = ``wood_deck_sf`` * ``open_porch_sf``
    * ``exter_cond*exter_qual`` = ``exter_cond`` * ``exter_qual``
    * ``total_bsmt_sf*gr_liv_area`` = ``total_bsmt_sf`` * ``gr_liv_area``
    * ``bsmt_qual*bsmt_cond`` = ``bsmt_qual`` * ``bsmt_cond``

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every source column listed above.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # (new column, left factor, right factor), created in this order.
    products = (
        ("garage_comb", "garage_area", "garage_cars"),
        ("deck_*_porch", "wood_deck_sf", "open_porch_sf"),
        ("exter_cond*exter_qual", "exter_cond", "exter_qual"),
        ("total_bsmt_sf*gr_liv_area", "total_bsmt_sf", "gr_liv_area"),
        ("bsmt_qual*bsmt_cond", "bsmt_qual", "bsmt_cond"),
    )
    for target, left, right in products:
        data[target] = data[left] * data[right]
    return data
    
    

In [21]:
def fitter_and_instantiator():
    """Split the notebook-level ``X`` and ``y`` and fit a linear regression.

    Reads ``X`` and ``y`` from the enclosing (notebook) namespace, splits
    them with ``train_test_split(..., random_state=42)`` and fits a
    ``LinearRegression`` on the training part.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, lr)`` where ``lr`` is the fitted
        estimator.

    Notes
    -----
    Nothing is written to the global namespace; callers must unpack the
    returned tuple themselves.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # fit() returns the estimator itself, so fitting once and returning ``lr``
    # hands back the same object the earlier ``return ..., lr.fit(...)`` did,
    # without training the model a second time.
    lr = LinearRegression().fit(X_train, y_train)

    # The original returned ``Y_test`` (capital Y), which is undefined and
    # raised NameError on every call.
    return X_train, X_test, y_train, y_test, lr

In [18]:
def model_evaluation(X_test):
    """Plot a histogram of the residuals of ``lr`` on ``X_test``.

    Reads the fitted model ``lr`` and the true targets ``y_test`` from the
    enclosing (notebook) namespace.

    Parameters
    ----------
    X_test : array-like
        Feature matrix passed to ``lr.predict``.
        NOTE(review): assumed to be row-aligned with the global ``y_test`` --
        confirm at the call site.

    Returns
    -------
    None
        The histogram is drawn as a side effect.
    """
    pred = lr.predict(X_test)
    # Residual = actual - predicted. The original read ``y-test-pred``, which
    # subtracts the global ``test`` frame from ``y`` instead of using y_test.
    residuals = y_test - pred
    residuals.hist()

In [None]:
def dummy_cols_maker(data, list_of_columns=None):
    """One-hot encode the given columns, dropping the first level of each.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode. It is not modified; a new frame is returned.
    list_of_columns : list of str, str, or None, optional
        Columns to encode. A single string is treated as one column name.
        When ``None``, the notebook's standard categorical columns are used
        (``neighborhood``, ``central_air``, ``electrical``, ``heating``,
        ``functional``, ``exterior_1st``, ``garage_cond``, ``garage_qual``).

    Returns
    -------
    pandas.DataFrame
        The non-encoded columns in their original order, followed by the
        dummy columns for each encoded column in the order requested.

    Raises
    ------
    KeyError
        If a requested column is not present in ``data``.

    Notes
    -----
    The argument was previously ignored and the eight default columns were
    always encoded; it is now honoured.
    """
    default_columns = [
        'neighborhood',
        'central_air',
        'electrical',
        'heating',
        'functional',
        'exterior_1st',
        'garage_cond',
        'garage_qual',
    ]

    if list_of_columns is None:
        columns = default_columns
    elif isinstance(list_of_columns, str):
        # Do not iterate a bare string character by character.
        columns = [list_of_columns]
    else:
        columns = list(list_of_columns)

    if not columns:
        # Nothing to encode; still hand back a new frame like get_dummies does.
        return data.copy()

    # One call encodes every column and appends the dummy groups in the order
    # given, replacing the earlier chain of one-column get_dummies calls.
    return pd.get_dummies(data, columns=columns, drop_first=True)

In [13]:
def prediction_helper():
    """Print the first rows of a predictions-vs-actuals table.

    Reads ``lr``, ``X_test`` and ``y_test`` from the enclosing (notebook)
    namespace and builds a frame with three columns: ``val_preds`` (model
    predictions), ``y_val`` (the values of ``y_test``) and ``val_errors``
    (``y_val - val_preds``). Only ``head()`` of that frame is printed;
    nothing is returned.
    """
    predicted = lr.predict(X_test)
    val_preds = pd.DataFrame(predicted, columns=["val_preds"]).assign(
        y_val=y_test.values
    )
    # Error is actual minus predicted.
    val_preds = val_preds.assign(
        val_errors=val_preds['y_val'] - val_preds['val_preds']
    )
    print(val_preds.head())

In [23]:
def prediction_maker():
    """Predict on the hold-out ``test`` frame and build a submission table.

    Reads ``lr``, ``test`` and ``features`` from the enclosing (notebook)
    namespace. The predictions are stored on ``test`` as column
    ``preds_1`` (a side effect on the global frame).

    Returns
    -------
    tuple
        ``(test_preds, submission_x)``: ``test_preds`` is a DataFrame holding
        the raw output of ``lr.predict``; ``submission_x`` is a two-column
        frame with columns ``Id`` and ``SalePrice``.
    """
    test_preds = pd.DataFrame(lr.predict(test[features]))

    # Assign by position rather than by index label: ``test_preds`` carries a
    # fresh 0..n-1 index, so assigning the frame itself would produce NaN for
    # any row of ``test`` whose index label is not in that range.
    test['preds_1'] = test_preds.iloc[:, 0].values

    # The original selected ``'preds'``, a column this function never creates;
    # the column written above (and the one the rename expects) is 'preds_1'.
    # rename() without inplace avoids mutating a slice of ``test``.
    submission_x = test.loc[:, ['id', 'preds_1']].rename(
        columns={'id': 'Id', 'preds_1': 'SalePrice'}
    )

    return test_preds, submission_x

In [22]:
def power_transformer():
    """Fit power transforms on the training split and apply them to both splits.

    Reads ``X_train``, ``X_test``, ``y_train`` and ``y_test`` from the
    enclosing (notebook) namespace. One ``PowerTransformer`` (default
    settings) is fitted on ``X_train`` and a second on ``y_train`` converted
    to a one-column frame; each is then used to transform its train and test
    counterpart.

    Returns
    -------
    tuple
        ``(pt, X_train_pt_transformed, X_test_pt_transformed,
        pt_y, y_train_pt_transformed, y_test_pt_transformed)``
    """
    # Features: fit on the training split only, then transform both splits.
    pt = PowerTransformer().fit(X_train)
    X_train_pt_transformed, X_test_pt_transformed = (
        pt.transform(X_train),
        pt.transform(X_test),
    )

    # Target: the transformer needs 2-D input, hence to_frame().
    y_train_frame = y_train.to_frame()
    pt_y = PowerTransformer().fit(y_train_frame)
    y_train_pt_transformed = pt_y.transform(y_train_frame)
    y_test_pt_transformed = pt_y.transform(y_test.to_frame())

    return (
        pt,
        X_train_pt_transformed,
        X_test_pt_transformed,
        pt_y,
        y_train_pt_transformed,
        y_test_pt_transformed,
    )
    