In [10]:
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

from matplotlib.colors import rgb2hex
import matplotlib.cm as cm
import matplotlib.colors 

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
import sklearn.metrics as metrics

sns.set(style="darkgrid")
cmap2 = cm.get_cmap('twilight',13)
colors1= []
for i in range(cmap2.N):
    rgb= cmap2(i)[:4]
    colors1.append(rgb2hex(rgb))
    #print(rgb2hex(rgb))

### Load the Data

In [2]:
ht_df=pd.read_csv("https://raw.githubusercontent.com/thunderstroke325/60-Days-of-Data-Science-and-ML/main/datasets/data36_train.csv")
htest = pd.read_csv('https://raw.githubusercontent.com/thunderstroke325/60-Days-of-Data-Science-and-ML/main/datasets/data36_test.csv')

Declare Features (X and Y)

In [3]:
X = ht_df[["TotalBsmtSF","1stFlrSF","FullBath","TotRmsAbvGrd","YearBuilt","YearRemodAdd","OverallQual","GrLivArea","GarageCars","GarageArea"]]
y = ht_df['SalePrice']

### Split into train and test data

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

### Rescale the data, Cross Validation and Evaluation Metrics

In [11]:
pipeline = Pipeline([
    ('std_scalar', StandardScaler())
])
X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

def cross_val(model):
    pred = cross_val_score(model, X, y, cv=5)
    return pred.mean()

def print_evaluate(t, p):  
    mae = metrics.mean_absolute_error(t, p)
    mse = metrics.mean_squared_error(t, p)
    rmse = np.sqrt(metrics.mean_squared_error(t, p))
    r2_square = metrics.r2_score(t, p)
    print('Mean Squared Error:', mse)
    print('Root Mean Squared Error:', rmse)
    print('Mean Absoulte Error:', mae)
    print('R2 Square:', r2_square)
    print('-'* 8)

### Linear Regression

In [12]:
from sklearn.linear_model import LinearRegression
linear_reg = LinearRegression(normalize=True)
linear_reg.fit(X_train,y_train)
test_pred = linear_reg.predict(X_test)
train_pred = linear_reg.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 1443122011.1659107
Root Mean Squared Error: 37988.44575875552
Mean Absoulte Error: 24136.742772186506
R2 Square: 0.7457982125999305
--------
Test set evaluation:
Mean Squared Error: 1515969395.6813357
Root Mean Squared Error: 38935.451656316196
Mean Absoulte Error: 24839.632252832587
R2 Square: 0.7908320198833435
--------


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LinearRegression())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




### SVM Regression

In [13]:
from sklearn.svm import SVR
svm_reg_model = SVR(kernel='rbf', C=1000000, epsilon=0.001)
svm_reg_model.fit(X_train, y_train)
test_pred = svm_reg_model.predict(X_test)
train_pred = svm_reg_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 337812543.56726295
Root Mean Squared Error: 18379.677460914892
Mean Absoulte Error: 9085.7086559685
R2 Square: 0.9404952930406869
--------
Test set evaluation:
Mean Squared Error: 1186154546.6443958
Root Mean Squared Error: 34440.594458348074
Mean Absoulte Error: 21280.0245937407
R2 Square: 0.8363386811537259
--------


### Random Forest Regression

In [14]:
from sklearn.ensemble import RandomForestRegressor
random_reg_model = RandomForestRegressor(n_estimators=1500)
random_reg_model.fit(X_train, y_train)
test_pred = random_reg_model.predict(X_test)
train_pred = random_reg_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 156968074.0247644
Root Mean Squared Error: 12528.690036263344
Mean Absoulte Error: 7593.223968641734
R2 Square: 0.972350525684516
--------
Test set evaluation:
Mean Squared Error: 1020998725.0289819
Root Mean Squared Error: 31953.07066666648
Mean Absoulte Error: 19441.42459030496
R2 Square: 0.8591262847229106
--------


### SGD Regression

In [15]:
from sklearn.linear_model import SGDRegressor
sgd_reg_model = SGDRegressor(n_iter_no_change=150, penalty=None, eta0=0.0001, max_iter=1000, tol=1e-3)
sgd_reg_model.fit(X_train, y_train)
test_pred = sgd_reg_model.predict(X_test)
train_pred = sgd_reg_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 1476562866.8268046
Root Mean Squared Error: 38426.07014549894
Mean Absoulte Error: 23950.873695498278
R2 Square: 0.7399077021542343
--------
Test set evaluation:
Mean Squared Error: 1576941269.5775003
Root Mean Squared Error: 39710.71983202395
Mean Absoulte Error: 24513.419512052624
R2 Square: 0.7824193410105906
--------




### Elastic Net

In [16]:
from sklearn.linear_model import ElasticNet
elastic_model = ElasticNet(alpha=0.1, l1_ratio=0.9, selection='random', random_state=42)
elastic_model.fit(X_train, y_train)
test_pred = elastic_model.predict(X_test)
train_pred = elastic_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 1443387568.7624135
Root Mean Squared Error: 37991.940839636154
Mean Absoulte Error: 24095.566913047976
R2 Square: 0.7457514353938687
--------
Test set evaluation:
Mean Squared Error: 1519367069.6515937
Root Mean Squared Error: 38979.05937361231
Mean Absoulte Error: 24764.107436824193
R2 Square: 0.7903632211045037
--------


### Lasso Regression

In [17]:
from sklearn.linear_model import Lasso
lasso_model = Lasso(alpha=0.1, 
              precompute=True, 
              selection='random',
              random_state=42)
lasso_model.fit(X_train, y_train)
test_pred = lasso_model.predict(X_test)
train_pred = lasso_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 1443122011.2726438
Root Mean Squared Error: 37988.445760160335
Mean Absoulte Error: 24136.740151374444
R2 Square: 0.7457982125811298
--------
Test set evaluation:
Mean Squared Error: 1515971336.5135422
Root Mean Squared Error: 38935.47658002329
Mean Absoulte Error: 24839.608081909846
R2 Square: 0.7908317520943278
--------


### Ridge Regression

In [18]:
from sklearn.linear_model import Ridge
ridge_model = Ridge(alpha=1.0, solver='cholesky', tol=0.0005, random_state=42)
ridge_model.fit(X_train, y_train)
pred = ridge_model.predict(X_test)
test_pred = ridge_model.predict(X_test)
train_pred = ridge_model.predict(X_train)
print('Train set evaluation:')
print_evaluate(y_train, train_pred)
print('Test set evaluation:')
print_evaluate(y_test, test_pred)

Train set evaluation:
Mean Squared Error: 1443125805.7719235
Root Mean Squared Error: 37988.495702935164
Mean Absoulte Error: 24131.603973071877
R2 Square: 0.7457975441910063
--------
Test set evaluation:
Mean Squared Error: 1516333340.34665
Root Mean Squared Error: 38940.12506845156
Mean Absoulte Error: 24830.20672916764
R2 Square: 0.7907818041133321
--------
