In [42]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [43]:
# Seed NumPy's global (legacy) random generator so that anything drawing from
# np.random.* after this cell is repeatable on a fresh kernel.
np.random.seed(seed=42)

In [44]:
# Plotting defaults for the notebook: default figure size of (12, 8) and
# seaborn's "whitegrid" theme. The figure size is set first, then the theme.
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_theme(style="whitegrid")

In [45]:
# Locations of the two input CSV files (relative paths, so they resolve
# against the notebook's working directory).
TRAIN_CSV_PATH = "data/train.csv"
TEST_CSV_PATH = "data/test.csv"

# Load each file into its own DataFrame.
trainDf = pd.read_csv(TRAIN_CSV_PATH)
testDf = pd.read_csv(TEST_CSV_PATH)

In [46]:
# Columns of the training frame used as predictors by the baseline model.
baselineFeatures = ['GrLivArea', 'OverallQual', 'GarageCars']

# X: DataFrame restricted to the predictor columns; Y: the 'SalePrice' column
# used as the regression target.
X = trainDf.loc[:, baselineFeatures]
Y = trainDf.loc[:, 'SalePrice']

In [47]:
# Hold out 30% of the rows of X / Y for evaluation. The fixed random_state
# makes the shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [48]:
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so constructing and fitting in two statements leaves baselineModel
# bound to the same fitted object.
baselineModel = LinearRegression()
baselineModel.fit(X_train, y_train)

# Show the learned coefficients (one value per column of X_train).
print('Model Coefficients: \n', baselineModel.coef_)

Model Coefficients: 
 [   45.49631151 26846.77504928 23096.99177154]


In [49]:
# Run the fitted model over both splits (training first, then hold-out) so the
# two sets of predictions can be scored side by side.
y_train_predicted, y_test_predicted = map(
    baselineModel.predict, (X_train, X_test)
)

In [50]:
## Metrics on the training split (the data the model was fitted on)
train_mse = mean_squared_error(y_train, y_train_predicted)
print('[Train] Mean Squared Error: %.2f' % train_mse)
# The coefficient of determination: 1 is perfect prediction
train_r2 = r2_score(y_train, y_train_predicted)
print('[Train] Coefficient of Determination: %.2f' % train_r2)

[Train] Mean Squared Error: 1646098493.67
[Train] Coefficient of Determination: 0.73


In [51]:
## Metrics on the held-out split
# X_test / y_test are the 30% carved out of the training CSV by
# train_test_split; they are not the rows loaded into testDf.
test_mse = mean_squared_error(y_test, y_test_predicted)
print('[Test] Mean Squared Error: %.2f' % test_mse)
# The coefficient of determination: 1 is perfect prediction
test_r2 = r2_score(y_test, y_test_predicted)
print('[Test] Coefficient of Determination: %.2f' % test_r2)

[Test] Mean Squared Error: 1672668246.90
[Test] Coefficient of Determination: 0.76
