## Multiple-regression - House price study

### 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 2. Import data (separate training and test sets)

In [None]:
# Load the training and test splits, which are stored as two separate
# CSV files in the same directory (training file first, then test file).
house_train, house_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../Data/house_data/kc_house_train_data.csv',
        '../Data/house_data/kc_house_test_data.csv',
    )
)

### 3. Simple model checks

In [None]:
# Single-feature setup: the 'sqft_living' column is the only predictor and
# the 'price' column is the target.
X_house_train, y_house_train = house_train['sqft_living'], house_train['price']

# Reshape each 1-D column into a single-column 2-D array.
X_train_shaped = np.reshape(X_house_train.values, (-1, 1))
y_train_shaped = np.reshape(y_house_train.values, (-1, 1))

# Report the resulting array shapes.
print("Shape of X-data (training):", X_train_shaped.shape, sep="")
print("Shape of Y-data (training):", y_train_shaped.shape, sep="")

### 4. Simple Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a least-squares line to the single-feature training data.
linreg = LinearRegression().fit(X_train_shaped, y_train_shaped)

# Report the learned parameters, then the R-squared on the training data.
print('linear model coeff (w): {}'.format(linreg.coef_))
print('linear model intercept (b): {}'.format(linreg.intercept_))

r2_train_simple = linreg.score(X_train_shaped, y_train_shaped)
print('R-squared score (training): {}'.format(r2_train_simple))

### 5. Plot the results - Simple Linear Regression

In [None]:
# Scatter the training points and overlay the fitted line w * x + b.
fig, ax = plt.subplots(figsize=(5, 4))
ax.scatter(X_train_shaped, y_train_shaped, marker='o', s=50, alpha=0.8)

fitted_line = linreg.coef_ * X_train_shaped + linreg.intercept_
ax.plot(X_train_shaped, fitted_line, 'r-')

ax.set_title('Least-squares linear regression')
ax.set_xlabel('Feature value (x)')
ax.set_ylabel('Target value (y)')
plt.show()

### 6. Multiple regression features

In [None]:
# Engineered feature: the bedroom count multiplied by itself.
bedrooms_train = house_train['bedrooms']
house_train['bedroom_squared'] = bedrooms_train * bedrooms_train

# Preview the first five rows, including the new column.
house_train.head(n=5)

In [None]:
# Squared bedroom count for the test set.
house_test['bedroom_squared'] = house_test['bedrooms'] * house_test['bedrooms']

# Add the remaining three engineered columns to both data sets.
for frame in (house_train, house_test):
    # Product of bedrooms and bathrooms.
    frame['bed_bath_rooms'] = frame['bedrooms'] * frame['bathrooms']
    # Natural logarithm of 'sqft_living'.
    frame['log_sqft_living'] = np.log(frame['sqft_living'])
    # Sum of latitude and longitude.
    frame['lat_plus_long'] = frame['lat'] + frame['long']

# Preview the first five test rows with all new columns.
house_test.head(5)

In [None]:
# Show descriptive statistics for the test set, now including the
# engineered columns added to it.
test_summary = house_test.describe()
display(test_summary)

### 7. Three different models

In [None]:
# Three nested feature sets: each model keeps the previous model's columns
# and appends more.
Model_1 = ['sqft_living', 'bedrooms', 'bathrooms', 'lat', 'long']
Model_2 = Model_1 + ['bed_bath_rooms']
Model_3 = Model_2 + ['bedroom_squared', 'log_sqft_living', 'lat_plus_long']

# Feature matrices for each model, first from the training set...
X_train_house_m1, X_train_house_m2, X_train_house_m3 = (
    house_train[columns] for columns in (Model_1, Model_2, Model_3)
)
# ...then from the test set.
X_test_house_m1, X_test_house_m2, X_test_house_m3 = (
    house_test[columns] for columns in (Model_1, Model_2, Model_3)
)

# Targets: the 'price' column, kept both as a Series and as a
# single-column 2-D array.
y_train_m, y_test_m = house_train['price'], house_test['price']
y_train_shaped_m = np.reshape(y_train_m.values, (-1, 1))
y_test_shaped_m = np.reshape(y_test_m.values, (-1, 1))

In [None]:
# Sanity check: print the shape of each test feature matrix, then the
# shape of the reshaped test target.
for checked in (X_test_house_m1, X_test_house_m2, X_test_house_m3, y_test_shaped_m):
    print(checked.shape)

### 8a. Model-1 Multi-Linear Regression Results

In [None]:
# Model 1: fit on the training features, then report the learned
# parameters and the R-squared on the training and test data.
linreg_multi_house_m1 = LinearRegression().fit(X_train_house_m1, y_train_shaped_m)

print('linear model coeff (w): {}'.format(linreg_multi_house_m1.coef_))
print('linear model intercept (b): {}'.format(linreg_multi_house_m1.intercept_))

r2_train_m1 = linreg_multi_house_m1.score(X_train_house_m1, y_train_shaped_m)
print('R-squared score (training): {}'.format(r2_train_m1))

r2_test_m1 = linreg_multi_house_m1.score(X_test_house_m1, y_test_shaped_m)
print('R-squared score (test): {}'.format(r2_test_m1))

### 8b. Model-2 Multi-Linear Regression Results

In [None]:
# Model 2: fit on the training features, then report the learned
# parameters and the R-squared on the training and test data.
linreg_multi_house_m2 = LinearRegression().fit(X_train_house_m2, y_train_shaped_m)

print('linear model coeff (w): {}'.format(linreg_multi_house_m2.coef_))
print('linear model intercept (b): {}'.format(linreg_multi_house_m2.intercept_))

r2_train_m2 = linreg_multi_house_m2.score(X_train_house_m2, y_train_shaped_m)
print('R-squared score (training): {}'.format(r2_train_m2))

r2_test_m2 = linreg_multi_house_m2.score(X_test_house_m2, y_test_shaped_m)
print('R-squared score (test): {}'.format(r2_test_m2))

### 8c. Model-3 Multi-Linear Regression Results

In [None]:
# Model 3: fit on the training features, then report the learned
# parameters and the R-squared on the training and test data.
linreg_multi_house_m3 = LinearRegression().fit(X_train_house_m3, y_train_shaped_m)

print('linear model coeff (w): {}'.format(linreg_multi_house_m3.coef_))
print('linear model intercept (b): {}'.format(linreg_multi_house_m3.intercept_))

r2_train_m3 = linreg_multi_house_m3.score(X_train_house_m3, y_train_shaped_m)
print('R-squared score (training): {}'.format(r2_train_m3))

r2_test_m3 = linreg_multi_house_m3.score(X_test_house_m3, y_test_shaped_m)
print('R-squared score (test): {}'.format(r2_test_m3))