# Multiple Linear Regression using SkLearn

## Import the relevant libraries

In [12]:
# Numerical / data-handling stack.
import numpy as np
import pandas as pd
# NOTE(review): np, scipy, sm and plt are not referenced in the cells visible
# here — confirm they are still needed before pruning.
import scipy
import statsmodels.api as sm
# Plotting. sns.set() runs once at import time so that any figure drawn later
# picks up seaborn's styling instead of matplotlib's defaults.
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
# scikit-learn: the regression estimator and the F-test helper used by the
# "Regression" and "F regression" cells.
from sklearn.linear_model import LinearRegression 
from sklearn.feature_selection import f_regression

## Load the data

In [3]:
# Read the raw listing data; the path is relative to the notebook's working directory.
df = pd.read_csv('real_estate_price.csv')
# Preview the first five rows to sanity-check the columns that were parsed.
df.head(n=5)

Unnamed: 0,price,size,year
0,234314.144,643.09,2015
1,228581.528,656.22,2009
2,281626.336,487.29,2018
3,401255.608,1504.75,2015
4,458674.256,1275.46,2009


In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for each numeric column.
df.describe()

Unnamed: 0,price,size,year
count,100.0,100.0,100.0
mean,292289.47016,853.0242,2012.6
std,77051.727525,297.941951,4.729021
min,154282.128,479.75,2006.0
25%,234280.148,643.33,2009.0
50%,280590.716,696.405,2015.0
75%,335723.696,1029.3225,2018.0
max,500681.128,1842.51,2018.0


## Create the regression

### Declare the dependent and the independent variables

In [5]:
# Independent variables (features): kept as a two-column DataFrame so the
# estimator receives a 2-D input.
x = df[['size','year']]
# Dependent variable (target): the price column as a Series.
y = df['price']

### Regression

In [6]:
# Create the linear regression estimator with its default settings.
reg = LinearRegression()
# Fit on the full dataset; leaving fit() as the last expression displays the
# fitted estimator's repr as the cell output.
reg.fit(X=x, y=y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### Coefficients

In [7]:
# Fitted slope for each feature, in the column order of x: ['size', 'year'].
reg.coef_

array([ 227.70085401, 2916.78532684])

### Intercept

In [8]:
# Fitted constant term of the regression.
reg.intercept_

-5772267.017463278

### R-Squared

In [10]:
# R-squared of the fitted model, evaluated on the same x and y it was trained on
# (an in-sample score, not a hold-out estimate).
reg.score(x,y)

0.7764803683276796

### Adjusted R-Squared

In [11]:
# n = number of observations (rows), p = number of predictors (columns).
n, p = x.shape
# In-sample R-squared of the fitted model.
r2 = reg.score(x, y)

# Adjusted R-squared penalises R-squared for the number of predictors:
#   1 - (1 - R^2) * (n - 1) / (n - p - 1)
adjusted_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
adjusted_r2

0.7718717161282503

### F regression

In [13]:
# Returns a pair of arrays: (F-statistics, p-values), one entry per column of x.
# Each feature is tested on its own against y (univariate test).
f_regression(x,y)

(array([285.92105192,   0.85525799]), array([8.12763222e-31, 3.57340758e-01]))

In [14]:
# Keep only the second element of the result: the p-values, in the column
# order of x (['size', 'year']).
p_values = f_regression(x,y)[1]
p_values

array([8.12763222e-31, 3.57340758e-01])

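The p-value for `size` (≈ 8.1e-31) is far below 0.05, so `size` is a statistically significant predictor of price. The p-value for `year` (≈ 0.357) is above 0.05, so `year` is **not** significant at the 5% level. Note that `f_regression` tests each feature on its own in a univariate regression, so these p-values are not the same as the ones from the joint multiple regression.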