# Multiple Linear Regression: Statsmodels

(Note: checks of the model assumptions and data preprocessing are omitted from this notebook.)

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so pick whichever whitegrid style this
# installation actually ships (falling back to the default style if neither).
_WHITEGRID_STYLES = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
plt.style.use(
    next((s for s in _WHITEGRID_STYLES if s in plt.style.available), "default")
)

  import pandas.util.testing as tm


## Load data

In [2]:
# Pull the sample real-estate dataset directly from its raw GitHub location.
url = "https://raw.githubusercontent.com/lucaskienast/Regression/main/1)%20Linear%20Regression/real_estate_price_size_year.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to confirm the file parsed as expected.
df.head(n=5)

Unnamed: 0,price,size,year
0,234314.144,643.09,2015
1,228581.528,656.22,2009
2,281626.336,487.29,2018
3,401255.608,1504.75,2015
4,458674.256,1275.46,2009


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

Unnamed: 0,price,size,year
count,100.0,100.0,100.0
mean,292289.47016,853.0242,2012.6
std,77051.727525,297.941951,4.729021
min,154282.128,479.75,2006.0
25%,234280.148,643.33,2009.0
50%,280590.716,696.405,2015.0
75%,335723.696,1029.3225,2018.0
max,500681.128,1842.51,2018.0


## Build Multiple Linear Regression Model

In [4]:
# Split the frame into the response (price) and the two explanatory columns.
target_column = "price"
feature_columns = ["size", "year"]

y = df[target_column]
X1 = df[feature_columns]

In [5]:
# Fit an ordinary least squares regression of price on size and year.
# statsmodels does not add an intercept automatically, so prepend a constant
# column to the design matrix first.
X = sm.add_constant(X1)
model = sm.OLS(endog=y, exog=X)
results = model.fit()

# Text report: coefficients, standard errors, and goodness-of-fit statistics.
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.776
Model:                            OLS   Adj. R-squared:                  0.772
Method:                 Least Squares   F-statistic:                     168.5
Date:                Tue, 24 Aug 2021   Prob (F-statistic):           2.77e-32
Time:                        14:23:54   Log-Likelihood:                -1191.7
No. Observations:                 100   AIC:                             2389.
Df Residuals:                      97   BIC:                             2397.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const      -5.772e+06   1.58e+06     -3.647      0.0

In [6]:
# Extract the fitted coefficients by label rather than by integer position:
# `results.params` is a pandas Series indexed by regressor name, and integer
# keys on a label-indexed Series rely on a positional fallback that pandas
# has deprecated.
alpha = results.params["const"]      # intercept column added by sm.add_constant
beta_size = results.params["size"]   # slope for the "size" feature
beta_year = results.params["year"]   # slope for the "year" feature

## Make Prediction

In [7]:
# Feature values of the property whose price we want to estimate.
year = 2009
size = 500

# Evaluate the fitted regression equation by hand:
# intercept + (size coefficient * size) + (year coefficient * year).
yhat = (
    alpha
    + beta_size * size
    + beta_year * year
)
yhat

201405.13115807343