# Simple Linear Regression (SLR) using Python

### Data Frame Creation

In [1]:
import pandas as pd

# Raw observations, paired by position: `price` feeds column x, `sale` feeds column y.
price = [160, 180, 200, 220, 240, 260, 280]
sale = [126, 103, 82, 75, 82, 40, 20]

# One single-column frame per variable; both get the default 0..6 integer index.
priceDF = pd.DataFrame({'x': price})
saleDF = pd.DataFrame({'y': sale})

# Put the two columns side by side by aligning on the shared index.
houseDf = priceDF.join(saleDF)

# Print the combined frame first, then the x-only frame.
for frame in (houseDf, priceDF):
    print(frame)

     x    y
0  160  126
1  180  103
2  200   82
3  220   75
4  240   82
5  260   40
6  280   20
     x
0  160
1  180
2  200
3  220
4  240
5  260
6  280


### Statsmodels Usage - Ordinary Least Squares (OLS)

In [2]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Describe the regression of y on x with the formula interface, then fit it
# by ordinary least squares and print the text summary of the fitted model.
ols_spec = smf.ols(formula='y~x', data=houseDf)
smfModel = ols_spec.fit()
print(smfModel.summary())



                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.911
Model:                            OLS   Adj. R-squared:                  0.893
Method:                 Least Squares   F-statistic:                     50.93
Date:                Thu, 09 May 2019   Prob (F-statistic):           0.000838
Time:                        10:36:33   Log-Likelihood:                -26.006
No. Observations:                   7   AIC:                             56.01
Df Residuals:                       5   BIC:                             55.90
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    249.8571     24.841     10.058      0.0

### RESULT Explanation:
#### Dep. Variable: The Dependent Variable
#### Model: Algorithm used. Here, it is Ordinary Least Squares
#### Method: Parameter Fitting method. Here, it is Least Squares
#### No. Observations: Number of rows used for model fitting.
#### Df Residuals: The degrees of freedom of the residuals (the number of observations minus the number of estimated parameters, including the intercept; here 7 - 2 = 5).
#### Df Model: The degrees of freedom of the model (the number of parameters estimated in the model, excluding the constant term).
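#### R-squared: The proportion of the variance in the dependent variable that the model explains, i.e. how much better the model fits than a baseline that always predicts the mean of y (1 = perfect fit, 0 = no better than the baseline).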

### Data Prep - create the data set by executing the following code

In [9]:
from sklearn.datasets import fetch_california_housing

from sklearn.datasets import load_boston

import pandas as pd 

# Load the two scikit-learn housing data sets.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it before re-running.
boston = load_boston()
# NOTE(review): `california` is not used by any cell visible here -- confirm
# whether a later cell needs it.
california = fetch_california_housing()

# Feature matrix as a frame with named columns, with the response appended
# as a final `target` column.
dataset = pd.DataFrame(boston.data, columns=boston.feature_names).assign(
    target=boston.target
)

# Preview the first five rows.
print(dataset.head())

      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31   0.0  0.538  6.575  65.2  4.0900  1.0  296.0   
1  0.02731   0.0   7.07   0.0  0.469  6.421  78.9  4.9671  2.0  242.0   
2  0.02729   0.0   7.07   0.0  0.469  7.185  61.1  4.9671  2.0  242.0   
3  0.03237   0.0   2.18   0.0  0.458  6.998  45.8  6.0622  3.0  222.0   
4  0.06905   0.0   2.18   0.0  0.458  7.147  54.2  6.0622  3.0  222.0   

   PTRATIO       B  LSTAT  target  
0     15.3  396.90   4.98    24.0  
1     17.8  396.90   9.14    21.6  
2     17.8  392.83   4.03    34.7  
3     18.7  394.63   2.94    33.4  
4     18.7  396.90   5.33    36.2  


In [26]:
# Predictor X is the RM column; response Y is the target column.
X, Y = dataset['RM'], dataset['target']

In [27]:
# Add a constant column to X so the fitted line has an intercept: Y = m*X + C.
X = sm.add_constant(X)
# Set up the OLS model (not fitted yet): Y is the response, X the design matrix.
initia = sm.OLS(endog=Y, exog=X)

In [28]:
# Fit the OLS model set up in `initia`; `fitmodel` holds the fitted results.
fitmodel = initia.fit()

In [30]:
# Show the regression summary; as the cell's last expression it is rendered as the cell output.
fitmodel.summary()

0,1,2,3
Dep. Variable:,target,R-squared:,0.484
Model:,OLS,Adj. R-squared:,0.483
Method:,Least Squares,F-statistic:,471.8
Date:,"Thu, 09 May 2019",Prob (F-statistic):,2.49e-74
Time:,15:48:02,Log-Likelihood:,-1673.1
No. Observations:,506,AIC:,3350.0
Df Residuals:,504,BIC:,3359.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-34.6706,2.650,-13.084,0.000,-39.877,-29.465
RM,9.1021,0.419,21.722,0.000,8.279,9.925

0,1,2,3
Omnibus:,102.585,Durbin-Watson:,0.684
Prob(Omnibus):,0.0,Jarque-Bera (JB):,612.449
Skew:,0.726,Prob(JB):,1.02e-133
Kurtosis:,8.19,Cond. No.,58.4
