# Multiple Linear Regression

## Required modules

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import pymc3 as pm

%matplotlib inline

## Data

In [None]:
# Read the hosted Auto CSV into a DataFrame (requires network access).
AUTO_CSV_URL = "http://www-bcf.usc.edu/~gareth/ISL/Auto.csv"
auto_df = pd.read_csv(AUTO_CSV_URL)

## EDA

In [None]:
# Scatter of acceleration (y-axis) against weight (x-axis), one point per row.
sns.scatterplot(data=auto_df, x="weight", y="acceleration")

## Split into response y and predictors X

In [None]:
# Response vector: the "mpg" column.
y = auto_df.loc[:, "mpg"]
# Predictor frame: every remaining column (drop returns a new frame, so
# auto_df itself is left unchanged).
X = auto_df.drop(labels="mpg", axis="columns")

## Add constant

In [None]:
# Constant column of ones; the OLS fit further down uses it as the intercept
# term (statsmodels' OLS does not add one on its own).
X.loc[:, "intercept"] = 1

## Multiple linear regression model y ~ f(X)

In [None]:
with pm.Model() as model:

    # Priors: vague zero-mean normals (sd = 1000, i.e. precision 1e-6) on the
    # intercept and on the two slopes. The float literal keeps the precision
    # non-zero even where `/` is integer division (Python 2).
    prior_tau = 1.0 / 1000**2
    alpha = pm.Normal('alpha', mu=0, tau=prior_tau)
    beta = pm.Normal('beta', mu=0, tau=prior_tau, shape=2)

    # The noise scale is parameterised on the log scale: sigma = exp(log_sigma).
    # With log_sigma in [0, 10] this prior restricts sigma to [1, e**10].
    log_sigma = pm.Uniform('log_sigma', lower=0, upper=10)

    # Expected value: beta[0] is the weight slope, beta[1] the acceleration slope.
    mu = alpha + beta[0]*X['weight'].values + beta[1]*X['acceleration'].values

    # Likelihood. The precision is tau = 1 / sigma**2 = exp(-2 * log_sigma).
    # pm.math.exp is used (rather than np.exp) because log_sigma is a symbolic
    # model variable, not a NumPy array.
    y_obs = pm.Normal('y', mu=mu, tau=pm.math.exp(-2 * log_sigma), observed=y.values)

### Samples

In [None]:
# Draw 1000 posterior samples from `model`; the sampler and its
# initialisation are left to PyMC3's defaults.
trace = pm.sample(draws=1000, model=model)

### Plot

In [None]:
# Visualise the posterior draws stored in `trace`.
pm.plot_posterior(trace)

## Compare with OLS

In [None]:
# Fit the same specification (intercept + weight + acceleration) by ordinary
# least squares so its estimates can be compared with the posterior above.
ols_columns = ["intercept", "weight", "acceleration"]
ols_design = X.loc[:, ols_columns]
ols_fit = sm.OLS(y, ols_design).fit()
# Kept as the final expression so the notebook renders the summary table.
ols_fit.summary()