## **Linear Regression Implementation**

We will code a Linear Regression model (class) from scratch.

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(630)
n = 80

## marketing
tv = np.random.randint(10, 100, n)
social = np.random.randint(5, 60, n)
email = np.random.randint(1, 20, n)

## sales (oracle - true mechanism)
sales = 3.5*tv +2*social + 5*email + 10 + np.random.normal(0, 20, n)

## df
df = pd.DataFrame({'tv': tv, 'social': social, 'email': email, 'sales': sales})
df.head()

Unnamed: 0,tv,social,email,sales
0,98,32,6,458.920413
1,71,30,3,354.745338
2,57,13,6,239.369662
3,64,28,6,316.853841
4,45,31,8,288.29257


In [6]:
## Linear Regression needs Design Matrix X = [1, tv, social, email]
XD = df.drop('sales', axis = 1)
XD['intercept'] = 1
XD

Unnamed: 0,tv,social,email,intercept
0,98,32,6,1
1,71,30,3,1
2,57,13,6,1
3,64,28,6,1
4,45,31,8,1
...,...,...,...,...
75,96,10,2,1
76,17,53,17,1
77,15,29,1,1
78,72,15,6,1


In [7]:
## X: Feature matrix
X = df.drop('sales', axis = 1)

In [9]:
## y vector
y = df['sales']
y

Unnamed: 0,sales
0,458.920413
1,354.745338
2,239.369662
3,316.853841
4,288.292570
...,...
75,369.748021
76,252.005167
77,130.485337
78,302.592324


In [22]:
## RegressionLinear
class RegressionLinear:
  def __init__(self, *,  fit_intercept = False):
    self.X = X
    self.y = y
    self.fit_intercept = fit_intercept

  def fit(self, X, y):
    if self.fit_intercept:
      X['intercept'] = 1
    self.coef_ = np.linalg.inv(X.T.dot(X)).dot(X.T.dot(y))
    return self.coef_

  def predict(self, Xnew):
    if self.fit_intercept:
      X['intercept'] = 1
    return Xnew.dot(self.coef_)

In [23]:
## instantiate the model
reg1 = RegressionLinear(fit_intercept = True)

## fit (this should return coefficients)
reg1.fit(X, y)

## predict the whole X data set
yhat = reg1.predict(X)

In [19]:
yhat

Unnamed: 0,0
0,443.311207
1,329.648374
2,263.591386
3,317.182540
4,267.429571
...,...
75,372.423385
76,260.228342
77,122.266475
78,319.694867


In [24]:
## sklearn LinearRegression
from sklearn.linear_model import LinearRegression

## instance
reg2 = LinearRegression(fit_intercept=True)

## fit and predictions
yhat2 = reg2.fit(X, y).predict(X)

In [26]:
## NNRegressor (sklearn)
from sklearn.neural_network import MLPRegressor

## instance
nn = MLPRegressor(hidden_layer_sizes=(3,))

## fit
nn.fit(X, y)

## predict
yhat3 = nn.predict(X)



In [27]:
yhat4 = MLPRegressor(hidden_layer_sizes=(3,)).fit(X,y).predict(X)



## **Vectors and Matrices**

In [30]:
import numpy as np
import tensorflow as tf

## np.array (creates tensors, matrices, vectors)
## tf.constant (creates tensors, ...)

v1 = np.array([1,2,3])
t1 = tf.constant([1,2,3])

In [29]:
v1.T

array([1, 2, 3])

In [32]:
tf.transpose(t1)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3], dtype=int32)>

In [33]:
from statistics import mean

mean(v1)

np.int64(2)

In [34]:
v1.mean()

np.float64(2.0)

In [37]:
## methods (attributes) that are useful: np and tf
v1.shape
t1.shape

v1.dtype
t1.dtype

tf.int32