# Simple Linear Regression

In [1]:
# Import packages and classes
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [2]:
# Rocket propellant data set from Montgomery's regression analysis textbook.
# NOTE(review): in the textbook the response is presumably shear strength and the
# regressor is propellant age; confirm that the x/y roles used here are intended.
x = np.array([
    2158.7, 1678.15, 2316, 2061.3, 2207.5,
    1708.3, 1784.7, 2575, 2357.9, 2256.7,
    2165.2, 2399.55, 1779.8, 2336.75, 1765.3,
    2053.5, 2414.4, 2200.5, 2654.2, 1753.7,
]).reshape((-1, 1))  # one column, as many rows as there are observations
y = np.array([
    15.5, 23.75, 8, 17, 5.5,
    19, 24, 2.5, 7.5, 11,
    13, 3.75, 25, 9.75, 22,
    18, 6, 12.5, 2, 21.5,
])

You now have two arrays: the input x and the output y. .reshape() is called on x because this array is required to be two-dimensional, or to be more precise, to have one column and as many rows as there are observations. That’s exactly what the argument (-1, 1) of .reshape() specifies: -1 lets NumPy infer the number of rows, and 1 fixes the number of columns.

In [3]:
# Check the dimensionality of both arrays: the input must be 2-D, the output is 1-D
print("x.ndim : ", np.ndim(x))
print("y.ndim : ", np.ndim(y))

x.ndim :  2
y.ndim :  1


In [4]:
# Instantiate the linear regression estimator; fitting an intercept is the
# default, spelled out here for the reader
model = LinearRegression(fit_intercept=True)

In [5]:
# Estimate intercept and slope from the data; as the cell's last expression,
# the returned estimator is displayed
model.fit(X=x, y=y)

LinearRegression()

In [6]:
# Goodness of fit, evaluated on the same data the model was fitted to
r_squared = model.score(x, y)
print('coefficient of determination R^2:', r_squared)

coefficient of determination R^2: 0.9018414316763039


In [7]:
# intercept_ is a scalar for this single-target fit
fitted_intercept = model.intercept_
print('intercept:', fitted_intercept)

intercept: 65.09764727188451


In [8]:
# coef_ is an array holding one coefficient per input column
fitted_slope = model.coef_
print('slope:', fitted_slope)

slope: [-0.02427333]


In [9]:
# Predicted response for the inputs used in fitting; the bare last expression
# displays the dimensionality of the prediction array
y_pred = model.predict(X=x)
print('predicted response:', y_pred, sep='\n')
np.ndim(y_pred)

predicted response:
[12.69880643 24.36335591  8.88061138 15.06302893 11.51426785 23.63151497
 21.77703244  2.5938185   7.86355878 10.32001994 12.54102978  6.85257453
 21.89597176  8.37693975 22.24793507 15.25236091  6.49211555 11.68418117
  0.67137064 22.52950571]


1

In [10]:
# Alternatively, evaluate the fitted line by hand: intercept + slope * x.
# model.coef_ is a one-element array and x is a single column, so broadcasting
# keeps the column layout and the result is two-dimensional, unlike the
# one-dimensional array returned by model.predict(x).
y_pred = x * model.coef_ + model.intercept_
print('predicted response:', y_pred, sep='\n')
y_pred.ndim

predicted response:
[[12.69880643]
 [24.36335591]
 [ 8.88061138]
 [15.06302893]
 [11.51426785]
 [23.63151497]
 [21.77703244]
 [ 2.5938185 ]
 [ 7.86355878]
 [10.32001994]
 [12.54102978]
 [ 6.85257453]
 [21.89597176]
 [ 8.37693975]
 [22.24793507]
 [15.25236091]
 [ 6.49211555]
 [11.68418117]
 [ 0.67137064]
 [22.52950571]]


2

# Multiple Linear Regression

In [11]:
# Load the delivery-time data set. The path is relative to the notebook's
# working directory; pandas is imported in the imports cell at the top.
data = pd.read_csv("DeliveryTimeData.csv")

In [12]:
n = len(data)  # Number of observations (rows of the data frame)

In [13]:
# Split by column position: column 1 is the response, columns 2 onward are the regressors.
# NOTE(review): column 0 is skipped — presumably an observation index; confirm against the CSV.
X, y = data.iloc[:, 2:], data.iloc[:, 1]

In [14]:
# Create the estimator, then fit it on the regressors; fit() returns the
# estimator itself, so rebinding keeps the cell free of displayed output
model = LinearRegression()
model = model.fit(X, y)

In [15]:
# Summary of the multiple regression fit: R^2, intercept, and one slope per regressor
r_squared = model.score(X, y)
print('coefficient of determination:', r_squared)
print('intercept:', model.intercept_)
print('slope:', model.coef_)

coefficient of determination: 0.9595937494832257
intercept: 2.3412311451922
slope: [1.61590721 0.01438483]


In [16]:
# Fitted values for the observations the model was trained on
y_pred = model.predict(X=X)
print('predicted response:', y_pred, sep='\n')

predicted response:
[21.70808432 10.35361455 12.0797937   9.95564609 14.19439835 18.39957428
  7.15537645 16.67339513 71.8202938  19.12358708 38.09250698 21.5930409
 12.47299068 18.68246414 23.3287982  29.66292843 14.91363966 15.55137869
  7.70680652 40.88796994 20.51417893 56.00652789 23.35756786 24.4028535
 10.96258393]


In [18]:
# Rank of the feature matrix X determined during fitting (per the scikit-learn
# LinearRegression attribute docs); the value shown matches the two regressors.
model.rank_

2