In [73]:
# Linear Regression Using Matrices

import numpy as np
import pandas as pd

# Load the wine data
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
df = np.array(pd.read_csv(url, delimiter=';'))

# set m to be the number of data sets and n to be the number of features (including theta 0)
m = df.shape[0]
n = df.shape[1]

# Add a column of ones to X for intercepts
Xij = df[:,:-1]
X0 = np.ones((m,1))
X = np.append(X0, Xij, axis=1)
# Initial y is a normal array so need to reshape y to a m*1 vector
y = df[:,-1].reshape(m,1)

alpha = 0.1
iterations = 100
theta = np.ones((n,1))

for iteration in range(iterations):
    
    # Calculate predictions; dot returns a dot product
    y_hat = np.dot(X,theta)
    # Calculate the cost, J; multiply() is element wise
    J = (1/(2*m)) * ( np.multiply((y_hat - y),(y_hat - y)) ).sum(axis=0)
    # Gradient descent
    theta -= (alpha/m) * ( np.dot( np.transpose(X),(y_hat - y) ) )
    
    # Print out the last 10 iterations for examples
    if iteration >= 90:

        print(iteration)
        print(J)
        print(theta)

 


90
[inf]
[[-4.02266561e+231]
 [-3.29491040e+232]
 [-2.15507361e+231]
 [-1.10647338e+231]
 [-1.09826350e+232]
 [-3.57302806e+230]
 [-8.33502249e+232]
 [-2.72252583e+233]
 [-4.00990780e+231]
 [-1.32958038e+232]
 [-2.66802587e+231]
 [-4.13882054e+232]]
91
[inf]
[[1.47225920e+234]
 [1.20590738e+235]
 [7.88737436e+233]
 [4.04959243e+233]
 [4.01954503e+234]
 [1.30769593e+233]
 [3.05054279e+235]
 [9.96419812e+235]
 [1.46758996e+234]
 [4.86614384e+234]
 [9.76473321e+233]
 [1.51477086e+235]]
92
[inf]
[[-5.38833544e+236]
 [-4.41351188e+237]
 [-2.88670763e+236]
 [-1.48211418e+236]
 [-1.47111710e+237]
 [-4.78604875e+235]
 [-1.11647105e+238]
 [-3.64680633e+238]
 [-5.37124643e+236]
 [-1.78096460e+237]
 [-3.57380397e+236]
 [-5.54392424e+237]]
93
[inf]
[[1.97208200e+239]
 [1.61530540e+240]
 [1.05650886e+239]
 [5.42440374e+238]
 [5.38415543e+239]
 [1.75165052e+238]
 [4.08618300e+240]
 [1.33469811e+241]
 [1.96582758e+239]
 [6.51816925e+239]
 [1.30797991e+239]
 [2.02902609e+240]]
94
[inf]
[[-7.21764163e+

  return umr_sum(a, axis, dtype, out, keepdims)


In [95]:
# Linear Regression Using matrices and if conditons

import numpy as np
import pandas as pd

# Load the wine data
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
df = np.array(pd.read_csv(url, delimiter=';'))

# set m to be the number of data sets and n to be the number of features (including theta 0)
m = df.shape[0]
n = df.shape[1]

# Add a column of ones to X for intercepts
Xij = df[:,:-1]
X0 = np.ones((m,1))
X = np.append(X0, Xij, axis=1)
# Initial y is a normal array so need to reshape y to a m*1 vector
y = df[:,-1].reshape(m,1)

alpha = 0.1
iterations = 100
theta = np.ones((n,1))

for iteration in range(iterations):
    
    y_hat = np.dot(X,theta)
    
    # Calculate the cost, J
    J = (1/2*m) * (y_hat**2).sum(axis=0)
    
    # Gradient descent
    for i in range(0,m):
        for j in range(0,n):
            theta[j] -= (alpha/m) * (y_hat[i]-y[i]) * X[i][j]
            
    if iteration >= 90:
        print(iteration)
        print(J)
        print(theta)
        


        
    

  return umr_sum(a, axis, dtype, out, keepdims)


90
[inf]
[[-4.02266561e+231]
 [-3.29491040e+232]
 [-2.15507361e+231]
 [-1.10647338e+231]
 [-1.09826350e+232]
 [-3.57302806e+230]
 [-8.33502249e+232]
 [-2.72252583e+233]
 [-4.00990780e+231]
 [-1.32958038e+232]
 [-2.66802587e+231]
 [-4.13882054e+232]]
91
[inf]
[[1.47225920e+234]
 [1.20590738e+235]
 [7.88737436e+233]
 [4.04959243e+233]
 [4.01954503e+234]
 [1.30769593e+233]
 [3.05054279e+235]
 [9.96419812e+235]
 [1.46758996e+234]
 [4.86614384e+234]
 [9.76473321e+233]
 [1.51477086e+235]]
92
[inf]
[[-5.38833544e+236]
 [-4.41351188e+237]
 [-2.88670763e+236]
 [-1.48211418e+236]
 [-1.47111710e+237]
 [-4.78604875e+235]
 [-1.11647105e+238]
 [-3.64680633e+238]
 [-5.37124643e+236]
 [-1.78096460e+237]
 [-3.57380397e+236]
 [-5.54392424e+237]]
93
[inf]
[[1.97208200e+239]
 [1.61530540e+240]
 [1.05650886e+239]
 [5.42440374e+238]
 [5.38415543e+239]
 [1.75165052e+238]
 [4.08618300e+240]
 [1.33469811e+241]
 [1.96582758e+239]
 [6.51816925e+239]
 [1.30797991e+239]
 [2.02902609e+240]]
94
[inf]
[[-7.21764163e+