In [6]:
# Make a prediction
def predict(row, coefficients):
    """Return the linear-model output for a single row.

    ``coefficients[0]`` is the intercept. Every later coefficient is
    multiplied by the row value one position to its left, and the last
    element of ``row`` is never read as an input.
    """
    yhat = coefficients[0]
    for position, value in enumerate(row[:-1], start=1):
        yhat += coefficients[position] * value
    return yhat

# Estimate linear regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
    """Fit linear-regression coefficients with stochastic gradient descent.

    Args:
        train: Rows whose last element is the expected output; the first row
            fixes how many coefficients are created.
        l_rate: Multiplier applied to the error on every update.
        n_epoch: Number of passes over ``train``.

    Returns:
        The coefficient list, intercept first.

    Prints one summary line per epoch with the accumulated squared error.
    """
    weights = [0.0] * len(train[0])
    for epoch in range(n_epoch):
        squared_error_total = 0
        for sample in train:
            residual = predict(sample, weights) - sample[-1]
            squared_error_total += residual ** 2
            # The same scaled error drives the intercept and every weight.
            step = l_rate * residual
            weights[0] -= step
            for position, value in enumerate(sample[:-1], start=1):
                weights[position] -= step * value
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, squared_error_total))
    return weights

# Fit the coefficients on a small dataset (last column is the expected output)
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
l_rate, n_epoch = 0.001, 50
coef = coefficients_sgd(dataset, l_rate=l_rate, n_epoch=n_epoch)
print(coef)

>epoch=0, lrate=0.001, error=46.236
>epoch=1, lrate=0.001, error=41.305
>epoch=2, lrate=0.001, error=36.930
>epoch=3, lrate=0.001, error=33.047
>epoch=4, lrate=0.001, error=29.601
>epoch=5, lrate=0.001, error=26.543
>epoch=6, lrate=0.001, error=23.830
>epoch=7, lrate=0.001, error=21.422
>epoch=8, lrate=0.001, error=19.285
>epoch=9, lrate=0.001, error=17.389
>epoch=10, lrate=0.001, error=15.706
>epoch=11, lrate=0.001, error=14.213
>epoch=12, lrate=0.001, error=12.888
>epoch=13, lrate=0.001, error=11.712
>epoch=14, lrate=0.001, error=10.668
>epoch=15, lrate=0.001, error=9.742
>epoch=16, lrate=0.001, error=8.921
>epoch=17, lrate=0.001, error=8.191
>epoch=18, lrate=0.001, error=7.544
>epoch=19, lrate=0.001, error=6.970
>epoch=20, lrate=0.001, error=6.461
>epoch=21, lrate=0.001, error=6.009
>epoch=22, lrate=0.001, error=5.607
>epoch=23, lrate=0.001, error=5.251
>epoch=24, lrate=0.001, error=4.935
>epoch=25, lrate=0.001, error=4.655
>epoch=26, lrate=0.001, error=4.406
>epoch=27, lrate=0.001,

In [9]:
# Example of making a prediction with coefficients

# Make a prediction
def predict(row, coefficients):
    """Echo ``row`` to stdout, then return its linear-model prediction.

    The intercept is ``coefficients[0]``; each remaining coefficient is
    paired with the row value one index lower. The row's final element is
    not used as an input.
    """
    print(row)
    yhat = coefficients[0]
    for offset, x_value in enumerate(row[:-1]):
        # x value from the row times the coefficient that follows the intercept
        yhat += coefficients[offset + 1] * x_value
    return yhat

# Compare each row's expected output (last column) with the model's prediction
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
coef = [0.4, 0.8]
for row in dataset:
    yhat = predict(row, coefficients=coef)
    print("Expected=%.3f, Predicted=%.3f" % (row[-1], yhat))

[1, 1]
Expected=1.000, Predicted=1.200
[2, 3]
Expected=3.000, Predicted=2.000
[4, 3]
Expected=3.000, Predicted=3.600
[3, 2]
Expected=2.000, Predicted=2.800
[5, 5]
Expected=5.000, Predicted=4.400


In [71]:
class MultivariateLinearRegression:
    """Linear regression with any number of inputs, fitted by stochastic gradient descent.

    Rows handed to ``train`` hold the input values followed by the actual
    (target) value in the last column.
    """

    def __init__(self):
        # Populated by train(); None means the model has not been fitted yet.
        self.__coefficients = None

    def train(self, train, learning_rate, epochs):
        """Fit the model to ``train`` and remember the resulting coefficients.

        Args:
            train: Sequence of rows; the last column of each row is the
                actual value, the columns before it are the inputs. The
                first row determines the column count.
            learning_rate: Factor applied to the error on every update.
            epochs: Number of passes over all rows.
        """
        # Kept under a name other than ``train``: assigning ``self.train``
        # would hide this method on the instance, so a second call to
        # ``train`` would fail with "'list' object is not callable".
        self.train_data = train
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.train_column_count = len(train[0])
        self.__coefficients = self.coefficients_sgd()

    def get_coefficients(self):
        """Return ``[bias, coefficient_1, ...]``, or None before ``train`` has run."""
        return self.__coefficients

    def __predict_row(self, row, coefficients):
        """Return the predicted value for one row using ``coefficients``."""
        # coefficients[0] is the b in y = mx + b: the bias the prediction
        # starts from.
        predicted_value = coefficients[0]
        # Only the input columns are read, so a trailing actual-value column
        # is ignored.
        for i in range(self.train_column_count - 1):
            predicted_value += coefficients[i + 1] * row[i]
        return predicted_value

    def coefficients_sgd(self):
        """Estimate the coefficients from the stored training rows.

        Returns:
            A list holding the bias followed by one coefficient per input
            column, all starting from zero.
        """
        input_count = self.train_column_count - 1
        coefficients = [0.0] * self.train_column_count
        for _ in range(self.epochs):
            for row in self.train_data:
                # error is the predicted value minus the actual value
                error = self.__predict_row(row, coefficients) - row[-1]
                step = self.learning_rate * error
                # update the bias, then every input coefficient, from the
                # same scaled error
                coefficients[0] -= step
                for i in range(input_count):
                    coefficients[i + 1] -= step * row[i]
        return coefficients

    def predict(self, data):
        """Return one predicted value per row of ``data``.

        NOTE(review): the original method had no body, so this contract is an
        assumption to confirm. ``data`` is treated as an iterable of rows
        laid out like the training rows; a trailing actual-value column is
        optional because only the input columns are read.

        Raises:
            RuntimeError: If ``train`` has not been called yet.
        """
        if self.__coefficients is None:
            raise RuntimeError("train() must be called before predict()")
        return [self.__predict_row(row, self.__coefficients) for row in data]

In [72]:
# Fit the class-based model on the small dataset and show its coefficients
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
l_rate, n_epoch = 0.001, 50

t = MultivariateLinearRegression()
t.train(dataset, l_rate, n_epoch)
print(t.get_coefficients())

[0.22998234937311363, 0.8017220304137576]


In [28]:
# Make a prediction
def predict(row, coefficients):
    """Return the prediction for ``row`` while printing every step.

    Starts from the intercept ``coefficients[0]`` and adds one
    coefficient-times-value term per row element except the last. The
    starting value, each coefficient, and each running total go to stdout.
    """
    yhat = coefficients[0]  # the y-intercept
    print("yhat> %s" % (yhat,))
    for slot, r in enumerate(row[:-1], start=1):
        c = coefficients[slot]
        before = yhat
        print("coef: %s" % (c,))
        yhat += c * r

        print("%s = %s + (%s * %s)" % (yhat, before, c, r))
        print("RESULT: yhat: %s coef: %s row val: %s" % (yhat, c, r))
    return yhat  # intercept plus all coefficient * value terms


# Run the traced prediction over every row and compare with the last column
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
coef = [0.4, 0.8]
for row in dataset:
    yhat = predict(row, coefficients=coef)
    print("Expected=%.3f, Predicted=%.3f" % (row[-1], yhat))

yhat> 0.4
coef: 0.8
1.2000000000000002 = 0.4 + (0.8 * 1)
RESULT: yhat: 1.2000000000000002 coef: 0.8 row val: 1
Expected=1.000, Predicted=1.200
yhat> 0.4
coef: 0.8
2.0 = 0.4 + (0.8 * 2)
RESULT: yhat: 2.0 coef: 0.8 row val: 2
Expected=3.000, Predicted=2.000
yhat> 0.4
coef: 0.8
3.6 = 0.4 + (0.8 * 4)
RESULT: yhat: 3.6 coef: 0.8 row val: 4
Expected=3.000, Predicted=3.600
yhat> 0.4
coef: 0.8
2.8000000000000003 = 0.4 + (0.8 * 3)
RESULT: yhat: 2.8000000000000003 coef: 0.8 row val: 3
Expected=2.000, Predicted=2.800
yhat> 0.4
coef: 0.8
4.4 = 0.4 + (0.8 * 5)
RESULT: yhat: 4.4 coef: 0.8 row val: 5
Expected=5.000, Predicted=4.400


In [32]:
# Make a prediction
def predict(row, coefficients):
    """Return the prediction for ``row``, printing the intercept and each index.

    The intercept ``coefficients[0]`` is printed first. Then, for every row
    element except the last, its index is printed and the matching
    coefficient-times-value term is added.
    """
    yhat = coefficients[0]  # the y-intercept
    print("yhat> %s" % (yhat,))
    for index, value in enumerate(row[:-1]):
        print(index)
        yhat += coefficients[index + 1] * value
    return yhat

# Estimate linear regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
    """Fit coefficients with stochastic gradient descent, tracing progress.

    Args:
        train: Rows whose last element is the actual value; the first row
            fixes how many coefficients are created.
        l_rate: Multiplier applied to the error on every update.
        n_epoch: Number of passes over ``train``.

    Returns:
        The coefficient list, y-intercept first.

    Prints the initial coefficients, a separator line at the start of each
    epoch, and the running squared error after every row.
    """
    # One coefficient per column of the first row, all starting at zero.
    weights = [0.0] * len(train[0])
    print(weights)
    for epoch in range(n_epoch):
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
        running_error = 0
        for sample in train:
            # error = prediction minus the actual value (last column)
            residual = predict(sample, weights) - sample[-1]
            running_error += residual ** 2
            # The same scaled error updates the y-intercept and each weight.
            step = l_rate * residual
            weights[0] -= step
            for position, value in enumerate(sample[:-1], start=1):
                weights[position] -= step * value
            print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, running_error))
    return weights

# Fit the coefficients for a short 10-epoch run and print them
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
l_rate, n_epoch = 0.001, 10
coef = coefficients_sgd(dataset, l_rate=l_rate, n_epoch=n_epoch)
print(coef)


[0.0, 0.0]
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
yhat> 0.0
0
>epoch=0, lrate=0.001, error=1.000
yhat> 0.001
0
>epoch=0, lrate=0.001, error=9.982
yhat> 0.0039970000000000006
0
>epoch=0, lrate=0.001, error=18.791
yhat> 0.006965027
0
>epoch=0, lrate=0.001, error=22.541
yhat> 0.008901463649000001
0
>epoch=0, lrate=0.001, error=46.236
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
yhat> 0.013769185095616001
0
>epoch=1, lrate=0.001, error=0.878
yhat> 0.014706401885340305
0
>epoch=1, lrate=0.001, error=9.204
yhat> 0.017591792999515358
0
>epoch=1, lrate=0.001, error=16.819
yhat> 0.020351313109723226
0
>epoch=1, lrate=0.001, error=19.985
yhat> 0.022130681482696545
0
>epoch=1, lrate=0.001, error=41.305
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
yhat> 0.026748059752424322
0
>epoch=2, lrate=0.001, error=0.771
yhat> 0.027626126591565353
0
>epoch=2, lrate=0.001, error=8.501
yhat> 0.030406374129082415
0
>epoch=2, lrate=0.001, error=15.070
yhat> 0.032969473102870454
0
>epoch=2, lrate=0.001, error=17.732
yhat> 0.034600875453019965

In [35]:

# Example of estimating coefficients
# Make a prediction
def predict(row, coefficients):
    """Return intercept plus the weighted inputs of ``row``.

    ``coefficients[0]`` is the intercept; ``coefficients[k + 1]`` multiplies
    ``row[k]`` for every element of ``row`` except the last one.
    """
    inputs = row[:-1]
    yhat = coefficients[0]
    for k, x in enumerate(inputs):
        yhat += coefficients[k + 1] * x
    return yhat
# Estimate linear regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
    """Fit linear-regression coefficients with stochastic gradient descent.

    Args:
        train: Rows whose last element is the expected output; the first row
            fixes how many coefficients are created.
        l_rate: Multiplier applied to the error on every update.
        n_epoch: Number of passes over ``train``.

    Returns:
        The coefficient list, intercept first.

    Prints the running squared error of the current epoch after every row.
    """
    weights = [0.0] * len(train[0])
    for epoch in range(n_epoch):
        running_error = 0
        for sample in train:
            residual = predict(sample, weights) - sample[-1]
            running_error += residual ** 2
            # The same scaled error drives the intercept and every weight.
            step = l_rate * residual
            weights[0] -= step
            for position, value in enumerate(sample[:-1], start=1):
                weights[position] -= step * value
            print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, running_error))
    return weights

In [36]:
# Fit the coefficients over 50 epochs and print the final values
dataset = [
    [1, 1],
    [2, 3],
    [4, 3],
    [3, 2],
    [5, 5],
]
l_rate, n_epoch = 0.001, 50
coef = coefficients_sgd(dataset, l_rate=l_rate, n_epoch=n_epoch)
print(coef)

>epoch=0, lrate=0.001, error=1.000
>epoch=0, lrate=0.001, error=9.982
>epoch=0, lrate=0.001, error=18.791
>epoch=0, lrate=0.001, error=22.541
>epoch=0, lrate=0.001, error=46.236
>epoch=1, lrate=0.001, error=0.878
>epoch=1, lrate=0.001, error=9.204
>epoch=1, lrate=0.001, error=16.819
>epoch=1, lrate=0.001, error=19.985
>epoch=1, lrate=0.001, error=41.305
>epoch=2, lrate=0.001, error=0.771
>epoch=2, lrate=0.001, error=8.501
>epoch=2, lrate=0.001, error=15.070
>epoch=2, lrate=0.001, error=17.732
>epoch=2, lrate=0.001, error=36.930
>epoch=3, lrate=0.001, error=0.676
>epoch=3, lrate=0.001, error=7.865
>epoch=3, lrate=0.001, error=13.520
>epoch=3, lrate=0.001, error=15.746
>epoch=3, lrate=0.001, error=33.047
>epoch=4, lrate=0.001, error=0.593
>epoch=4, lrate=0.001, error=7.290
>epoch=4, lrate=0.001, error=12.146
>epoch=4, lrate=0.001, error=13.998
>epoch=4, lrate=0.001, error=29.601
>epoch=5, lrate=0.001, error=0.519
>epoch=5, lrate=0.001, error=6.769
>epoch=5, lrate=0.001, error=10.929
>epo