Skip to content

Commit

Permalink
add linear regression
Browse files Browse the repository at this point in the history
  • Loading branch information
llinjupt committed Apr 15, 2019
1 parent 3be2832 commit 9d35de9
Show file tree
Hide file tree
Showing 35 changed files with 1,380 additions and 15 deletions.
93 changes: 93 additions & 0 deletions footstone/db/bmi/BMI.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"Height M","Weight kg","BMI","%Fat"
1.6002,49.4415716,19.30828653,23.9
1.6510000000000002,62.5957512,22.96416767,28.8
1.6510000000000002,75.7499308,27.78997102,32.4
1.5303499999999999,48.9879792,20.91741385,25.8
1.45415,43.091278,20.37844114,22.5
1.60655,52.6167184,20.38615236,22.1
1.5621,47.9673963,19.65750888,19.6
1.4986000000000002,45.5860362,20.29833307,25.3
1.524,47.8539982,20.60384599,22.8
1.4795500000000001,44.4520552,20.30638203,26.4
1.4732,46.0396286,21.21330769,33.7
1.5493999999999999,53.0703108,22.10673106,27.9
1.5176500000000002,65.8842961,28.60478302,33.5
1.5367000000000002,46.0396286,19.4963642,23.4
1.4605000000000001,43.5448704,20.41427119,21.8
1.524,62.368955,26.85335378,37.9
1.4605000000000001,45.8128324,21.47751448,31.3
1.58115,74.3891536,29.75524103,40.6
1.524,55.565069,23.923897,36.3
1.4986000000000002,46.1530267,20.5507999,29.8
1.4859,47.8539982,21.67399972,31.9
1.4795500000000001,42.1840932,19.27034213,31.3
1.5875,45.8128324,18.17856825,21.6
1.5557500000000002,44.6788514,18.45960401,24.6
1.58115,42.6376856,17.05483327,20.1
1.56845,43.5448704,17.70091209,24.6
1.4986000000000002,37.3079749,16.61231737,18.1
1.524,39.3491407,16.94202502,22.9
1.4414500000000001,39.0089464,18.7743563,26.2
1.4859,40.5965198,18.38694763,27.2
1.4605000000000001,38.1017616,17.86248729,17.7
1.4986000000000002,40.3697236,17.97563824,20.8
1.5557500000000002,37.1945768,15.36738608,17.5
1.5303499999999999,44.1118609,18.83535646,21.3
1.45415,33.4524395,15.82010562,18.7
1.6319500000000002,47.1736096,17.71272628,28.8
1.41605,30.0504965,14.98630183,17.1
1.3843,32.0916623,16.74679776,26.2
1.45415,34.8132167,16.46363534,20.4
1.50495,35.9471977,15.87160686,19.5
1.4859,39.9161312,18.0787865,21.7
1.4478,32.6586528,15.58049033,18.1
1.3335000000000001,30.5040889,17.15426117,29.8
1.36525,29.483506,15.81811598,20.6
1.5493999999999999,44.6788514,18.6112223,22.9
1.4478,34.9266148,16.66246883,19.3
1.4732,54.5444861,25.132022159999998,38.4
1.5875,52.5033203,20.83335916,27.9
1.4414500000000001,51.029145,24.55947771,36.4
1.6002,51.7095336,20.19398775,25.1
1.58115,60.3277892,24.13077474,39.7
1.41605,47.8539982,23.86497876,33.6
1.58115,83.914594,33.56536335,46
1.5493999999999999,69.9666277,29.14498517,38.9
1.6446500000000002,77.3375042,28.59191038,42.2
1.49225,58.2866234,26.17494163,36.7
1.59385,87.5433332,34.46102452,38
1.5493999999999999,45.8128324,19.08358835,23.3
1.4224,47.627202,23.54027766,35.9
1.61925,53.0703108,20.24062038,24.1
1.6637000000000002,80.7394472,29.1699536,40.8
1.4732,45.2458419,20.847561,25.7
1.49225,50.4621545,22.66118487,37.6
1.6510000000000002,81.9868263,30.07806744,35.9
1.5303499999999999,52.9569127,22.61211173,36.3
1.5875,61.234974,24.29808628,33
1.5303499999999999,73.3685707,31.32770084,40.5
1.5748,59.8741968,24.14286958,26.4
1.42875,47.9673963,23.49814928,27.3
1.60655,63.9565284,24.77971967,32.2
1.5493999999999999,46.7200172,19.46148119,19.6
1.4732,41.2769084,19.01882758,24.5
1.4986000000000002,45.35924,20.19734634,22.6
1.6637000000000002,57.2660405,20.68936316,30.2
1.4224,38.7821502,19.16851181,26.9
1.50495,46.9468134,20.72821843,30.2
1.3462,29.2567098,16.14382018,21
1.4224,35.8337996,17.71125653,19.4
1.4732,34.9266148,16.09285411,21.1
1.5176500000000002,38.555354,16.73945995,17.3
1.4605000000000001,40.3697236,18.92573058,20.5
1.4605000000000001,36.7409844,17.22454131,19.3
1.4224,37.1945768,18.38383589,28.7
1.4732,39.4625388,18.18283516,18.3
1.46685,36.7409844,17.07573388,15.6
1.5875,44.4520552,17.63861078,23.9
1.49225,41.6171027,18.68911202,24.5
1.49225,39.0089464,17.51785977,23.3
1.6002,41.2769084,16.11976215,20.1
1.4224,38.1017616,18.83222213,30.3
1.397,30.1638946,15.45591079,20.6
1.4478,38.555354,18.39363442,26
292 changes: 292 additions & 0 deletions footstone/lineregress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 28 12:58:42 2018
@author: Red
"""

import numpy as np
import dbload
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

class LRegress():
    """Single-feature linear regression ``h(x) = b + w1 * x`` fitted by gradient descent.

    Only the first column of ``X`` is used by the model.  Training methods
    record the descent path in ``bs_``, ``w1s_`` and ``costs_`` so that it can
    be drawn afterwards.

    Parameters
    ----------
    b : float, optional
        Initial intercept; drawn from a standard normal distribution when None.
    w1 : float, optional
        Initial slope; drawn from a standard normal distribution when None.
    eta : float
        Learning rate applied to every gradient step.
    tol : float
        Training stops as soon as the evaluated cost drops below this value.
    """

    def __init__(self, b=None, w1=None, eta=0.001, tol=0.001):
        self.eta = eta
        self.tol = tol

        # The global NumPy RNG is deliberately not reseeded here, so a seed
        # set by the caller keeps the random initialisation reproducible.
        self.b = np.random.randn() if b is None else b
        self.w1 = np.random.randn() if w1 is None else w1

        # Standardization statistics; populated by standard().
        self.mean = None
        self.std = None

        # Descent path; populated by bgd() / sgd().
        self.costs_ = []
        self.bs_ = []
        self.w1s_ = []
        self.steps_ = 1
        self.complex = 0

    def hypothesis(self, X):
        """Return ``b + w1 * X[:, 0]`` for a 2D sample array ``X``."""
        return self.b + self.w1 * X[:, 0]

    def predict(self, X):
        """Predict targets for raw samples ``X``.

        When standard() has been run (directly or through bgd/sgd), ``X`` is
        scaled with the stored mean and standard deviation first; otherwise
        it is used unchanged.
        """
        X = np.asarray(X)
        mean = getattr(self, 'mean', None)
        std = getattr(self, 'std', None)
        if mean is not None and std is not None:
            X = (X - mean) / std
        return self.hypothesis(X)

    def _residual(self, X, y):
        """Return the per-sample error ``hypothesis(X) - y``."""
        return self.hypothesis(X) - y

    # MSE/LSE Least square method
    def cost(self, X, y):
        """Return half of the mean squared error of the current parameters."""
        return np.sum(self._residual(X, y) ** 2) / X.shape[0] / 2

    def delta_b(self, X, y):
        """Return the partial derivative of the cost with respect to ``b``."""
        return np.sum(self._residual(X, y)) / X.shape[0]

    def delta_w(self, X, y):
        """Return the partial derivative of the cost with respect to ``w1``."""
        derivative = self._residual(X, y) * X[:, 0]
        return np.sum(derivative) / X.shape[0]

    def standard(self, X):
        """Standardize ``X`` column-wise and remember the statistics used.

        Raises
        ------
        ValueError
            If any column has zero standard deviation, because scaling it
            would divide by zero.
        """
        mean = np.mean(X, axis=0)
        std = np.std(X, axis=0)
        if not np.all(std):
            raise ValueError("can't standardize a feature with zero standard deviation")

        self.mean = mean
        self.std = std
        return (X - mean) / std

    def _reset_history(self):
        """Clear the recorded descent path before a new training run."""
        self.costs_ = []
        self.bs_ = []
        self.w1s_ = []

        self.steps_ = 1
        self.complex = 0

    def _record(self, cost):
        """Append the current parameters and ``cost`` to the descent path."""
        self.bs_.append(self.b)
        self.w1s_.append(self.w1)
        self.costs_.append(cost)

    def bgd(self, X, y, max_iter=1000, standard=True):
        """Fit with batch gradient descent.

        Parameters
        ----------
        X : 2D array of samples; only column 0 is used by the model.
        y : 1D array of targets.
        max_iter : maximum number of full-batch updates.
        standard : standardize ``X`` with standard() before training.

        Returns
        -------
        The sample array that was actually trained on (standardized when
        ``standard`` is true).
        """
        self._reset_history()

        if standard:
            X = self.standard(X)

        for _ in range(max_iter):
            cost = self.cost(X, y)
            if cost < self.tol:
                print("cost reduce very tiny less than tol, just quit!")
                return X

            # Both steps are computed before either parameter changes, so
            # b and w1 are updated together from the same state.
            step_b = self.eta * self.delta_b(X, y)
            step_w1 = self.eta * self.delta_w(X, y)

            # The path stores the parameters and cost *before* this update.
            if self.complex % self.steps_ == 0:
                self._record(cost)

            self.b -= step_b
            self.w1 -= step_w1
            self.complex += 1

        return X

    def sgd(self, X, y, max_iter=1000, standard=True):
        """Fit with stochastic (per-sample) gradient descent.

        Samples are shuffled once with the project's ``scaler.shuffle`` and
        then visited in that order for up to ``max_iter`` passes.

        Returns
        -------
        The unshuffled sample array (standardized when ``standard`` is true),
        so that it still lines up with the caller's ``y``.
        """
        self._reset_history()

        STDX = self.standard(X) if standard else X
        import scaler
        X, y = scaler.shuffle(STDX, y)
        for _ in range(max_iter):
            for Xi, yi in zip(X, y):
                # One sample as a (1, n_features) row so that X[:, 0] picks
                # the first feature.
                Xi = Xi.reshape(1, Xi.size)

                # NOTE(review): this stops as soon as a *single* sample fits
                # within tol, not when the whole data set does.
                if self.cost(Xi, yi) < self.tol:
                    print("cost reduce very tiny less than tol, just quit!")
                    return STDX

                step_b = self.eta * self.delta_b(Xi, yi)
                step_w1 = self.eta * self.delta_w(Xi, yi)

                self.b -= step_b
                self.w1 -= step_w1

                # NOTE(review): the path is recorded after every per-sample
                # update, using the cost over the full (shuffled) data set;
                # this placement is assumed from the step counter - confirm.
                if self.complex % self.steps_ == 0:
                    self._record(self.cost(X, y))
                self.complex += 1

        return STDX

    def draw_costs(self):
        '''Draw the recorded cost values with matplotlib.'''
        if len(self.costs_) <= 1:
            print("can't plot costs for less data")
            return

        plt.figure()
        plt.title("Cost values J(w) state")
        plt.xlabel("Iterations")
        plt.ylabel("Cost values J(w)")

        x = np.arange(1, 1 + len(self.costs_), 1) * self.steps_
        plt.xlim(1 * self.steps_, len(self.costs_) * self.steps_)
        plt.ylim(0, max(self.costs_) + 1)

        plt.plot(x, self.costs_, c='grey')
        plt.scatter(x, self.costs_, c='black')

    def draw_points(self, X, y, title='', coordinate=False):
        """Scatter ``X[:, 0]`` against ``y`` in a new figure and return ``plt``.

        When ``coordinate`` is true every point is annotated with its
        coordinates.
        """
        plt.figure()

        plt.title(title)
        plt.xlabel("BMI")
        plt.ylabel("Fat%")

        x1 = X[:, 0]
        plt.scatter(x1, y, c='black', marker='o')
        if coordinate:
            for px, py in zip(x1, y):
                plt.annotate('(%.2f,%.2f)' % (px, py), xy=(px, py),
                             xytext=(-20, -20), textcoords='offset pixels',
                             ha='left', va='bottom')
        return plt

    def draw_line(self, plt, x1, y, c='black'):
        """Plot the fitted line ``h(x1) = b + w1 * x1`` over the range of ``x1``.

        ``y`` is accepted for interface compatibility and is not used.
        """
        # A regression line is a function of x1, so w1 == 0 is simply the
        # horizontal line h = b and needs no special case.
        line_x1 = np.arange(np.min(x1) - 1, np.max(x1) + 1, 0.5)
        line_x2 = line_x1 * self.w1 + self.b

        plt.plot(line_x1, line_x2, c)

    def draw_vertical_line(self, plt, X, y):
        """Draw the residual of five randomly chosen samples as blue segments."""
        for _ in range(5):
            idx = np.random.randint(X.shape[0])
            x = X[idx, 0]
            plt.plot([x, x], [y[idx], self.b + self.w1 * x], c='blue')

    def draw_separate_line(self, X, y, title='', vertical=False):
        """Show the samples together with the fitted line.

        ``title`` is appended to the fixed "Linear Regression" heading;
        ``vertical`` additionally draws a few residual segments.
        """
        # Parenthesised so the heading survives when no title is given.
        title = "Linear Regression" + (' ' + title if title else '')
        canvas = self.draw_points(X, y, title)
        canvas.tight_layout()

        self.draw_line(canvas, X[:, 0], y, c='red')
        if vertical:
            self.draw_vertical_line(canvas, X, y)
        canvas.show()

    def draw_cost_surface(self, X, y, gdpath=True):
        """Plot the cost over a grid of (b, w1) values as a surface and contour.

        The grid spans b in [0, 40] and w1 in [-10, 30].  When ``gdpath`` is
        true the recorded descent path is drawn on both plots.
        """
        title = 'Quadratic Cost Function Surface and Contour'
        b_grid, w1_grid = np.meshgrid(np.linspace(0, 40, 50, endpoint=True),
                                      np.linspace(-10, 30, 50, endpoint=True))
        costs = np.zeros(b_grid.shape)

        # The model's own parameters are borrowed to evaluate the cost at
        # each grid point and are always restored afterwards.
        backup_b, backup_w1 = self.b, self.w1
        try:
            for i in range(b_grid.shape[0]):
                for j in range(b_grid.shape[1]):
                    self.b = b_grid[i, j]
                    self.w1 = w1_grid[i, j]
                    costs[i, j] = self.cost(X, y)
        finally:
            self.b = backup_b
            self.w1 = backup_w1

        plt.figure(figsize=(6, 6))
        ax = plt.axes(projection='3d')
        ax.plot_surface(b_grid, w1_grid, costs, rstride=1, cstride=1,
                        cmap='hot', edgecolor='none', alpha=0.8)
        ax.set_title(title)
        ax.set_xlabel("b")
        ax.set_ylabel("w1")

        if gdpath:
            ax.plot3D(self.bs_, self.w1s_, self.costs_, c='red')

        # Small inset axes holding the contour view of the same surface.
        ax0 = plt.axes([0.1, 0.5, 0.3, 0.3])
        ax0.contour(b_grid, w1_grid, costs, 20, cmap='hot')
        if gdpath:
            ax0.plot(self.bs_, self.w1s_, 'red')
        plt.show()

def load_linear_dataset(random_state=None, features=1, points=50):
    """Generate a noisy, roughly linear toy data set.

    Features are uniform in [2, 22).  The target depends on the first
    feature only: half of (first feature minus uniform [0, 1) noise), minus 1.

    Returns a ``(points, features)`` sample array and a ``(points,)`` target
    array.
    """
    generator = np.random.RandomState(random_state)

    samples = generator.rand(points, features) * 20 + 2
    noise = generator.rand(1, points)
    targets = (samples[:, 0] - noise).ravel() * 0.5 - 1

    return samples, targets

def LRTest():
    """Demo: fit the synthetic linear data set with SGD and plot the result."""
    raw_X, y = load_linear_dataset(random_state=0, features=1, points=50)

    model = LRegress(b=15, w1=15, eta=0.1, tol=1e-4)
    # sgd() hands back the standardized samples, which the plots expect.
    std_X = model.sgd(raw_X, y, max_iter=100, standard=True)

    model.draw_costs()
    model.draw_separate_line(std_X, y)
    model.draw_cost_surface(std_X, y)
    print(model.costs_[-1])

    queries = np.array([[0], [5], [10], [15]])
    print(model.predict(queries))

def BMITest():
    """Demo: fit the BMI data set with batch gradient descent and plot it."""
    features, y = dbload.load_bmi_dataset(standard=False)
    # Keep column 2 only, as an (n, 1) array
    # (presumably the BMI column - verify against dbload).
    bmi = features[:, 2].reshape(features.shape[0], 1)

    model = LRegress(b=5, w1=5, eta=0.1, tol=0.001)
    std_bmi = model.bgd(bmi, y, max_iter=100, standard=True)

    model.draw_costs()
    model.draw_separate_line(std_bmi, y)
    model.draw_cost_surface(std_bmi, y)
    print(model.costs_[-1])

def poly_extend_feature(X,degree=2):
    """Expand ``X`` into polynomial features up to ``degree``.

    With scikit-learn's PolynomialFeatures defaults, degree 2 turns
    [x1, x2] into [1, x1, x2, x1^2, x1*x2, x2^2].
    """
    from sklearn.preprocessing import PolynomialFeatures

    return PolynomialFeatures(degree=degree).fit_transform(X)

def BMISklearnTest():
    """Demo: fit a degree-2 polynomial to the BMI data with scikit-learn.

    Prints the coefficients, the half-MSE cost, the R^2 score and the
    estimator parameters, then plots the fitted curve over the samples.
    """
    from sklearn.linear_model import LinearRegression

    # fit_intercept=False because the polynomial expansion already contains
    # a constant column, so coef_[0] plays the role of the intercept.
    # The former normalize=True argument is dropped: scikit-learn ignored it
    # whenever fit_intercept is False, and the keyword was removed in 1.2.
    lr = LinearRegression(fit_intercept=False, n_jobs=None)
    X,y = dbload.load_bmi_dataset(standard=False)
    # Keep column 2 only, as an (n, 1) array
    # (presumably the BMI column - verify against dbload).
    X = X[:,2].reshape(X.shape[0],1)

    extend_X = poly_extend_feature(X, degree=2)
    # y = b + w1 * x + w2 * x**2
    lr.fit(extend_X, y)
    print(lr.coef_)

    cost = np.sum((lr.predict(extend_X) - y)**2) / extend_X.shape[0] / 2
    print("cost:\t%f" % cost)
    print("score:\t%f" % lr.score(extend_X, y))
    print(lr.get_params())

    plt.figure()
    # Evaluate the fitted curve on an evenly spaced grid from 10 to 40.
    x1 = np.linspace(10, 40, 50, endpoint=True).reshape(50,1)
    extend_x1 = poly_extend_feature(x1, degree=2)
    plt.plot(x1, lr.predict(extend_x1), c='red')
    plt.scatter(X, y, c='black', marker='o')
    plt.xlabel("BMI") #'x1'
    plt.ylabel("Fat%") #'y'
    plt.show()

# Script entry point: run the scikit-learn BMI demo when executed directly.
if __name__ == "__main__":
    BMISklearnTest()
Binary file added imgs/lg/bmic.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/bmics.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/bmis.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/cost.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/cs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/data.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/diverge.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/divergecs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/eta05.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/eta05cs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/gd.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/pbmi.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/se.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/sgd.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/sgdcs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/ssc.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/sscs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/ssp.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/lg/vertical.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/exore.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/nouse/ld.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/nouse/ls.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/nouse/qd.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/nouse/qs.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/quick.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added imgs/practice/regu.png
Binary file added imgs/practice/slow.png
Binary file added imgs/practice/te.png
Binary file added imgs/practice/tv.png
4 changes: 4 additions & 0 deletions imgs/practice/tv.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Accuracy rate 97.95% on trainset 32000
Accuracy rate 95.75% on testset 8000
test evalution reduce very tiny, just quit!
Accuracy rate 95.70% on trainset 10000
Binary file added imgs/practice/vt.png

0 comments on commit 9d35de9

Please sign in to comment.