-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
35 changed files
with
1,380 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
"Height M","Weight kg","BMI","%Fat" | ||
1.6002,49.4415716,19.30828653,23.9 | ||
1.6510000000000002,62.5957512,22.96416767,28.8 | ||
1.6510000000000002,75.7499308,27.78997102,32.4 | ||
1.5303499999999999,48.9879792,20.91741385,25.8 | ||
1.45415,43.091278,20.37844114,22.5 | ||
1.60655,52.6167184,20.38615236,22.1 | ||
1.5621,47.9673963,19.65750888,19.6 | ||
1.4986000000000002,45.5860362,20.29833307,25.3 | ||
1.524,47.8539982,20.60384599,22.8 | ||
1.4795500000000001,44.4520552,20.30638203,26.4 | ||
1.4732,46.0396286,21.21330769,33.7 | ||
1.5493999999999999,53.0703108,22.10673106,27.9 | ||
1.5176500000000002,65.8842961,28.60478302,33.5 | ||
1.5367000000000002,46.0396286,19.4963642,23.4 | ||
1.4605000000000001,43.5448704,20.41427119,21.8 | ||
1.524,62.368955,26.85335378,37.9 | ||
1.4605000000000001,45.8128324,21.47751448,31.3 | ||
1.58115,74.3891536,29.75524103,40.6 | ||
1.524,55.565069,23.923897,36.3 | ||
1.4986000000000002,46.1530267,20.5507999,29.8 | ||
1.4859,47.8539982,21.67399972,31.9 | ||
1.4795500000000001,42.1840932,19.27034213,31.3 | ||
1.5875,45.8128324,18.17856825,21.6 | ||
1.5557500000000002,44.6788514,18.45960401,24.6 | ||
1.58115,42.6376856,17.05483327,20.1 | ||
1.56845,43.5448704,17.70091209,24.6 | ||
1.4986000000000002,37.3079749,16.61231737,18.1 | ||
1.524,39.3491407,16.94202502,22.9 | ||
1.4414500000000001,39.0089464,18.7743563,26.2 | ||
1.4859,40.5965198,18.38694763,27.2 | ||
1.4605000000000001,38.1017616,17.86248729,17.7 | ||
1.4986000000000002,40.3697236,17.97563824,20.8 | ||
1.5557500000000002,37.1945768,15.36738608,17.5 | ||
1.5303499999999999,44.1118609,18.83535646,21.3 | ||
1.45415,33.4524395,15.82010562,18.7 | ||
1.6319500000000002,47.1736096,17.71272628,28.8 | ||
1.41605,30.0504965,14.98630183,17.1 | ||
1.3843,32.0916623,16.74679776,26.2 | ||
1.45415,34.8132167,16.46363534,20.4 | ||
1.50495,35.9471977,15.87160686,19.5 | ||
1.4859,39.9161312,18.0787865,21.7 | ||
1.4478,32.6586528,15.58049033,18.1 | ||
1.3335000000000001,30.5040889,17.15426117,29.8 | ||
1.36525,29.483506,15.81811598,20.6 | ||
1.5493999999999999,44.6788514,18.6112223,22.9 | ||
1.4478,34.9266148,16.66246883,19.3 | ||
1.4732,54.5444861,25.132022159999998,38.4 | ||
1.5875,52.5033203,20.83335916,27.9 | ||
1.4414500000000001,51.029145,24.55947771,36.4 | ||
1.6002,51.7095336,20.19398775,25.1 | ||
1.58115,60.3277892,24.13077474,39.7 | ||
1.41605,47.8539982,23.86497876,33.6 | ||
1.58115,83.914594,33.56536335,46 | ||
1.5493999999999999,69.9666277,29.14498517,38.9 | ||
1.6446500000000002,77.3375042,28.59191038,42.2 | ||
1.49225,58.2866234,26.17494163,36.7 | ||
1.59385,87.5433332,34.46102452,38 | ||
1.5493999999999999,45.8128324,19.08358835,23.3 | ||
1.4224,47.627202,23.54027766,35.9 | ||
1.61925,53.0703108,20.24062038,24.1 | ||
1.6637000000000002,80.7394472,29.1699536,40.8 | ||
1.4732,45.2458419,20.847561,25.7 | ||
1.49225,50.4621545,22.66118487,37.6 | ||
1.6510000000000002,81.9868263,30.07806744,35.9 | ||
1.5303499999999999,52.9569127,22.61211173,36.3 | ||
1.5875,61.234974,24.29808628,33 | ||
1.5303499999999999,73.3685707,31.32770084,40.5 | ||
1.5748,59.8741968,24.14286958,26.4 | ||
1.42875,47.9673963,23.49814928,27.3 | ||
1.60655,63.9565284,24.77971967,32.2 | ||
1.5493999999999999,46.7200172,19.46148119,19.6 | ||
1.4732,41.2769084,19.01882758,24.5 | ||
1.4986000000000002,45.35924,20.19734634,22.6 | ||
1.6637000000000002,57.2660405,20.68936316,30.2 | ||
1.4224,38.7821502,19.16851181,26.9 | ||
1.50495,46.9468134,20.72821843,30.2 | ||
1.3462,29.2567098,16.14382018,21 | ||
1.4224,35.8337996,17.71125653,19.4 | ||
1.4732,34.9266148,16.09285411,21.1 | ||
1.5176500000000002,38.555354,16.73945995,17.3 | ||
1.4605000000000001,40.3697236,18.92573058,20.5 | ||
1.4605000000000001,36.7409844,17.22454131,19.3 | ||
1.4224,37.1945768,18.38383589,28.7 | ||
1.4732,39.4625388,18.18283516,18.3 | ||
1.46685,36.7409844,17.07573388,15.6 | ||
1.5875,44.4520552,17.63861078,23.9 | ||
1.49225,41.6171027,18.68911202,24.5 | ||
1.49225,39.0089464,17.51785977,23.3 | ||
1.6002,41.2769084,16.11976215,20.1 | ||
1.4224,38.1017616,18.83222213,30.3 | ||
1.397,30.1638946,15.45591079,20.6 | ||
1.4478,38.555354,18.39363442,26 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,292 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Mon Jan 28 12:58:42 2018 | ||
@author: Red | ||
""" | ||
|
||
import numpy as np | ||
import dbload | ||
import matplotlib.pyplot as plt | ||
from mpl_toolkits import mplot3d | ||
|
||
class LRegress():
    '''Univariate linear regression h(x) = b + w1*x.

    Trained with batch (bgd) or stochastic (sgd) gradient descent on the
    half-MSE cost, with matplotlib helpers to plot the cost history, the
    fitted line, and the cost surface with the gradient-descent path.
    '''
    def __init__(self, b=None, w1=None, eta=0.001, tol=0.001):
        '''
        b:   initial intercept; drawn from N(0, 1) when None.
        w1:  initial slope; drawn from N(0, 1) when None.
        eta: gradient-descent learning rate.
        tol: training stops once the cost falls below this value.
        '''
        self.eta = eta
        self.tol = tol

        np.random.seed(None)
        self.b = b
        if self.b is None:
            self.b = np.random.randn(1)[0]

        self.w1 = w1
        # BUGFIX: the original re-tested ``self.b`` here, so w1 stayed
        # None whenever b was supplied but w1 was not.
        if self.w1 is None:
            self.w1 = np.random.randn(1)[0]

    # b and w1 are scalars; X is a 2D array whose first column is the feature
    def hypothesis(self, X):
        return self.b + self.w1 * X[:,0]

    def predict(self, X):
        '''Predict targets for X, re-applying the training-time
        standardization when standard() was used during fitting.'''
        # BUGFIX: the original always standardized inside a try/finally,
        # crashing with AttributeError when the model was trained with
        # standard=False, and printed X as leftover debug output.
        if getattr(self, 'mean', None) is not None:
            X = (X - self.mean) / self.std
        return self.hypothesis(X)

    # half mean squared error (MSE/LSE least-squares cost)
    def cost(self, X, y):
        return np.sum((self.hypothesis(X) - y)**2) / X.shape[0] / 2

    # dJ/db: mean residual
    def delta_b(self, X, y):
        return np.sum(self.b + self.w1*X[:,0] - y) / X.shape[0]

    # dJ/dw1: mean residual weighted by the feature
    def delta_w(self, X, y):
        derivative = (self.b + self.w1*X[:,0] - y) * X[:,0]
        return np.sum(derivative) / X.shape[0]

    def standard(self, X):
        '''Column-wise z-score; remembers mean/std for predict().'''
        self.mean = np.mean(X, axis=0)
        self.std = np.std(X, axis=0)
        assert(np.std(X, axis=0).any())  # guard against zero-variance columns
        return (X - self.mean) / self.std

    def bgd(self, X, y, max_iter=1000, standard=True):
        '''Batch gradient descent; returns the (possibly standardized) X.'''
        # history for drawing the gradient-descent path
        self.costs_ = []
        self.bs_ = []
        self.w1s_ = []

        self.steps_ = 1    # record history every steps_ iterations
        self.complex = 0   # iterations actually performed

        if standard: X = self.standard(X)
        for loop in range(max_iter):
            cost = self.cost(X, y)
            if(cost < self.tol):
                print("cost reduce very tiny less than tol, just quit!")
                return X

            delta_b = self.eta * self.delta_b(X, y)
            delta_w1 = self.eta * self.delta_w(X, y)

            # record state before applying this iteration's update
            if self.complex % self.steps_ == 0:
                self.bs_.append(self.b)
                self.w1s_.append(self.w1)
                # reuse the cost computed above (b/w1 unchanged so far)
                self.costs_.append(cost)

            # update weights and b together
            self.b -= delta_b
            self.w1 -= delta_w1
            self.complex += 1

        # return standard X
        return X

    def sgd(self, X, y, max_iter=1000, standard=True):
        '''Stochastic gradient descent over shuffled single samples;
        returns the (possibly standardized) X.'''
        # history for drawing the gradient-descent path
        self.costs_ = []
        self.bs_ = []
        self.w1s_ = []

        self.steps_ = 1
        self.complex = 0

        STDX = self.standard(X) if standard else X
        import scaler
        X,y = scaler.shuffle(STDX, y)
        for loop in range(max_iter):
            for Xi, yi in zip(X, y):
                Xi = Xi.reshape(Xi.size, 1)
                cost = self.cost(Xi, yi)
                if(cost < self.tol):
                    print("cost reduce very tiny less than tol, just quit!")
                    return STDX

                delta_b = self.eta * self.delta_b(Xi, yi)
                delta_w1 = self.eta * self.delta_w(Xi, yi)

                self.b -= delta_b
                self.w1 -= delta_w1

                # record full-batch cost for the GD-path plot
                if self.complex % self.steps_ == 0:
                    self.bs_.append(self.b)
                    self.w1s_.append(self.w1)
                    cost = self.cost(X,y)
                    self.costs_.append(cost)
                self.complex += 1

        return STDX

    def draw_costs(self):
        '''Draw errors info with matplotlib'''
        if len(self.costs_) <= 1:
            print("can't plot costs for less data")
            return

        plt.figure()
        plt.title("Cost values J(w) state")
        plt.xlabel("Iterations")
        plt.ylabel("Cost values J(w)")

        x = np.arange(1, 1 + len(self.costs_), 1) * self.steps_
        plt.xlim(1 * self.steps_, len(self.costs_) * self.steps_)
        plt.ylim(0, max(self.costs_) + 1)

        plt.plot(x, self.costs_, c='grey')
        plt.scatter(x, self.costs_, c='black')

    def draw_points(self, X, y, title='', coordinate=False):
        '''Scatter-plot the samples; optionally annotate each point
        with its (x, y) coordinates. Returns the plt module.'''
        plt.figure()

        plt.title(title)
        plt.xlabel("BMI") #'x1'
        plt.ylabel("Fat%") #'y'

        # x1 and x2 features
        x1 = X[:,0]
        x2 = y

        plt.scatter(x1, x2, c='black', marker='o')
        if coordinate:
            for index, x, y in zip(range(X.shape[0]), x1, x2):
                plt.annotate('(%.2f,%.2f)'%(x,y), xy=(x,y), xytext=(-20,-20),
                             textcoords = 'offset pixels', ha='left', va='bottom')
        return plt

    def draw_line(self, plt, x1, y, c='black'):
        '''Draw the regression line h(x1) = b + w1*x1 over the x1 range.'''
        if self.b == 0 and self.w1 == 0:
            print("Can't plot a line when both w1 and w2 are 0!")
            return
        # BUGFIX: the original special-cased w1 == 0 with x1 = -b/w1,
        # a division by zero; the general formula below also covers the
        # flat (w1 == 0) line.
        max_ = np.max(x1)
        min_ = np.min(x1)

        line_x1 = np.arange(min_ - 1, max_ + 1, 0.5)
        line_x2 = line_x1 * self.w1 + self.b

        plt.plot(line_x1, line_x2, c)

    def draw_vertical_line(self, plt, X, y):
        '''Draw residual segments from 5 random samples to the fit line.'''
        for i in range(5):
            idx = (np.random.randint(X.shape[0], size=1))
            x = X[idx][0]
            plt.plot([x, x], [y[idx], self.hypothesis(X[idx])], c='blue')

    def draw_separate_line(self, X, y, title='', vertical=False):
        '''Plot samples plus the fitted line; optionally residuals too.'''
        # BUGFIX: precedence — the original conditional expression covered
        # the whole concatenation, yielding '' whenever title was empty.
        title = "Linear Regression" + ((' ' + title) if len(title) else '')
        plt = self.draw_points(X, y, title)
        plt.tight_layout()

        # x1 and x2 features
        x1 = X[:, 0]

        self.draw_line(plt, x1, y, c='red')
        if vertical: self.draw_vertical_line(plt, X, y)
        plt.show()

    def draw_cost_surface(self, X, y, gdpath=True):
        '''Plot J(b, w1) as a 3D surface plus a contour inset; overlay the
        recorded gradient-descent path when gdpath is True.'''
        x1 = np.linspace(0, 40, 50, endpoint=True)
        x2 = np.linspace(-10, 30, 50, endpoint=True)

        title = 'Quadratic Cost Function Surface and Contour'
        x1, x2 = np.meshgrid(x1, x2)
        costs = np.zeros(x1.shape)
        backup_b = self.b
        backup_w1 = self.w1
        for i in range(x1.shape[0]):
            for j in range(x1.shape[1]):
                # BUGFIX: a stray trailing comma made self.b a 1-tuple here
                self.b = x1[i,j]
                self.w1 = x2[i,j]
                costs[i,j] = self.cost(X, y)

        self.b = backup_b
        self.w1 = backup_w1

        plt.figure(figsize=(6, 6))
        ax = plt.axes(projection='3d')
        ax.plot_surface(x1, x2, costs, rstride=1, cstride=1, cmap='hot',
                        edgecolor='none', alpha=0.8)
        ax.set_title(title)
        ax.set_xlabel("b")
        ax.set_ylabel("w1")

        if gdpath: ax.plot3D(self.bs_, self.w1s_, self.costs_, c='red')
        ax0 = plt.axes([0.1, 0.5, 0.3, 0.3])
        ax0.contour(x1, x2, costs, 20, cmap='hot')
        if gdpath: ax0.plot(self.bs_, self.w1s_, 'red')
        plt.show()
|
||
def load_linear_dataset(random_state=None, features=1, points=50):
    '''Generate a noisy, roughly linear sample set.

    Returns (x, y) where x has shape (points, features) with values in
    [2, 22) and y follows 0.5 * x[:,0] - 1 with uniform noise mixed in.
    '''
    rng = np.random.RandomState(random_state)

    # feature matrix first, then the noise draw (RNG order matters
    # for reproducibility with a fixed random_state)
    samples = 20 * rng.rand(points, features) + 2
    noise = rng.rand(1, points)
    targets = 0.5 * (samples[:, 0] - noise).ravel() - 1

    return samples, targets
|
||
def LRTest():
    '''Fit the synthetic linear dataset with SGD and show diagnostics.'''
    samples = 50
    X, y = load_linear_dataset(random_state=0, features=1, points=samples)

    model = LRegress(b=15, w1=15, eta=0.1, tol=1e-4)
    X = model.sgd(X, y, max_iter=100, standard=True)
    model.draw_costs()
    model.draw_separate_line(X, y)
    model.draw_cost_surface(X, y)
    print(model.costs_[-1])
    print(model.predict(np.array([[0],[5],[10],[15]])))
|
||
def BMITest():
    '''Fit Fat% against BMI from the project dataset using batch GD.'''
    data, targets = dbload.load_bmi_dataset(standard=False)
    # keep only the BMI column as the single regressor
    bmi = data[:,2].reshape(data.shape[0],1) # last column is BMI
    model = LRegress(b=5, w1=5, eta=0.1, tol=0.001)

    bmi = model.bgd(bmi, targets, max_iter=100, standard=True)
    model.draw_costs()
    model.draw_separate_line(bmi, targets)
    model.draw_cost_surface(bmi, targets)
    print(model.costs_[-1])
|
||
def poly_extend_feature(X,degree=2):
    '''Polynomially expand features, e.g. for degree=2:
    [x1, x2] -> [1, x1, x2, x1^2, x1*x2, x2^2] (bias column included).'''
    from sklearn.preprocessing import PolynomialFeatures
    transformer = PolynomialFeatures(degree=degree)
    return transformer.fit_transform(X)
|
||
def BMISklearnTest():
    '''Fit Fat% as a quadratic function of BMI with scikit-learn and plot it.

    The design matrix from poly_extend_feature() already contains the bias
    column, hence fit_intercept=False.
    '''
    from sklearn.linear_model import LinearRegression

    # BUGFIX: dropped the deprecated ``normalize=True`` keyword — it was
    # documented as ignored when fit_intercept=False and was removed in
    # scikit-learn 1.2, where passing it raises TypeError.
    lr = LinearRegression(fit_intercept=False, n_jobs=None)
    X,y = dbload.load_bmi_dataset(standard=False)
    X = X[:,2].reshape(X.shape[0],1) # last column is BMI

    extend_X = poly_extend_feature(X, degree=2)
    # model: y = b + w1*x + w2 * x**2
    lr.fit(extend_X, y)
    print(lr.coef_)

    # half mean squared error, matching LRegress.cost()
    cost = np.sum((lr.predict(extend_X) - y)**2) / extend_X.shape[0] / 2
    print("cost:\t%f" % cost)
    print("score:\t%f" % lr.score(extend_X, y))
    print(lr.get_params())

    # plot the fitted quadratic over the observed BMI range
    plt.figure()
    x1 = np.linspace(10, 40, 50, endpoint=True).reshape(50,1)
    extend_x1 = poly_extend_feature(x1, degree=2)
    plt.plot(x1, lr.predict(extend_x1), c='red')
    plt.scatter(X, y, c='black', marker='o')
    plt.xlabel("BMI") #'x1'
    plt.ylabel("Fat%") #'y'
    plt.show()
|
||
# Script entry point: runs only the scikit-learn BMI demo by default.
if __name__ == "__main__":
    BMISklearnTest()
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Accuracy rate 97.95% on trainset 32000 | ||
Accuracy rate 95.75% on testset 8000 | ||
test evalution reduce very tiny, just quit! | ||
Accuracy rate 95.70% on trainset 10000 |
Oops, something went wrong.
Oops, something went wrong.