
Commit d73083b

Add example codes for ML
1 parent 99d041b commit d73083b

8 files changed: +299 -0 lines

examples/iris_classification.py

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, svm)
from matplotlib.lines import Line2D # For the custom legend

# Load a dataset
iris = datasets.load_iris()

# Train a model
model = svm.SVC() # Accuracy: 0.973 (146/150)
model.fit(iris.data, iris.target) # Try 'iris.data[:,0:2]' (Accuracy: 0.820)

# Test the model
predict = model.predict(iris.data) # Try 'iris.data[:,0:2]' (Accuracy: 0.820)
n_correct = sum(predict == iris.target)
accuracy = n_correct / len(iris.data)

# Visualize testing results
cmap = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])
clabel = [Line2D([0], [0], marker='o', lw=0, label=iris.target_names[i], color=cmap[i]) for i in range(len(cmap))]
for (x, y) in [(0, 1), (2, 3)]:
    plt.figure()
    plt.title(f'svm.SVC ({n_correct}/{len(iris.data)}={accuracy:.3f})')
    plt.scatter(iris.data[:,x], iris.data[:,y], c=cmap[iris.target], edgecolors=cmap[predict])
    plt.xlabel(iris.feature_names[x])
    plt.ylabel(iris.feature_names[y])
    plt.legend(handles=clabel, framealpha=0.5)
plt.show()
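A note on evaluation: the script above trains and scores on the same 150 samples, so the reported accuracy is optimistic. A minimal variation (not part of this commit, assuming only scikit-learn's standard `train_test_split`) that scores on held-out data instead:

# Hypothetical variation (not in this commit): evaluate on a held-out split
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

model = svm.SVC()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))  # Mean accuracy on unseen samples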
Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, naive_bayes, metrics)
from matplotlib.colors import ListedColormap
from scipy.stats import multivariate_normal

# Load a dataset partially
iris = datasets.load_iris()
iris.data = iris.data[:,0:2]
iris.feature_names = iris.feature_names[0:2]
iris.color = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

# Train a model
model = naive_bayes.GaussianNB()
model.fit(iris.data, iris.target)
#model.class_prior_ = [0.1, 0.6, 0.3] # Try this to give manual prior

# Validate training
# Note: 'model.sigma_' holds the per-feature variances; scikit-learn >= 1.0 renames it to 'var_'
for c in range(len(model.classes_)):
    data = iris.data[iris.target == c,:]
    print(f'## Class {c}')
    print(' * Trained prior = ' + np.array2string(model.class_prior_[c], precision=3))
    print(' * Manual prior = ' + '{:.3f}'.format(len(data) / len(iris.data)))
    print(' * Trained mean = ' + np.array2string(model.theta_[c], precision=3))
    print(' * Manual mean = ' + np.array2string(np.mean(data, axis=0), precision=3))
    print(' * Trained Sigma = ' + np.array2string(model.sigma_[c], precision=3))
    print(' * Manual Sigma = ' + np.array2string(np.var(data, axis=0), precision=3))

# Visualize training results (decision boundaries)
x_min, x_max = iris.data[:, 0].min() - 1, iris.data[:, 0].max() + 1
y_min, y_max = iris.data[:, 1].min() - 1, iris.data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
xy = np.vstack((xx.flatten(), yy.flatten())).T
zz = model.predict(xy)
plt.contourf(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color), alpha=0.2)

# Visualize training results (trained Gaussians)
for c in range(len(model.classes_)):
    likelihood = multivariate_normal(model.theta_[c], np.diag(model.sigma_[c]))
    zz = model.class_prior_[c] * likelihood.pdf(xy)
    plt.contour(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color[c]), alpha=0.4)

# Test the model
predict = model.predict(iris.data)
accuracy = metrics.balanced_accuracy_score(iris.target, predict)

# Visualize testing results
plt.title(f'naive_bayes.Gaussian ({accuracy:.3f})')
plt.scatter(iris.data[:,0], iris.data[:,1], c=iris.color[iris.target], edgecolors=iris.color[predict])
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()
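To double-check the "Validate training" printout numerically, the posterior that `GaussianNB` reports can be reproduced from its trained priors, means, and variances. A minimal sketch (not part of this commit; it reuses `model` and `iris` from the script above, and `model.sigma_` becomes `model.var_` on scikit-learn >= 1.0):

# Hypothetical check: reproduce predict_proba for one sample from the trained parameters
x = iris.data[0]
likelihood = np.array([multivariate_normal(model.theta_[c], np.diag(model.sigma_[c])).pdf(x)
                       for c in range(len(model.classes_))])
posterior = model.class_prior_ * likelihood
posterior = posterior / posterior.sum()  # Normalize over the three classes
print(posterior)                         # Should closely match the line below
print(model.predict_proba([x])[0])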
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, linear_model, naive_bayes, neural_network, neighbors, svm, tree, ensemble, metrics)
from matplotlib.colors import ListedColormap

# Load a dataset partially
iris = datasets.load_iris()
iris.data = iris.data[:,0:2]
iris.feature_names = iris.feature_names[0:2]
iris.color = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

# Instantiate training models
models = [
    {'name': 'linear_model.SGD', 'obj': linear_model.SGDClassifier()},
    {'name': 'naive_bayes.Gaussian', 'obj': naive_bayes.GaussianNB()},
    {'name': 'neural_network.MLP', 'obj': neural_network.MLPClassifier()},
    {'name': 'neighbors.KNN', 'obj': neighbors.KNeighborsClassifier()},

    {'name': 'svm.LinearSVC', 'obj': svm.LinearSVC()},
    {'name': 'svm.SVC(linear)', 'obj': svm.SVC(kernel='linear')},
    {'name': 'svm.SVC(poly,2)', 'obj': svm.SVC(kernel='poly', degree=2)},
    {'name': 'svm.SVC(poly,3)', 'obj': svm.SVC(kernel='poly')},
    {'name': 'svm.SVC(poly,4)', 'obj': svm.SVC(kernel='poly', degree=4)},
    {'name': 'svm.SVC(rbf)', 'obj': svm.SVC(kernel='rbf')},
    {'name': r'svm.SVC(rbf,$\gamma$=1)', 'obj': svm.SVC(kernel='rbf', gamma=1)},  # Raw strings keep '\g' from being an invalid escape
    {'name': r'svm.SVC(rbf,$\gamma$=4)', 'obj': svm.SVC(kernel='rbf', gamma=4)},
    {'name': r'svm.SVC(rbf,$\gamma$=16)', 'obj': svm.SVC(kernel='rbf', gamma=16)},
    {'name': r'svm.SVC(rbf,$\gamma$=64)', 'obj': svm.SVC(kernel='rbf', gamma=64)},
    {'name': 'svm.SVC(sigmoid)', 'obj': svm.SVC(kernel='sigmoid')},

    {'name': 'tree.DecisionTree(2)', 'obj': tree.DecisionTreeClassifier(max_depth=2)},
    {'name': 'tree.DecisionTree(4)', 'obj': tree.DecisionTreeClassifier(max_depth=4)},
    {'name': 'tree.DecisionTree(N)', 'obj': tree.DecisionTreeClassifier()},
    {'name': 'tree.ExtraTree', 'obj': tree.ExtraTreeClassifier()},

    {'name': 'ensemble.RandomForest(10)', 'obj': ensemble.RandomForestClassifier(n_estimators=10)},
    {'name': 'ensemble.RandomForest(100)', 'obj': ensemble.RandomForestClassifier()},
    {'name': 'ensemble.ExtraTrees(10)', 'obj': ensemble.ExtraTreesClassifier(n_estimators=10)},
    {'name': 'ensemble.ExtraTrees(100)', 'obj': ensemble.ExtraTreesClassifier()},
    {'name': 'ensemble.AdaBoost(DTree)', 'obj': ensemble.AdaBoostClassifier(tree.DecisionTreeClassifier())},
]

x_min, x_max = iris.data[:, 0].min() - 1, iris.data[:, 0].max() + 1
y_min, y_max = iris.data[:, 1].min() - 1, iris.data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
xy = np.vstack((xx.flatten(), yy.flatten())).T

for model in models:
    # Train a model
    model['obj'].fit(iris.data, iris.target)

    # Test the model
    predict = model['obj'].predict(iris.data)
    model['acc'] = metrics.balanced_accuracy_score(iris.target, predict)

    # Visualize training results (decision boundaries)
    zz = model['obj'].predict(xy)
    plt.figure()
    plt.contourf(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color), alpha=0.2)

    # Visualize testing results
    plt.title(model['name'] + f' ({model["acc"]:.3f})')
    plt.scatter(iris.data[:,0], iris.data[:,1], c=iris.color[iris.target], edgecolors=iris.color[predict])
    plt.xlabel(iris.feature_names[0])
    plt.ylabel(iris.feature_names[1])

plt.show()
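The accuracies above are balanced accuracies on the training data itself, which favors the most flexible models. A cross-validated variant (not part of this commit, assuming scikit-learn's `cross_val_score`) gives a fairer comparison:

# Hypothetical variation: 5-fold cross-validated scores instead of training accuracy
from sklearn.model_selection import cross_val_score
for model in models:
    scores = cross_val_score(model['obj'], iris.data, iris.target, cv=5)
    print(f"{model['name']}: {scores.mean():.3f} +/- {scores.std():.3f}")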
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, svm, metrics)
from matplotlib.colors import ListedColormap

# Load a dataset partially
iris = datasets.load_iris()
iris.data = iris.data[:,0:2]
iris.feature_names = iris.feature_names[0:2]
iris.color = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

# Instantiate training models
models = [
    {'name': 'svm.LinearSVC', 'obj': svm.LinearSVC()},
    {'name': 'svm.SVC(linear)', 'obj': svm.SVC(kernel='linear')},
    {'name': 'svm.SVC(poly,2)', 'obj': svm.SVC(kernel='poly', degree=2)},
    {'name': 'svm.SVC(poly,3)', 'obj': svm.SVC(kernel='poly')},
    {'name': 'svm.SVC(poly,4)', 'obj': svm.SVC(kernel='poly', degree=4)},
    {'name': 'svm.SVC(rbf)', 'obj': svm.SVC(kernel='rbf')},
    {'name': r'svm.SVC(rbf,$\gamma$=1)', 'obj': svm.SVC(kernel='rbf', gamma=1)},  # Raw strings keep '\g' from being an invalid escape
    {'name': r'svm.SVC(rbf,$\gamma$=4)', 'obj': svm.SVC(kernel='rbf', gamma=4)},
    {'name': r'svm.SVC(rbf,$\gamma$=16)', 'obj': svm.SVC(kernel='rbf', gamma=16)},
    {'name': r'svm.SVC(rbf,$\gamma$=64)', 'obj': svm.SVC(kernel='rbf', gamma=64)},
    {'name': 'svm.SVC(sigmoid)', 'obj': svm.SVC(kernel='sigmoid')},
]

x_min, x_max = iris.data[:, 0].min() - 1, iris.data[:, 0].max() + 1
y_min, y_max = iris.data[:, 1].min() - 1, iris.data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
xy = np.vstack((xx.flatten(), yy.flatten())).T

for model in models:
    # Train a model
    model['obj'].fit(iris.data, iris.target)

    # Test the model
    predict = model['obj'].predict(iris.data)
    model['acc'] = metrics.balanced_accuracy_score(iris.target, predict)

    # Visualize training results (decision boundaries)
    zz = model['obj'].predict(xy)
    plt.figure()
    plt.contourf(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color), alpha=0.2)

    # Visualize testing results
    plt.title(model['name'] + f' ({model["acc"]:.3f})')
    plt.scatter(iris.data[:,0], iris.data[:,1], c=iris.color[iris.target], edgecolors=iris.color[predict])
    plt.xlabel(iris.feature_names[0])
    plt.ylabel(iris.feature_names[1])

plt.show()
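The gamma values swept above control the locality of the RBF kernel K(x, x') = exp(-gamma * ||x - x'||^2): larger gamma shrinks each support vector's influence, which is why the decision boundaries grow more wiggly. A small sketch (not part of this commit) that evaluates the kernel directly:

# Hypothetical illustration: the RBF kernel matrix for the first five samples
from sklearn.metrics.pairwise import rbf_kernel
print(rbf_kernel(iris.data[:5], gamma=1))   # Moderate off-diagonal similarity
print(rbf_kernel(iris.data[:5], gamma=64))  # Off-diagonal entries collapse toward 0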
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, tree, metrics)
from matplotlib.colors import ListedColormap

# Load a dataset partially
iris = datasets.load_iris()
iris.data = iris.data[:,0:2]
iris.feature_names = iris.feature_names[0:2]
iris.color = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

# Train a model
model = tree.DecisionTreeClassifier(max_depth=2) # Try deeper
model.fit(iris.data, iris.target)

# Visualize training results (decision boundaries)
x_min, x_max = iris.data[:, 0].min() - 1, iris.data[:, 0].max() + 1
y_min, y_max = iris.data[:, 1].min() - 1, iris.data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
xy = np.vstack((xx.flatten(), yy.flatten())).T
zz = model.predict(xy)
plt.contourf(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color), alpha=0.2)

# Test the model
predict = model.predict(iris.data)
accuracy = metrics.balanced_accuracy_score(iris.target, predict)

# Visualize testing results (overlaid on the decision boundaries above)
plt.title(f'Decision tree ({accuracy:.3f})')
plt.scatter(iris.data[:,0], iris.data[:,1], c=iris.color[iris.target], edgecolors=iris.color[predict])
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])

# Visualize training results (the trained tree)
plt.figure()
tree.plot_tree(model, feature_names=iris.feature_names, class_names=iris.target_names, impurity=False)
plt.show()
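Besides `plot_tree`, the trained rules can be dumped as plain text, which is handy for checking the max_depth=2 structure. A one-liner sketch (not part of this commit; `export_text` is available since scikit-learn 0.21):

# Hypothetical addition: print the trained decision rules as text
print(tree.export_text(model, feature_names=iris.feature_names))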

examples/iris_clustering_kmeans.py

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import (datasets, cluster)
from matplotlib.colors import ListedColormap

# Load a dataset partially
iris = datasets.load_iris()
iris.data = iris.data[:,0:2] # Try [:,2:4]
iris.feature_names = iris.feature_names[0:2] # Try [2:4]
iris.color = np.array([(1, 0, 0), (0, 1, 0), (0, 0, 1)])

# Train a model
model = cluster.KMeans(n_clusters=3)
model.fit(iris.data)

# Visualize training results (decision boundaries)
# Note: k-means assigns arbitrary cluster indices, so region colors may not match the class colors below
x_min, x_max = iris.data[:, 0].min() - 1, iris.data[:, 0].max() + 1
y_min, y_max = iris.data[:, 1].min() - 1, iris.data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
xy = np.vstack((xx.flatten(), yy.flatten())).T
zz = model.predict(xy)
plt.contourf(xx, yy, zz.reshape(xx.shape), cmap=ListedColormap(iris.color), alpha=0.2)

# Visualize testing results
plt.title('cluster.KMeans')
plt.scatter(iris.data[:,0], iris.data[:,1], c=iris.color[iris.target])
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])

# Visualize training results (mean values)
for c in range(model.n_clusters):
    plt.scatter(*model.cluster_centers_[c], marker='+', s=200, color='k')
plt.show()
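Since k-means never sees `iris.target`, its cluster indices are arbitrary, and the shaded regions need not agree with the true class colors. A permutation-invariant score (not part of this commit) sidesteps this when evaluating the clustering:

# Hypothetical evaluation: compare clusters to true classes, ignoring label permutation
from sklearn import metrics
print(f'Adjusted Rand index: {metrics.adjusted_rand_score(iris.target, model.labels_):.3f}')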

examples/line_fitting_sklearn.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model

true_line = lambda x: -2/3*x + 14/3
data_range = np.array([-4, 12])
data_num = 100
noise_std = 0.5

# Generate the true data
x = np.random.uniform(data_range[0], data_range[1], size=data_num)
y = true_line(x) # y = -2/3*x + 14/3

# Add Gaussian noise
xn = x + np.random.normal(scale=noise_std, size=x.shape)
yn = y + np.random.normal(scale=noise_std, size=y.shape)

# Train a model
model = linear_model.LinearRegression()
model.fit(xn.reshape(-1, 1), yn.reshape(-1, 1))
score = model.score(xn.reshape(-1, 1), yn.reshape(-1, 1))

# Plot the data and result
plt.title(f'Line: y={model.coef_[0][0]:.3f}*x + {model.intercept_[0]:.3f} (score={score:.3f})')
plt.plot(data_range, true_line(data_range), 'r-', label='The true line')
plt.plot(xn, yn, 'b.', label='Noisy data')
plt.plot(data_range, model.coef_[0]*data_range + model.intercept_, 'g-', label='Estimate')
plt.xlim(data_range)
plt.legend()
plt.show()
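The same fit can be cross-checked against NumPy's closed-form least squares, which should agree with `LinearRegression` up to floating-point noise. A two-line sketch (not part of this commit):

# Hypothetical cross-check: closed-form least-squares fit of the same noisy data
coef, intercept = np.polyfit(xn, yn, deg=1)
print(f'np.polyfit: y = {coef:.3f}*x + {intercept:.3f}')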

slides/ml_tutorial.pdf

-103 Bytes (binary file not shown)
