# 09_GLM.py -- binary logistic regression (a GLM) trained with and without additional
# handcrafted features on two overlapping circular clusters of points.
import numpy as np
from ml_algo import BinaryLogisticRegression, FeatureConstructor, circle_cluster
from matplotlib import pyplot as plt
from random import seed

seed(12345)

def main():
    X = [[1, 20], [3, 40], [5, 60], [7, 80], [9, 100]]
    f = FeatureConstructor([lambda x: x[0]*x[1],
                            lambda x: x[0]**2,
                            lambda x: x[1]**2,
                            lambda x: x[0] - x[1]])
    print("Example of additional feature constructing:\n", f(X))
    # Generate two random circular clusters of 2D points:
    # class 0 ("red") centred at (10, 10), class 1 ("blue") centred at (20, 0).
    num_train = 400
    num_test = 200
    red_dots = np.array([circle_cluster(10, 10, 10) for _ in range(num_train//2)])
    blue_dots = np.array([circle_cluster(20, 0, 10) for _ in range(num_train//2)])
    Xtrain = np.concatenate((red_dots, blue_dots))
    ytrain = np.array([0]*(num_train//2) + [1]*(num_train//2))
    red_dots_test = np.array([circle_cluster(10, 10, 10) for _ in range(num_test//2)])
    blue_dots_test = np.array([circle_cluster(20, 0, 10) for _ in range(num_test//2)])
    Xtest = np.concatenate((red_dots_test, blue_dots_test))
    ytest = np.array([0]*(num_test//2) + [1]*(num_test//2))
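    # Note: circle_cluster(cx, cy, r) is also from ml_algo; it is assumed to return a
    # single random 2D point drawn from a disc of radius r centred at (cx, cy). The two
    # discs overlap, so the classes are not perfectly separable and some test error is
    # expected.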
    # Fit two logistic regression models: model1 on the raw 2D features,
    # model2 on the expanded feature set returned by f.
    model1 = BinaryLogisticRegression(0.03, 2)
    model1.fit(Xtrain, ytrain, 1000, 1e-5)
    _Xtrain = f(Xtrain)
    model2 = BinaryLogisticRegression(0.001, len(_Xtrain[0]), momentum=0.5)
    model2.fit(_Xtrain, ytrain, 1000, 1e-5)
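    # Note: the argument meanings above are inferred from usage -- presumably
    # BinaryLogisticRegression(learning_rate, n_features[, momentum]) and
    # fit(X, y, max_iterations, tolerance); see ml_algo for the exact signatures.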
    # Evaluate both models on the test set and collect confusion matrices.
    # Rows index the predicted class, columns the true class; confusion_dots keeps
    # the actual points per cell for the plots below.
    threshold = lambda x: 0 if x < 0.5 else 1
    confmat1 = np.zeros((2, 2))
    confmat2 = np.zeros((2, 2))
    confusion_dots1 = [[[], []], [[], []]]
    confusion_dots2 = [[[], []], [[], []]]
    for i in range(len(Xtest)):
        result = threshold(model1.predict(Xtest[i]))
        confmat1[result][ytest[i]] += 1
        confusion_dots1[result][ytest[i]].append(Xtest[i])
        result = threshold(model2.predict(f(Xtest[i])))
        confmat2[result][ytest[i]] += 1
        confusion_dots2[result][ytest[i]].append(Xtest[i])
    print('\nTraining two logistic regression models, with and without additional handcrafted features\n\nBasic features\nConfusion matrix:')
    print(confmat1)
    # Class 0 (the "red" cluster) is treated as the positive class:
    # precision = TP / (TP + FP), recall = TP / (TP + FN).
    accuracy = (confmat1[0][0] + confmat1[1][1]) / np.sum(confmat1)
    precision = confmat1[0][0] / (confmat1[0][0] + confmat1[0][1])
    recall = confmat1[0][0] / (confmat1[0][0] + confmat1[1][0])
    f1score = 2*precision*recall / (precision + recall)
    print('\nAccuracy:\t{}\nPrecision:\t{}\nRecall: \t{}\nF1 score:\t{}'.format(accuracy, precision, recall, f1score))
    print('\nAdditional handcrafted features\nConfusion matrix:')
    print(confmat2)
    accuracy = (confmat2[0][0] + confmat2[1][1]) / np.sum(confmat2)
    precision = confmat2[0][0] / (confmat2[0][0] + confmat2[0][1])
    recall = confmat2[0][0] / (confmat2[0][0] + confmat2[1][0])
    f1score = 2*precision*recall / (precision + recall)
    print('\nAccuracy:\t{}\nPrecision:\t{}\nRecall: \t{}\nF1 score:\t{}'.format(accuracy, precision, recall, f1score))
    print('With the additional features the model generalizes slightly better.')
    # Plot the test points for both models: dots are correctly classified points,
    # crosses are misclassified ones; blue marks true class 0, red marks true class 1.
    _, plots = plt.subplots(2, 2)
    plots[0][0].plot(*np.array(confusion_dots1[0][0]).T, 'b.')
    plots[0][0].plot(*np.array(confusion_dots1[1][0]).T, 'bx')
    plots[0][0].plot(*np.array(confusion_dots1[0][1]).T, 'rx')
    plots[0][0].plot(*np.array(confusion_dots1[1][1]).T, 'r.')
    plots[0][0].set_title('Classification task (without new features)')
    plots[1][0].plot(*np.array(confusion_dots2[0][0]).T, 'b.')
    plots[1][0].plot(*np.array(confusion_dots2[1][0]).T, 'bx')
    plots[1][0].plot(*np.array(confusion_dots2[0][1]).T, 'rx')
    plots[1][0].plot(*np.array(confusion_dots2[1][1]).T, 'r.')
    plots[1][0].set_title('Classification task (with new features)')
    plots[0][1].plot(model1.cost_func_log)
    plots[0][1].set_title('Cost function (basic features)')
    plots[1][1].plot(model2.cost_func_log)
    plots[1][1].set_title('Cost function (handcrafted features)')
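    # Evaluate each model's predicted probability on a regular grid and overlay the
    # contours on the scatter plots; with levels [0, 0.5, 1], the visible 0.5 contour
    # is the model's decision boundary.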
    x = np.linspace(-10, 30, 100)
    y = np.linspace(-10, 30, 100)
    X, Y = np.meshgrid(x, y)
    Z1 = np.zeros((len(y), len(x)))
    for i in range(len(x)):
        for j in range(len(y)):
            Z1[j][i] = model1.predict(np.array([X[j][i], Y[j][i]]))
    Z2 = np.zeros((len(y), len(x)))
    for i in range(len(x)):
        for j in range(len(y)):
            a = np.array([X[j][i], Y[j][i]])
            Z2[j][i] = model2.predict(f(a))
    levels = np.linspace(0, 1, 3)
    cs = plots[0][0].contour(X, Y, Z1, levels=levels)
    plots[0][0].clabel(cs, inline=1, fontsize=10)
    cs = plots[1][0].contour(X, Y, Z2, levels=levels)
    plots[1][0].clabel(cs, inline=1, fontsize=10)
    plt.show()


if __name__ == '__main__':
    main()