In [None]:
# Assume we have an initial data set (X, y)
# and want to know whether to label a new point x1 or x2.
# NOTE: no random seed is set, so every run draws different data.
import numpy as np

# Design matrix: 5 labelled points with 3 features each, uniform on [0, 1).
X = np.random.random_sample((5,3))
print(X)
# Targets use true weights (1, 1, 1) plus Gaussian noise with std 0.25.
y = X @ np.array([1.0,1.0,1.0]) + np.random.normal(0,0.25,len(X))
print('y:',y)
# Ordinary least squares: solve the normal equations (X'X) w = X'y directly
# instead of forming inv(X'X), which costs more and loses accuracy.
w = np.linalg.solve(X.T @ X, X.T @ y)
print('w:',w)
# Two unlabelled candidate points (each a 1x3 row) to choose between.
x1 = np.random.random_sample((1,3))
x2 = np.random.random_sample((1,3))

[[0.58160247 0.08239375 0.94080134]
 [0.94777716 0.40859616 0.61581903]
 [0.8239238  0.7931139  0.63265291]
 [0.99197352 0.28222987 0.02458456]
 [0.31504212 0.59148018 0.99573592]]
y: [1.42552021 2.39198928 2.33319205 1.06008647 1.53724875]
w: [1.11623725 1.01058632 0.82192974]


In [None]:
# We want to minimize the variance of our estimate of the parameter w.
# The covariance of the least-squares estimate is proportional to (X'X)^{-1},
# so pick the new point to label which makes (X'X)^{-1} "smallest".


def a_optimality(design):
    """Return trace((D'D)^{-1}) for the design matrix D (A-optimal criterion)."""
    return np.trace(np.linalg.inv(design.T @ design))


def d_optimality(design):
    """Return log det((D'D)^{-1}) for the design matrix D (D-optimal criterion).

    Uses the identity log det(M^{-1}) = -log det(M) together with
    np.linalg.slogdet, so no inverse is formed and the determinant cannot
    overflow or underflow. A singular D'D yields +inf.
    """
    _, log_abs_det = np.linalg.slogdet(design.T @ design)
    return -log_abs_det


# Augment the labelled set with each candidate in turn.
X1 = np.vstack([X,x1])
X2 = np.vstack([X,x2])

# Compute the trace, for A-optimal design
X1A = a_optimality(X1)
X2A = a_optimality(X2)
print("Trace norms from X1 and X2", X1A, X2A)
# you will get different results each time, but pick the new x that
# gave the smaller value

# or instead the log determinant, for D-optimal design
X1D = d_optimality(X1)
X2D = d_optimality(X2)

print("log determinant norms from X1 and X2", X1D, X2D)




Trace norms from X1 and X2 4.405754320432505 3.7714438358907585
log determinant norms from X1 and X2 -0.5427493140017275 -0.7416018411622476


In [None]:
# An aside on matrix norms:
# the Frobenius norm of A equals [trace(AA')]^(1/2).
# A here is symmetric, being the inverse of the symmetric matrix X1'X1.

gram_X1 = X1.T @ X1
A = np.linalg.inv(gram_X1)
print(A)

# np.linalg.norm on a 2-D array defaults to the Frobenius norm.
fro_direct = np.linalg.norm(A)
fro_via_trace = np.sqrt(np.trace(A @ A.T))
print("Frobenious norm", fro_direct)
print("from the trace", fro_via_trace)



[[ 0.93636999 -0.84475036 -0.29342892]
 [-0.84475036  2.61245702 -0.62682121]
 [-0.29342892 -0.62682121  0.85692731]]
Frobenious norm 3.289568652115702
from the trace 3.289568652115702


In [None]:
# Next, find which prediction is "most uncertain" for active learning,
# this time on a classification problem.
from sklearn.linear_model import LogisticRegression

# Binary labels for the five rows of X.
ys = np.array([0, 0, 1, 1, 1])
print(X)
print(ys)

logreg = LogisticRegression(random_state=0)
logreg.fit(X, ys)

# predict_proba returns one column per class in logreg.classes_ order, so
# [0][0] is the first (only) row's probability for the first class (label 0).
for tag, candidate in (('p(x1) ', x1), ('p(x2) ', x2)):
    print(tag, logreg.predict_proba(candidate)[0][0])
# The "most uncertain x" is the one whose predicted probability is closest
# to 0.5; that is the point we want to label.

[[0.58160247 0.08239375 0.94080134]
 [0.94777716 0.40859616 0.61581903]
 [0.8239238  0.7931139  0.63265291]
 [0.99197352 0.28222987 0.02458456]
 [0.31504212 0.59148018 0.99573592]]
[0 0 1 1 1]
p(x1)  0.47487652367026756
p(x2)  0.40403338143166057


