# noisemodel.py
from __future__ import absolute_import
from __future__ import print_function
from future.standard_library import install_aliases
install_aliases()
import matplotlib.pyplot as plt
import matplotlib.image
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
from autograd.optimizers import adam
from autograd.util import quick_grad_check
from builtins import range
from data import mnist_processed
from data import save_images
from data import display_images
from data import unflatten
from data import prob2gray
from data import boost
import os
import argparse
# run interactively: exec(open("noisemodel-simple.py"),globals())
# shotgun softmax
# input: list of rows
# output: list of softmaxed rows
# note: since e^x gets very large, this max trick improves numerical stability (tested)
def softmax(x):
    e_x = np.exp(x - np.max(x))
    return np.divide(e_x, np.sum(e_x, axis=1, keepdims=True))
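# Illustrative example (not executed): each row is normalized to a probability distribution,
# and subtracting a constant before exponentiating does not change the result, e.g.
#   softmax(np.array([[1., 2.], [3., 0.]])) ~= [[0.269, 0.731], [0.953, 0.047]]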
# construct weight initialization objective/gradient
def build_init_objective(L2_VAR_2, NUM_TRAIN, train_images, train_labels, C, D, L):
    # shotgun predictions: matrix of probabilities of each datapoint being in each class
    # input weight shape: ( flattened weights )  flattened weights are indexed by class, then flattened pixels
    # ( reshaped weight shape: ( class [j], pixel [k] ) )
    # input image shape: ( training sample [l], flattened pixel [k] )
    # output prediction shape: ( class [j], training sample [l] )
    def predict(weights, inputs):
        return softmax(np.einsum('jk,lk->jl', weights.reshape(C, D), inputs))

    # Specify an inference problem by its unnormalized log-density.
    # input weight shape: ( flattened weights )  flattened weights are indexed by class, then flattened pixels
    # input image shape: ( training sample, flattened pixel )
    # input label shape: ( training sample, one-hot class encoding )
    # ( prediction shape: ( class, training sample ) )
    # output density shape: (scalar)
    def log_density(weights, t):
        return (np.einsum('jk,kj->', np.log(predict(weights, train_images)), train_labels)
                - np.sum(weights**2) / (2 * L2_VAR_2))

    def objective(params, t):
        return -log_density(params, t)

    gradient = grad(objective)
    return objective, gradient
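# For reference: np.einsum('jk,lk->jl', W, X) above is the same product as np.dot(W, X.T),
# giving one column of class probabilities per training sample, and log_density is the
# multinomial logistic log-likelihood plus a Gaussian (L2) log-prior with variance L2_VAR_2
# (up to an additive constant), so minimizing `objective` is MAP estimation of the weights.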
# construct noise-aware optimization objective/gradient
def build_noise_objective(L2_VAR_2, NUM_TRAIN, train_images, train_labels, C, D, N, L):
    # our first C**2 = N weights are for P(c|r), our next C*D = L weights are for logistic regression
    def unpack_params(params):
        c_l_r = params[:N].reshape(C, C)
        weights = params[N:]
        return c_l_r, weights

    # shotgun predictions: matrix of probabilities of each datapoint being in each class with simple logistic regression
    # input weight shape: ( flattened weights )  flattened weights are indexed by class, then flattened pixels
    # ( reshaped weight shape: ( class [j], pixel [k] ) )
    # input image shape: ( training sample [l], flattened pixel [k] )
    # output prediction shape: ( class [j], training sample [l] )
    def logisticpredict(weights, inputs):
        return softmax(np.einsum('jk,lk->jl', weights.reshape(C, D), inputs))

    # shotgun predictions: matrix of probabilities of each datapoint being in each class with Bayes' rule
    def bayespredict(c_l_r, weights, inputs):
        numerator = np.einsum('cr,rs->cs', softmax(c_l_r), logisticpredict(weights, inputs))
        denominator = np.sum(numerator, axis=0)
        return numerator / denominator
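    # Per the P(c|r) comment above, the intended model is
    #   P(observed label c | image x)  proportional to  sum_r softmax(c_l_r)[c, r] * P(true class r | x),
    # i.e. softmax(c_l_r) plays the role of a CxC label-confusion matrix and logisticpredict
    # supplies P(true class r | x); bayespredict renormalizes over c to obtain a distribution.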
    # calculate predictive accuracy on the train set and test set
    # (test_images and test_labels are module-level globals defined in the __main__ block below)
    def accuracies(params):
        # unpack the parameter vector
        optimized_c_l_r, optimized_w = unpack_params(params)
        predicted_train_classes = np.argmax(bayespredict(optimized_c_l_r, optimized_w, train_images), axis=0)
        predicted_test_classes = np.argmax(bayespredict(optimized_c_l_r, optimized_w, test_images), axis=0)
        # compute real classes
        real_train_classes = np.argmax(train_labels, axis=1)
        real_test_classes = np.argmax(test_labels, axis=1)
        # compute accuracy
        train_accuracy = np.average(np.equal(predicted_train_classes, real_train_classes).astype(float))
        test_accuracy = np.average(np.equal(predicted_test_classes, real_test_classes).astype(float))
        # output accuracy
        return train_accuracy, test_accuracy
    # Specify an inference problem by its unnormalized log-density.
    # input weight shape: ( flattened weights )  flattened weights are indexed by class, then flattened pixels
    # input image shape: ( training sample, flattened pixel )
    # input label shape: ( training sample, one-hot class encoding )
    # ( prediction shape: ( class, training sample ) )
    # output density shape: (scalar)
    def log_density(c_l_r, weights, t):
        return (np.einsum('jk,kj->',
                          np.log(np.einsum('cr,rs->cs', softmax(c_l_r), logisticpredict(weights, train_images))),
                          train_labels)
                - np.sum(weights**2) / (2 * L2_VAR_2))

    def objective(params, t):
        c_l_r, weights = unpack_params(params)
        return -log_density(c_l_r, weights, t)

    gradient = grad(objective)
    return accuracies, objective, gradient, unpack_params
if __name__ == '__main__':
    # determine run settings (corruption rates, train/test sizes, prior variance, iteration counts) from command-line args
    parser = argparse.ArgumentParser(description='noise-aware logistic regression on MNIST with corrupted labels')
    parser.add_argument('-corruptionone', type=float, default=0.00)
    parser.add_argument('-corruptiontwo', type=float, default=0.00)
    parser.add_argument('-train', type=int, default=10000)
    parser.add_argument('-test', type=int, default=10000)
    parser.add_argument('--mini', action='store_true')  # SMALL RUN: force the train and test sets to be small, and both iteration counts to be tiny
    parser.add_argument('-var', type=float, default=0.1)
    parser.add_argument('-initers', type=int, default=200)
    parser.add_argument('-iters', type=int, default=100)
    parser.add_argument('--fullaccuracy', action='store_true')
    args = parser.parse_args()
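    # Example invocations (illustrative), using the flags defined above:
    #   python noisemodel.py -corruptionone 0.1 -corruptiontwo 0.05 -train 5000 -iters 200 --fullaccuracy
    #   python noisemodel.py --mini     # tiny smoke-test run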
    if not args.mini:
        ARGS_TRAIN = args.train
        ARGS_TEST = args.test
        INIT_ITERS = args.initers
        NUM_ITERATIONS = args.iters
    else:
        ARGS_TRAIN = 30
        ARGS_TEST = 30
        INIT_ITERS = 5
        NUM_ITERATIONS = 5
    L2_VAR_2 = args.var
    P_C_ONE = args.corruptionone
    P_C_TWO = args.corruptiontwo

    # load the data
    NUM_TRAIN, NUM_TEST, train_images, train_labels, test_images, test_labels = mnist_processed(
        num_train=ARGS_TRAIN, num_test=ARGS_TEST, binarize_img=True, flatten_img=True,
        one_hot_label=True, corrupt=True, p_c_one=P_C_ONE, p_c_two=P_C_TWO)

    C = 10      # number of [C]lasses
    D = 784     # number of per-class feature [D]imensions
    N = C**2    # number of [N]oise weights
    L = C * D   # number of [L]ogistic weights
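    # With these settings the parameter vector optimized below has N + L = 100 + 7840 = 7940 entries:
    # a 10x10 noise matrix plus one 784-dimensional weight vector per class.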
    ###############################################################
    # PERFORM NAIVE LOGISTIC REGRESSION TO INITIALIZE THE WEIGHTS #
    ###############################################################
    filename = "initialization_weights_" + str(P_C_ONE) + "_" + str(P_C_TWO) + "_" + str(L2_VAR_2) + "_" + str(NUM_TRAIN) + ".npz"
    picklefilename = os.path.join('cache', filename)
    if os.path.isfile(picklefilename):
        # restore pickled initialization weights from cache/ (if they exist)
        print("restoring pickled initialization weights")
        unpickled = np.load(picklefilename)
        init_weights = unpickled['init_weights']
    else:
        # Build the initialization objective
        objective, init_gradient = build_init_objective(L2_VAR_2, NUM_TRAIN, train_images, train_labels, C, D, L)

        # Build the callback for the Adam optimizer
        def init_callback(params, t, g):
            lik = -objective(params, t)
            print("Initialization iteration {} log-likelihood {}".format(t, lik))

        # initialize weights
        pre_init_weights = np.ones(L)
        # optimize weights
        print("Initializing weights...")
        init_weights = adam(init_gradient, pre_init_weights, step_size=0.1, num_iters=INIT_ITERS, callback=init_callback)

    # cache the optimized initialization weights in cache/ (if not already cached)
    if not os.path.exists('cache'):
        print('creating cache folder')
        os.makedirs('cache')
    if not os.path.isfile(picklefilename):
        print('saving pickled regression initialization data')
        np.savez(picklefilename, init_weights=init_weights)
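    # For example, with the default flags (and assuming mnist_processed returns NUM_TRAIN = 10000),
    # the cache file would be cache/initialization_weights_0.0_0.0_0.1_10000.npz.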
    ###############################################################
    ###############################################################
    # OPTIMIZE NOISE-AWARE LIKELIHOOD                             #
    ###############################################################
    # Build the noise-aware logistic regression objective.
    accuracies, objective, gradient, unpack_params = build_noise_objective(L2_VAR_2, NUM_TRAIN, train_images, train_labels, C, D, N, L)

    # Build the callback for the Adam optimizer; log the likelihood and parameters at each iteration
    liklog = np.zeros(NUM_ITERATIONS)
    param_log = np.zeros((NUM_ITERATIONS, N + L))

    def callback(params, t, g):
        lik = -objective(params, t)
        liklog[t] = lik
        param_log[t] = params
        if not args.fullaccuracy:
            print("Iteration {} log-likelihood {}".format(t, lik))

    # Initialize the noise weights at the identity matrix (plus a small offset), biasing P(c|r)
    # toward the diagonal; carry over the initialized logistic regression weights
    init_c_l_r = np.identity(C).reshape(-1) + 0.0001
    init_params = np.concatenate([init_c_l_r, init_weights])

    # Optimize all parameters
    print("Optimizing parameters...")
    optimized_params = adam(gradient, init_params, step_size=0.1, num_iters=NUM_ITERATIONS, callback=callback)
    ###############################################################
    # output the remaining accuracy info
    if args.fullaccuracy:
        for i in range(0, NUM_ITERATIONS):
            print("Iteration {} log-likelihood {}".format(i, liklog[i]))
            train_accuracy, test_accuracy = accuracies(param_log[i])
            print("Iteration {} train_accuracy: {}".format(i, train_accuracy))
            print("Iteration {} test_accuracy: {}".format(i, test_accuracy))
    else:
        train_accuracy, test_accuracy = accuracies(optimized_params)
        print("train_accuracy: {}".format(train_accuracy))
        print("test_accuracy: {}".format(test_accuracy))
    # report the estimated convergence point: the first logged log-likelihood within <margin>
    # of the average of the last <finalsamples> log-likelihoods
    finalsamples = 5
    margin = 10
    rejectmargin = 50
    goal = np.average(liklog[-finalsamples:])
    convergenceindex = np.argmax(liklog > (goal - margin))  # argmax returns the index of the first True here
    if np.abs(goal - liklog[-1]) > rejectmargin:
        print("log-likelihood has not yet converged")
    else:
        print("log-likelihood converges by iteration " + str(convergenceindex))
    # Print the learned noise matrix and save the learned per-class weight images
    optimized_c_l_r, optimized_w = unpack_params(optimized_params)
    print(softmax(optimized_c_l_r))
    weights = optimized_w.reshape(C, 28, 28)
    save_images(weights, "weights.jpg")
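    # The printed softmax(optimized_c_l_r) is the learned 10x10 matrix interpreted above as P(c|r),
    # and weights.jpg (written by data.save_images, assumed here to tile its inputs) shows the
    # ten 28x28 per-class weight maps.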