-
Notifications
You must be signed in to change notification settings - Fork 53
/
pset1-solutions.jl
348 lines (310 loc) · 11.1 KB
/
pset1-solutions.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# This file contains just the final implementations from the solutions
# notebook, with no tests, explanations, or benchmarks. Refer to the
# pset1-solutions.ipynb notebook for more information.
########################################################################
# problem 1(a)
function circshift_perm2!(X::AbstractVector, s::Int, B=BitVector(s))
length(B) == s || throw(DimensionMismatch("wrong number of bits"))
fill!(B, false)
n = length(X)
@assert 0 ≤ s < n
s == 0 && return X
ncycles = gcd(s,n)
@inbounds for i = 1:s
if !B[i] # cycle starting at i has not been visited yet
Xi = X[i]
j = i
while true
k = j - s + n
X[j] = X[k]
B[j] = true
j = k
while j > s
k = j - s # j should be > s
X[j] = X[k]
j = k
end
j == i && break # end of cycle
end
k = j + s
X[k > n ? k - n : k] = Xi
ncycles -= 1
ncycles == 0 && break
end
end
return X
end
function circshift_reverse!(a::AbstractVector, s::Int)
n = length(a)
# these optimizations are implemented below
# s = mod(s, n)
# s == 0 && return a
@assert 0 ≤ s < n
s′ = n-s
reverse!(a, 1, s′)
reverse!(a, s′+1, n)
reverse!(a)
end
# I would use the copy! function here, which is highly optimized, but I
# ran into a problem with views: https://github.com/JuliaLang/julia/issues/20069
function mycopy!(a, s, s′)
@simd for i = s′:-1:1 # copy a[1:s′] to a[s+1:n]
@inbounds a[s+i] = a[i]
end
end
mycopy!(a::Array, s, s′) = copy!(a, s+1, a, 1, s′)
function circshift_buf1!{T}(a::AbstractVector{T}, buf::AbstractVector{T})
n = length(a)
s = length(buf)
s′ = n - s
copy!(buf, 1, a, s′+1, s) # copy a[s′+1:n] to buf[1:s]
mycopy!(a, s, s′) # copy a[1:s′] to a[s+1:n]
copy!(a, 1, buf, 1, s) # copy buf to a[1:s]
return a
end
circshift_buf1!{T}(a::AbstractVector{T}, s::Int) = circshift_buf1!(a, Array{T}(s))
function circshift_buf2!{T}(a::AbstractVector{T}, buf::AbstractVector{T})
n = length(a)
s′ = length(buf)
s = n - s′
copy!(buf, 1, a, 1, s′) # copy a[1:s′] to buf
copy!(a, 1, a, s′+1, s) # copy a[s′+1:n] to a[1:s]
copy!(a, s+1, buf, 1, s′) # copy buf to a[s+1:n]
return a
end
circshift_buf2!{T}(a::AbstractVector{T}, s::Int) = circshift_buf2!(a, Array{T}(length(a)-s))
function circularshift!(X::AbstractVector, s::Int)
n = length(X)
n == 0 && return X
s = mod(s, n)
s == 0 && return X
if n <= 100
return circshift_reverse!(X, s)
elseif sizeof(X) ≥ 32n
return circshift_perm2!(X, s)
elseif 8s ≤ n
return circshift_buf1!(X, s)
elseif 8(n-s) ≤ n
return circshift_buf2!(X, s)
else
return circshift_reverse!(X, s)
end
end
# convert `s` to an `Int`, to prevent both type instabilities and
# bad performance if someone is perverse and passes, say, a `BigInt`.
circularshift!(X::AbstractVector, s::Integer) = circularshift!(X, Int(s))
########################################################################
# problem 1(b)
# efficient code to copy a chunk of rows in a matrix. (These are only called
# internally below when we can guaranteee that the indices are in-bounds.)
function _copyrows!(Xi::Vector, X::AbstractMatrix, chunkrow, i) # X[:,i] to Xi
chunkrow -= 1
@simd for j = 1:length(Xi)
@inbounds Xi[j] = X[chunkrow+j, i]
end
end
function _copyrows!(X::AbstractMatrix, chunklen, chunkrow, k, i) # X[:,i] to X[:,k]
@simd for j = chunkrow:chunkrow+chunklen-1
@inbounds X[j, k] = X[j, i]
end
end
function _copyrows!(X::AbstractMatrix, chunkrow, i, Xi::Vector) # Xi to X[:,i]
chunkrow -= 1
@simd for j = 1:length(Xi)
@inbounds X[chunkrow+j, i] = Xi[j]
end
end
# like circshift_perm2! above, but permute the rows of X by s, permuting
# several rows at a time in chunks. (It would be nicer to avoid the copy-paste
# duplication with some better abstractions here.)
function circshift_perm2!{T}(X::AbstractMatrix{T}, s::Int)
n = size(X,2)
n == 0 && return X
s = mod(s, n)
s == 0 && return X
B = BitVector(s)
ncycles_per_row = gcd(s,n)
# figure out how big of a chunk we can allocate according to the rules
Xi = Array{T}(1) # allocate array of 1 element to get size.
# We need to satisfy:
# chunklen*sizeof(Xi) + sizeof(B) ≤ (sizeof(Xi)*size(X,2))÷8 + 128
chunklen = max(1, size(X,2)÷8 + (128-sizeof(B))÷sizeof(Xi))
for chunkrow = 1:chunklen:size(X,1) # start of each row chunk
# the last chunk of rows might be smaller
chunklen = min(chunklen, size(X,1) - chunkrow + 1)
resize!(Xi, chunklen)
fill!(B, false)
ncycles = ncycles_per_row
@inbounds for i = 1:s
if !B[i] # cycle starting at i has not been visited yet
# Xi = X[i]
_copyrows!(Xi, X, chunkrow, i)
j = i
while true
k = j - s + n
# X[j] = X[k]
_copyrows!(X, chunklen, chunkrow, j, k)
B[j] = true
j = k
while j > s
k = j - s # j should be > s
# X[j] = X[k]
_copyrows!(X, chunklen, chunkrow, j, k)
j = k
end
j == i && break # end of cycle
end
k = j + s
# X[k > n ? k - n : k] = Xi
_copyrows!(X, chunkrow, k > n ? k - n : k, Xi)
ncycles -= 1
ncycles == 0 && break
end
end
end
return X
end
circularshift!(X::AbstractMatrix, s::Integer) = circshift_perm2!(X, Int(s))
########################################################################
# problem 1(c)
function circshift_bybatch!(X::AbstractMatrix, batchsize::Int)
m, n = size(X)
n==0 && return X # we assume n>0 below
for i = 1:batchsize:m # i is the start of each batch
b = min(batchsize, m-i+1) # size of the current batch
s = mod(i, n)
s′ = s == 0 ? 0 : n - s
# perform reversals of 1:s′ and s′+1:n for each row j in the batch
for j = i:i+b-1
v = view(X, j, :)
reverse!(v, 1, s′) # reverse first s′ elements
reverse!(v, s′+1, n) # reverse last s elements
s′ -= 1
if s′ < 0
s′ += n
end
end
# reverse the whole row for every element in the block at once:
k = 1
k′ = n
@inbounds while k < k′
@simd for j = i:i+b-1
t = X[j, k]
X[j, k] = X[j, k′]
X[j, k′] = t
end
k += 1
k′ -= 1
end
end
return X
end
circularshift!(X::AbstractMatrix) = circshift_bybatch!(X, size(X,1))
########################################################################
########################################################################
# problem 2
using StaticArrays
# since we store the spheres in a tree, we also need to keep track of the index
# with which that sphere appeared in the original data, so that we can still return
# that index from findsphere. We also convert to a floating-point type, since
# otherwise the radius^2 computation can easily overflow. (Note that initial "slow"
# solution converted to floating-point in the norm computations anyway.)
immutable Sphere{T<:AbstractFloat}
center::SVector{3,T}
radius2::T # the radius^2
index::Int # index (row) of this sphere in the original data
end
function Sphere{T<:Real,S<:Real}(center::AbstractVector{T}, radius::S, i::Integer)
R = float(promote_type(T, S))
return Sphere{R}(SVector{3,R}(center), R(radius)^2, Int(i))
end
type KDTree{T<:Real}
o::Vector{Sphere{T}}
ix::Int # dimension being sliced (1 to 3, or 0 for leaf nodes)
x::Float64 # the coordinate of the slice plane
left::KDTree # objects ≤ x in coordinate ix
right::KDTree # objects > x in coordinate ix
KDTree(o::Vector{Sphere{T}}) = new(o, 0)
function KDTree(x::Real, ix::Integer, left::KDTree{T}, right::KDTree{T})
1 ≤ ix ≤ 3 || throw(BoundsError())
new(Sphere{T}[], ix, x, left, right)
end
end
function KDTree{T}(o::AbstractVector{Sphere{T}})
length(o) <= 8 && return KDTree{T}(o)
# figure out the best dimension ix to divide over,
# the dividing plane x, and the number (nl,nr) of
# objects that fall into the left and right subtrees
ix = 0
x = zero(T)
nl = nr = typemax(Int)
for i = 1:3
mx = median(map(s -> s.center[i], o))
mnl = count(s -> s.center[i] - sqrt(s.radius2) ≤ mx, o) # lower bound ≤ mx
mnr = count(s -> s.center[i] + sqrt(s.radius2) > mx, o) # upper bound > mx
if max(mnl,mnr) < max(nl,nr)
ix = i
x = mx
nl = mnl
nr = mnr
end
end
# don't bother subdividing if it doesn't reduce the # of objects much
4*max(nl,nr) > 3*length(o) && return KDTree{T}(o)
# create the arrays of objects in each subtree
ol = Array{Sphere{T}}(nl)
or = Array{Sphere{T}}(nr)
il = ir = 0
for k in eachindex(o)
s = o[k]
r = sqrt(s.radius2)
if s.center[ix] - r ≤ x
ol[il += 1] = o[k]
end
if s.center[ix] + r > x
or[ir += 1] = o[k]
end
end
return KDTree{T}(x, ix, KDTree(ol), KDTree(or))
end
makespheres_{T<:Real}(data::AbstractMatrix{T}) = [Sphere(data[i,1:3], data[i,4], i) for i = 1:size(data,1)]
makespheres{T<:Real}(data::AbstractMatrix{T}) = KDTree(makespheres_(data))
depth(kd::KDTree) = kd.ix == 0 ? 0 : max(depth(kd.left), depth(kd.right)) + 1
Base.show{T}(io::IO, kd::KDTree{T}) = print(io, "KDTree{$T} of depth ", depth(kd))
function _show(io::IO, kd::KDTree, indent)
indentstr = " "^indent
if kd.ix == 0
println(io, indentstr, length(kd.o), " objects")
else
println(io, indentstr, "if x[", kd.ix, "] ≤ ", kd.x, ':')
_show(io, kd.left, indent + 2)
println(io, indentstr, "else:")
_show(io, kd.right, indent + 2)
end
end
function Base.show(io::IO, ::MIME"text/plain", kd::KDTree)
println(io, kd, ':')
_show(io, kd, 0)
end
Base.in{T}(p::SVector{3}, S::Sphere{T}) = sumabs2(p - S.center) ≤ S.radius2
function findsphere{T}(o::AbstractVector{Sphere{T}}, p::SVector{3})
for i in eachindex(o)
if p in o[i]
return o[i].index
end
end
return 0
end
function findsphere{T}(kd::KDTree{T}, p::SVector{3})
if isempty(kd.o)
if p[kd.ix] ≤ kd.x
return findsphere(kd.left, p)
else
return findsphere(kd.right, p)
end
else
return findsphere(kd.o, p)
end
end
findsphere{T}(S::AbstractVector{Sphere{T}}, p::AbstractVector) = findsphere(S, SVector{3}(p))
findsphere{T}(kd::KDTree{T}, p::AbstractVector) = findsphere(kd, SVector{3}(p))