/
utils.jl
60 lines (51 loc) · 1.89 KB
/
utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Handy helpers
function include_everywhere(filepath)
    # Include on the calling process first. The original author's note says this
    # is node 1 and that loading here first triggers any precompilation.
    include(filepath)

    # With a single process there are no workers to forward the file to.
    nprocs() > 1 || return nothing

    # Workers receive the path resolved against the directory of this file.
    target = joinpath(@__DIR__, filepath)

    # Start one remote include per worker; @sync waits until all of them finish.
    @sync for pid in workers()
        @async remotecall_wait(include, pid, target)
    end
    return nothing
end
# Interleave the elements of `U` and `V` pairwise into one space-separated
# string: "u1 v1 u2 v2 ... ". Every element, including the last, is followed by
# a single space. Pairing stops at the end of the shorter collection (`zip`
# semantics); empty input yields "".
function zip_word_sets(U, V)
    # Write into a single buffer instead of growing a string with `*=`, which
    # re-copies the accumulated text on every iteration. `print` also accepts
    # non-string elements (e.g. symbols), rendering them as text.
    return sprint() do io
        for (u, v) in zip(U, V)
            print(io, u, " ", v, " ")
        end
    end
end
# Build four test "documents" out of WEAT words.
# `weat_word_set` must expose the properties `S`, `T`, `A` and `B`
# (presumably the two target sets and two attribute sets of a WEAT test;
# confirm against the caller). Each of S and T is interleaved with each of
# A and B via `zip_word_sets`, and the results are returned as the named tuple
# `(SA, SB, TA, TB)`.
function make_test_docs(weat_word_set)
    words = weat_word_set
    return (
        SA = zip_word_sets(words.S, words.A),
        SB = zip_word_sets(words.S, words.B),
        TA = zip_word_sets(words.T, words.A),
        TB = zip_word_sets(words.T, words.B),
    )
end
# Regex helper.
# Find the first match of `regex` in `string`, drop `trim[1]` characters from
# the front and `trim[2]` characters from the back of the matched text, and
# optionally convert what is left.
#   trim : (head, tail) number of characters to strip from the match.
#   cast : `nothing` to return the trimmed text, or a type `T` to return
#          `parse(T, text)` (which throws if the text is not a valid `T`).
# Returns `nothing` when the pattern does not match.
function extract(string, regex; trim=(0,0), cast=nothing)
    m = match(regex, string)
    # `===` is the reliable identity test for `nothing`.
    m === nothing && return nothing
    # `chop` counts characters rather than bytes, so trimming stays valid even
    # when the match contains multi-byte (non-ASCII) characters.
    piece = chop(m.match; head=trim[1], tail=trim[2])
    return cast === nothing ? piece : parse(cast, piece)
end
# Get file info from the naming convention.
# Each setting is encoded in `filepath` as a one-letter tag followed by its
# value; the first occurrence of each tag pattern anywhere in the path is used:
#   C<int> corpus, V<int> min_vocab, W<int> window, D<int> dimension,
#   R<int>.<int> eta (Float64), E<int> max_iters, S<int> seed.
# A trailing three-digit number right before ".bin" gives `iters`; when it is
# absent, `iters` falls back to `max_iters`.
# Returns a named tuple; any field whose tag is not found is `nothing`.
function fileinfo(filepath)
    # All tags are a single letter, so one leading character is trimmed.
    tagged(regex, T) = extract(filepath, regex, trim=(1, 0), cast=T)

    corpus = tagged(r"C[0-9]+", Int64)
    min_vocab = tagged(r"V[0-9]+", Int64)
    window = tagged(r"W[0-9]+", Int64)
    dimension = tagged(r"D[0-9]+", Int64)
    # The decimal point is escaped: a bare `.` matches any character, which let
    # e.g. "R123" be read as eta = 123.0 by consuming a digit as the separator.
    eta = tagged(r"R[0-9]+\.[0-9]+", Float64)
    max_iters = tagged(r"E[0-9]+", Int64)
    seed = tagged(r"S[0-9]+", Int64)

    # Trim the one-character separator in front and the 4-character ".bin"
    # behind. The dot of the extension is escaped so only a literal ".bin"
    # suffix qualifies.
    # NOTE(review): the leading `.` still accepts ANY character as separator, so
    # a name ending in e.g. "S123.bin" is read as iters = 123. Tighten it once
    # the intended separator is confirmed.
    tmp = extract(filepath, r".[0-9]{3}\.bin$", trim=(1, 4), cast=Int64)
    iters = tmp === nothing ? max_iters : tmp

    return (corpus=corpus, min_vocab=min_vocab, window=window, dimension=dimension,
            eta=eta, max_iters=max_iters, seed=seed, iters=iters)
end