Skip to content

Commit

Permalink
simplifies create_bow_from_corpus; removes io.jl
Browse files Browse the repository at this point in the history
  • Loading branch information
sadit committed Jan 12, 2021
1 parent 930c57c commit 0ffa549
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 128 deletions.
7 changes: 1 addition & 6 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ version = "0.4.2"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
KCenters = "5d8de97f-65f8-4dd6-a15b-0f89c36a43ce"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Expand All @@ -18,10 +15,8 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
GZip = "0.5.0"
JSON = "0.21.0"
CategoricalArrays = "0.8.3"
KCenters = "0.2.3"
SimilaritySearch = "0.3.21"
StatsBase = "0.32.0"
julia = "0.7, 1"
CategoricalArrays = "0.8.3"
1 change: 0 additions & 1 deletion src/TextSearch.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ module TextSearch

include("textconfig.jl")
include("dvec.jl")
include("io.jl")
include("basicmodels.jl")
include("distmodel.jl")
include("entmodel.jl")
Expand Down
39 changes: 23 additions & 16 deletions src/basicmodels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# License is Apache 2.0: https://www.apache.org/licenses/LICENSE-2.0.txt

export VectorModel, fit, vectorize, TfidfModel, TfModel, IdfModel, FreqModel, prune, prune_select_top
using Distributed

"""
abstract type Model
Expand Down Expand Up @@ -38,23 +37,31 @@ mutable struct VectorModel <: Model
n::Int # collection size
end

## function create_bow_from_corpus(config, corpus; batch_size=128)
## m = nworkers()
## n = length(corpus)
##
## L = []
## for _corpus in Iterators.partition(corpus, batch_size)
## b = @spawn begin
## bow = BOW()
## for text in _corpus
## compute_bow(tokenize(config, text), bow)
## end
## bow
## end
## push!(L, b)
## end
##
## sum(fetch.(L))
## end

function create_bow_from_corpus(config, corpus)
m = nworkers()
n = length(corpus)

L = []
for _corpus in Iterators.partition(corpus, floor(Int, n / m))
b = @spawn begin
bow = BOW()
for text in _corpus
compute_bow(tokenize(config, text), bow)
end
bow
end
push!(L, b)
bow = BOW()
for text in corpus
compute_bow(tokenize(config, text), bow)
end

sum(fetch.(L))
bow
end

"""
Expand Down
95 changes: 0 additions & 95 deletions src/io.jl

This file was deleted.

10 changes: 0 additions & 10 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,6 @@ end
@test dot(normalize!(u + v - v), normalize!(u)) > 0.99
end

@testset "io" begin
buff = IOBuffer("""{"key1": "value1a", "key2c": "value2a"}
{"key1": "value1b", "key2c": "value2b"}
{"key1": "value1c", "key2b": "value2c"}
{"key1": "value1d", "key2a": "value2d"}""")
itertweets(buff) do x
@info x
end
end

_corpus = [
"la casa roja",
"la casa verde",
Expand Down

2 comments on commit 0ffa549

@sadit
Copy link
Owner Author

@sadit sadit commented on 0ffa549 Jan 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request updated: JuliaRegistries/General/26734

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.4.2 -m "<description of version>" 0ffa549efbf5ca76e826fe9fdc62052629abbcc1
git push origin v0.4.2

Please sign in to comment.