Skip to content

Commit

Permalink
Just update to Flux.jl 0.14 without breakage, keeping CUDA as a strong dep (#88)
Browse files Browse the repository at this point in the history

- Update to Flux.jl 0.14 without breakage here, keeping CUDA as a strong dep
- Add a `use_gpu=true` model kwarg so the user can opt out of using Flux.gpu, which now warns if a GPU isn't available
- Move to a PrecompileTools-based precompilation approach, which is more robust. It avoids downloading the weights files (200MB) by loading dummy data, but the conv modelling isn't fully precompiled because of the dummy data
  • Loading branch information
IanButterworth committed Jul 17, 2023
1 parent b28ac4b commit 64f45f2
Show file tree
Hide file tree
Showing 12 changed files with 81 additions and 138 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/RunTests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ jobs:
julia-arch: x86

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: julia-actions/setup-julia@latest
with:
version: ${{ matrix.julia-version }}
- uses: julia-actions/julia-runtest@master
- uses: julia-actions/julia-runtest@v1
with:
coverage: false
# - uses: julia-actions/julia-processcoverage@v1
Expand Down
42 changes: 0 additions & 42 deletions .github/workflows_disabled/SnoopCompile.yml

This file was deleted.

11 changes: 8 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
name = "ObjectDetector"
uuid = "3dfc1049-5314-49cf-8447-288dfd02f9fb"
authors = ["Robert Luciani"]
version = "0.2.9"
version = "0.2.10"

[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
ImageDraw = "4381153b-2b60-58ae-a1ba-fd683676385f"
ImageFiltering = "6a3955dd-da59-5b1f-98d4-e7296123deb5"
ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"

cuDNN = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"

[compat]
BenchmarkTools = "0.4, 0.5, 0.6, 0.7, 1.0"
Flux = "0.12, 0.13"
CUDA = "4"
Flux = "0.12, 0.13, 0.14"
ImageCore = "0.8, 0.9"
ImageDraw = "0.2"
ImageFiltering = "0.6, 0.7"
ImageTransformations = "0.8, 0.9"
LazyArtifacts = "1.3"
PrettyTables = "2.0"
PrecompileTools = "1"
cuDNN = "1"
julia = "1.3"

[extras]
Expand Down
14 changes: 0 additions & 14 deletions deps/SnoopCompile/precompile/precompile_ObjectDetector.jl

This file was deleted.

3 changes: 0 additions & 3 deletions deps/SnoopCompile/snoopBenchmark.jl

This file was deleted.

8 changes: 0 additions & 8 deletions deps/SnoopCompile/snoopCompile.jl

This file was deleted.

23 changes: 0 additions & 23 deletions dev/compilation/compiler.jl

This file was deleted.

6 changes: 0 additions & 6 deletions dev/compilation/precompile.jl

This file was deleted.

16 changes: 14 additions & 2 deletions src/ObjectDetector.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module ObjectDetector
export YOLO
export prepareImage, prepareImage!, resizekern, sizethatfits, emptybatch, drawBoxes

using CUDA
import cuDNN # not used but needed to load Flux CUDA Exts in Flux 0.14+
import Flux.gpu
import Flux.cpu
export gpu, cpu
Expand All @@ -16,6 +18,7 @@ using ImageCore
using BenchmarkTools
using PrettyTables
using ImageDraw
using PrecompileTools

abstract type AbstractModel end
function getModelInputSize end
Expand All @@ -28,7 +31,16 @@ import .YOLO

include("utils.jl")

include("../deps/SnoopCompile/precompile/precompile_ObjectDetector.jl")
_precompile_()
# Precompilation workload (PrecompileTools): build a YOLOv3 model from dummy weights
# and call it once on an empty batch.
@setup_workload begin
@compile_workload begin
# don't use GPU here because GPU compilation of Conv requires realistic weights not dummy weights
# dummy=true makes v3_COCO pass `nothing` instead of a weights file path,
# which avoids the weights download during precompilation
yolomod = YOLO.v3_COCO(dummy=true, silent=true, use_gpu=false)
# input array sized from the model's input size (stays on the CPU since use_gpu=false)
batch = emptybatch(yolomod)
# call the model on the batch
res = yolomod(batch)
# NOTE(review): presumably these resets drop the references so the model and arrays
# are not kept around once the workload has run — confirm
res = nothing
batch = nothing
yolomod = nothing
end
end

end #module
7 changes: 6 additions & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ Create an empty batched input array on the GPU if available.
"""
function emptybatch(model::T) where {T<:AbstractModel}
modelInputSize = getModelInputSize(model)
gpu(zeros(Float32, modelInputSize...))
batch = zeros(Float32, modelInputSize...)
if YOLO.uses_gpu(model)
gpu(batch)
else
batch
end
end

"""
Expand Down
35 changes: 18 additions & 17 deletions src/yolo/pretrained.jl
Original file line number Diff line number Diff line change
@@ -1,32 +1,33 @@
using Pkg.Artifacts
## YOLOV2
function v2_COCO(;batch=1, silent=false, w=608, h=608, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)])
yolo(joinpath(models_dir,"yolov2-608.cfg"), joinpath(artifact"yolov2-COCO", "yolov2-COCO.weights"), batch, silent=silent, cfgchanges=cfgchanges)
function v2_COCO(;batch=1, silent=false, w=608, h=608, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)], kwargs...)
yolo(joinpath(models_dir,"yolov2-608.cfg"), joinpath(artifact"yolov2-COCO", "yolov2-COCO.weights"), batch; silent, cfgchanges, kwargs...)
end
v2_608_COCO(;batch=1, silent=false, cfgchanges=nothing) = v2_COCO(w=608, h=608, batch=batch, silent=silent, cfgchanges=cfgchanges)
v2_608_COCO(;cfgchanges=nothing, kwargs...) = v2_COCO(;w=608, h=608, cfgchanges, kwargs...)

## YOLOV2-tiny
function v2_tiny_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)])
yolo(joinpath(models_dir,"yolov2-tiny.cfg"), joinpath(artifact"yolov2-tiny-COCO", "yolov2-tiny-COCO.weights"), batch, silent=silent, cfgchanges=cfgchanges)
function v2_tiny_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)], kwargs...)
yolo(joinpath(models_dir,"yolov2-tiny.cfg"), joinpath(artifact"yolov2-tiny-COCO", "yolov2-tiny-COCO.weights"), batch; silent, cfgchanges, kwargs...)
end
v2_tiny_416_COCO(;batch=1, silent=false, cfgchanges=nothing) = v2_tiny_COCO(w=416, h=416, batch=batch, silent=silent, cfgchanges=cfgchanges)
v2_tiny_416_COCO(;cfgchanges=nothing, kwargs...) = v2_tiny_COCO(;w=416, h=416, cfgchanges, kwargs...)

## YOLOV3
function v3_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)])
yolo(joinpath(models_dir,"yolov3-416.cfg"), joinpath(artifact"yolov3-COCO", "yolov3-COCO.weights"), batch, silent=silent, cfgchanges=cfgchanges)
function v3_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)], dummy::Bool=false, kwargs...)
weightsfile = dummy ? nothing : joinpath(artifact"yolov3-COCO", "yolov3-COCO.weights")
yolo(joinpath(models_dir,"yolov3-416.cfg"), weightsfile, batch; silent, cfgchanges, kwargs...)
end
v3_320_COCO(;batch=1, silent=false, cfgchanges=nothing) = v3_COCO(w=320, h=320, batch=batch, silent=silent, cfgchanges=cfgchanges)
v3_416_COCO(;batch=1, silent=false, cfgchanges=nothing) = v3_COCO(w=416, h=416, batch=batch, silent=silent, cfgchanges=cfgchanges)
v3_608_COCO(;batch=1, silent=false, cfgchanges=nothing) = v3_COCO(w=608, h=608, batch=batch, silent=silent, cfgchanges=cfgchanges)
v3_320_COCO(;cfgchanges=nothing, kwargs...) = v3_COCO(;w=320, h=320, cfgchanges, kwargs...)
v3_416_COCO(;cfgchanges=nothing, kwargs...) = v3_COCO(;w=416, h=416, cfgchanges, kwargs...)
v3_608_COCO(;cfgchanges=nothing, kwargs...) = v3_COCO(;w=608, h=608, cfgchanges, kwargs...)

## YOLOV3 SPP
function v3_SPP_COCO(;batch=1, silent=false, w=608, h=608, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)])
yolo(joinpath(models_dir,"yolov3-spp.cfg"), joinpath(artifact"yolov3-spp-COCO", "yolov3-spp-COCO.weights"), batch, silent=silent, cfgchanges=cfgchanges)
function v3_SPP_COCO(;batch=1, silent=false, w=608, h=608, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)], kwargs...)
yolo(joinpath(models_dir,"yolov3-spp.cfg"), joinpath(artifact"yolov3-spp-COCO", "yolov3-spp-COCO.weights"), batch; silent, cfgchanges, kwargs...)
end
v3_spp_608_COCO(;batch=1, silent=false, cfgchanges=nothing) = v3_SPP_COCO(w=608, h=608, batch=batch, silent=silent, cfgchanges=cfgchanges)
v3_spp_608_COCO(;cfgchanges=nothing, kwargs...) = v3_SPP_COCO(;w=608, h=608, cfgchanges, kwargs...)

## YOLOV3-tiny
function v3_tiny_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)])
yolo(joinpath(models_dir,"yolov3-tiny.cfg"), joinpath(artifact"yolov3-tiny-COCO", "yolov3-tiny-COCO.weights"), batch, silent=silent, cfgchanges=cfgchanges)
function v3_tiny_COCO(;batch=1, silent=false, w=416, h=416, cfgchanges=[(:net, 1, :width, w), (:net, 1, :height, h)], kwargs...)
yolo(joinpath(models_dir,"yolov3-tiny.cfg"), joinpath(artifact"yolov3-tiny-COCO", "yolov3-tiny-COCO.weights"), batch; silent, cfgchanges, kwargs...)
end
v3_tiny_416_COCO(;batch=1, silent=false, cfgchanges=nothing) = v3_tiny_COCO(w=416, h=416, batch=batch, silent=silent, cfgchanges=cfgchanges)
v3_tiny_416_COCO(;cfgchanges=nothing, kwargs...) = v3_tiny_COCO(;w=416, h=416, cfgchanges, kwargs...)
50 changes: 33 additions & 17 deletions src/yolo/yolo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ import ..AbstractModel, ..getModelInputSize

const models_dir = joinpath(@__DIR__, "models")

using CUDA
import cuDNN # not used but needed to load Flux CUDA Exts in Flux 0.14+
using Flux
import Flux.gpu
using Flux.CUDA

using LazyArtifacts

const CU_FUNCTIONAL = Ref{Bool}(false)
Expand Down Expand Up @@ -74,19 +75,20 @@ end
Read the YOLO binary weights
"""
function readweights(bytes::IOBuffer, kern::Int, ch::Int, fl::Int, bn::Bool)
function readweights(bytes::Union{IOBuffer,Nothing}, kern::Int, ch::Int, fl::Int, bn::Bool)
dummy = isnothing(bytes)
if bn
bb = reinterpret(Float32, read(bytes, fl*4))
bw = reinterpret(Float32, read(bytes, fl*4))
bm = reinterpret(Float32, read(bytes, fl*4))
bv = reinterpret(Float32, read(bytes, fl*4))
bb = dummy ? ones(Float32, fl) : reinterpret(Float32, read(bytes, fl*4))
bw = dummy ? ones(Float32, fl) : reinterpret(Float32, read(bytes, fl*4))
bm = dummy ? ones(Float32, fl) : reinterpret(Float32, read(bytes, fl*4))
bv = dummy ? ones(Float32, fl) : reinterpret(Float32, read(bytes, fl*4))
cb = zeros(Float32, fl)
cw = reshape(reinterpret(Float32, read(bytes, kern*kern*ch*fl*4)), kern, kern, ch, fl)
cw = dummy ? ones(Float32, kern, kern, ch, fl) : reshape(reinterpret(Float32, read(bytes, kern*kern*ch*fl*4)), kern, kern, ch, fl)
cw = Float32.(flip(cw))
return cw, cb, bb, bw, bm, bv
else
cb = reinterpret(Float32, read(bytes, fl*4))
cw = reshape(reinterpret(Float32, read(bytes, kern*kern*ch*fl*4)), kern, kern, ch, fl)
cb = dummy ? ones(Float32, fl) : reinterpret(Float32, read(bytes, fl*4))
cw = dummy ? ones(Float32, kern, kern, ch, fl) : reshape(reinterpret(Float32, read(bytes, kern*kern*ch*fl*4)), kern, kern, ch, fl)
cw = Float32.(flip(cw))
return cw, cb, 0.0, 0.0, 0.0, 0.0
end
Expand Down Expand Up @@ -202,6 +204,9 @@ function assertdimconform(cfgvec::Vector{Pair{Symbol,Dict{Symbol,T}}}) where {T}
return true
end

# NOTE(review): if `gpu` is imported from Flux in this module, this definition adds a
# two-argument method to `Flux.gpu` rather than creating a local function — confirm
# that this is intended.
"""
    gpu(x, use::Bool)

Return `Flux.gpu(x)` when `use` is `true`; otherwise return `x` unchanged.
"""
function gpu(x, use::Bool)
    if use
        return Flux.gpu(x)
    end
    return x
end
"""
    uses_gpu(model::AbstractModel)

Return the `uses_gpu` field of `model`.
"""
function uses_gpu(model::AbstractModel)
    return model.uses_gpu
end

########################################################
##### THE YOLO OBJECT AND CONSTRUCTOR ##################
########################################################
Expand All @@ -210,9 +215,12 @@ mutable struct yolo <: AbstractModel
chain::Array{Any, 1} # This holds chains of weights and functions
W::Dict{Int64, T} where T <: DenseArray # This holds arrays that the model writes to
out::Array{Dict{Symbol, Any}, 1} # This holds values and arrays needed for inference
uses_gpu::Bool # Whether the gpu was requested to be used

# The constructor takes the official YOLO config files and weight files
yolo(cfgfile::String, weightfile::String, batchsize::Int = 1; silent::Bool = false, cfgchanges=nothing) = begin
yolo(cfgfile::String, weightfile::Union{Nothing,String}, batchsize::Int = 1; silent::Bool = false, cfgchanges=nothing, use_gpu::Bool=true) = begin
# load dummy weights (avoids download for precompilation)
dummy = isnothing(weightfile)
# read the config file and return [:layername => Dict(:setting => value), ...]
# the first 'layer' is not a real layer, and has overarching YOLO settings
cfgvec = cfgread(cfgfile)
Expand All @@ -226,10 +234,18 @@ mutable struct yolo <: AbstractModel
cfg = cfgvec[1][2]
yoloversion = any(first.(cfgvec) .== :region) ? 2 : 3 #v2 calls the last stage "region", v3 uses "yolo"
cfg[:yoloversion] = yoloversion
weightbytes = IOBuffer(read(weightfile)) # read weights file sequentially like byte stream
weightbytes = if dummy
nothing # readweights knows to make up dummy weights if this is nothing
else
IOBuffer(read(weightfile)) # read weights file sequentially like byte stream
end
# these settings are populated as the network is constructed below
# some settings are re-read later for the last part of construction
maj, min, subv, im1, im2 = reinterpret(Int32, read(weightbytes, 4*5))
maj, min, subv, im1, im2 = if dummy
ones(Int32, 5)
else
reinterpret(Int32, read(weightbytes, 4*5))
end
cfg[:darknetversion] = VersionNumber("$maj.$min.$subv")
cfg[:batchsize] = batchsize
cfg[:output] = []
Expand All @@ -248,8 +264,8 @@ mutable struct yolo <: AbstractModel
act = ACT[block[:activation]]
bn = haskey(block, :batch_normalize)
cw, cb, bb, bw, bm, bv = readweights(weightbytes, kern, ch[end], filters, bn)
push!(stack, gpu(Conv(cw, cb; stride = stride, pad = pad, dilation = 1)))
bn && push!(stack, gpu(BatchNorm(identity, bb, bw, bm, bv, 1f-5, 0.1f0, true, true, nothing, length(bb))))
push!(stack, gpu(Conv(cw, cb; stride = stride, pad = pad, dilation = 1), use_gpu))
bn && push!(stack, gpu(BatchNorm(identity, bb, bw, bm, bv, 1f-5, 0.1f0, true, true, nothing, length(bb)), use_gpu))
push!(stack, let; _act(x) = act.(x) end)
push!(fn, Chain(stack...))
push!(ch, filters)
Expand Down Expand Up @@ -314,7 +330,7 @@ mutable struct yolo <: AbstractModel
# PART 2 - THE SKIPS
####################
# Create test image. Note that darknet is row-major, so width-first
testimgs = [gpu(rand(Float32, cfg[:width], cfg[:height], cfg[:channels], batchsize))]
testimgs = [gpu(rand(Float32, cfg[:width], cfg[:height], cfg[:channels], batchsize), use_gpu)]
# find all skip-layers and all YOLO layers
needout = sort(vcat(0, [l[1] for l in filter(f -> typeof(f) <: Tuple, fn)], findall(x -> x == nothing, fn) .- 1))
chainstack = Flux.Chain[] # layers that just feed forward can be grouped together in chains
Expand Down Expand Up @@ -413,7 +429,7 @@ mutable struct yolo <: AbstractModel
out[i][:ignore] = get(cfg[:output][i], :ignore_thresh, 0.3) # for ignoring detections of same object (overlapping)
end

return new(cfg, chainstack, W, out)
return new(cfg, chainstack, W, out, use_gpu)
end
end

Expand Down

2 comments on commit 64f45f2

@IanButterworth
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/87647

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.10 -m "<description of version>" 64f45f23c81a0ff088cf01d278b595b4167430e3
git push origin v0.2.10

Please sign in to comment.