diff --git a/src/parse.jl b/src/parse.jl index 5c625e8..6e22f22 100644 --- a/src/parse.jl +++ b/src/parse.jl @@ -9,11 +9,24 @@ function parse_header(io) seek(io, 10) # start, end positions of TEXT, DATA, and ANALYSIS sections offsets = Array{Int64}(undef, 6) + for i in 1:6 # offsets are encoded as ASCII strings raw_str = Array{UInt8}(undef, 8) read!(io, raw_str) offsets_str = String(raw_str) + + # the last two numbers are for the analysis segment + # the analysis segment is facultative, although the bytes should + # always be there + # (FCS 3.1 ref at https://isac-net.org/page/Data-Standards) + # some cytometers (BD Accuri) do not put the last two bytes + # putting "0" bytes in their files is what other cytometers do + # see github discussion: + # https://github.com/tlnagy/FCSFiles.jl/pull/13#discussion_r985251676 + if isempty(lstrip(offsets_str)) && i>4 + offsets_str="0" + end offsets[i] = parse(Int, strip(join(offsets_str))) end @@ -23,7 +36,7 @@ function parse_header(io) offsets[3] = parse(Int64, text_mappings["\$BEGINDATA"]) offsets[4] = parse(Int64, text_mappings["\$ENDDATA"]) end - offsets + return offsets end @@ -58,10 +71,17 @@ function parse_data(io, end_data::Int, text_mappings::Dict{String, String}) seek(io, start_data) - # Add support for data types other than float - (text_mappings["\$DATATYPE"] != "F") && error("Non float32 support not implemented yet. Please see github issues for this project.") - flat_data = Array{Float32}(undef, (end_data - start_data + 1) ÷ 4) + # data type in FCS3.1 can be I (integer), F (float32), A (Ascii) + if text_mappings["\$DATATYPE"] == "I" + dtype = Int32 + elseif text_mappings["\$DATATYPE"] == "F" + dtype = Float32 + else + error("Only float and integer data types are implemented for now, the required .fcs file is using another number encoding.") + end + + flat_data = Array{dtype}(undef, (end_data - start_data + 1) ÷ 4) read!(io, flat_data) endian_func = get_endian_func(text_mappings) map!(endian_func, flat_data, flat_data) @@ -71,7 +91,7 @@ function parse_data(io, # data should be in multiples of `n_params` for list mode (mod(length(flat_data), n_params) != 0) && error("FCS file is corrupt. DATA and TEXT sections don't match.") - data = Dict{String, Vector{Float32}}() + data = Dict{String, Vector{dtype}}() for i in 1:n_params data[text_mappings["\$P$(i)N"]] = flat_data[i:n_params:end] diff --git a/test/Project.toml b/test/Project.toml index ce23579..e071e2f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,3 @@ [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/runtests.jl b/test/runtests.jl index af616fa..a1cde72 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using FCSFiles using FileIO -using Test, HTTP +using Test project_root = isfile("runtests.jl") ? abspath("..") : abspath(".") testdata_dir = joinpath(project_root, "test", "fcsexamples") @@ -27,4 +27,18 @@ end @test length(flowrun.data) == 50 @test length(flowrun.params) == 268 end + + @testset "Loading float-encoded file" begin + flowrun = load(joinpath(testdata_dir, "Applied Biosystems - Attune.fcs")) + + @test length(flowrun["SSC-A"]) == 22188 + @test flowrun["FSC-A"][2] == 244982.11f0 + end + + @testset "Loading Accuri file" begin + flowrun = load(joinpath(testdata_dir, "Accuri - C6.fcs")) + @test length(flowrun["SSC-A"]) == 63273 + @test flowrun["SSC-A"][2] == 370971 + + end end