Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redesign ReadStatMeta and add ReadStatColMeta for DataAPI.jl v1.13 #6

Merged
merged 7 commits into from Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Expand Up @@ -13,7 +13,7 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "0.10"
DataAPI = "1.6"
DataAPI = "1.11"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prepare this PR against DataAPI.jl 1.13, which will be released this week.

DataFrames = "1"
DataValues = "0.4"
PrettyTables = "1"
Expand Down
2 changes: 1 addition & 1 deletion src/ReadStatTables.jl
Expand Up @@ -6,7 +6,7 @@ using PrettyTables: pretty_table
using ReadStat: read_data_file
using Tables

import DataAPI: refarray, unwrap
import DataAPI: refarray, unwrap, hasmetadata, metadata
import PrettyTables: compact_type_str
import Tables: columnnames

Expand Down
47 changes: 47 additions & 0 deletions src/table.jl
Expand Up @@ -123,6 +123,53 @@ Retrieve the metadata parsed from a data file.
"""
getmeta(tb::ReadStatTable) = getfield(tb, :meta)

"""
    hasmetadata(tb::ReadStatTable)

Return `true` if `tb` holds a metadata object, i.e. `getmeta(tb)` is not `nothing`,
and `false` otherwise.
"""
function hasmetadata(tb::ReadStatTable)
    return !(getmeta(tb) === nothing)
end
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be `metadatasupport`, and it takes a type as its argument.

"""
    hasmetadata(tb::ReadStatTable, col::Symbol)

Return `true` only when `tb` holds a metadata object (`getmeta(tb)` is not `nothing`)
and `haskey(tb, col)` is true.
"""
function hasmetadata(tb::ReadStatTable, col::Symbol)
    # Without a metadata object no column can have metadata.
    getmeta(tb) === nothing && return false
    return haskey(tb, col)
end

"""
    metadata(tb::ReadStatTable)

Collect the file-level metadata of `tb` into a `Dict{String, Any}`.

The result always contains `"file_modified"` (from `meta.timestamp`) and
`"file_extension"` (from `meta.fileext`). The key `"label"` is added only when
the file label is not an empty string.

# Throws
- `ArgumentError`: if `getmeta(tb)` is `nothing`.
"""
function metadata(tb::ReadStatTable)
    # NOTE(review): the PR reviewer asked that the `style` kwarg also be supported here.
    info = getmeta(tb)
    if info === nothing
        throw(ArgumentError("File has no metadata"))
    end

    out = Dict{String, Any}()
    out["file_modified"] = info.timestamp
    out["file_extension"] = info.fileext

    # An empty file label means "no label"; leave the key out in that case.
    filelabel = info.filelabel
    if !(filelabel == "")
        out["label"] = filelabel
    end

    return out
end

"""
    metadata(tb::ReadStatTable, col::Symbol)

Collect the metadata recorded for column `col` of `tb` into a `Dict{String, Any}`.

Possible keys, each added only when the underlying value is present and not an
empty string:
- `"variable_format"`: taken from `meta.formats[col]`.
- `"label"`: taken from `meta.labels[col]`.
- `"value_label_name"` and `"value_labels"`: the name found in `meta.val_label_keys`
  and the matching entry of `meta.val_label_dict`. These are skipped for `.xpt` and
  `.sas7bdat` files.

# Throws
- `ArgumentError`: if `getmeta(tb)` is `nothing` or `haskey(tb, col)` is false.
"""
function metadata(tb::ReadStatTable, col::Symbol)
    # NOTE(review): the PR reviewer pointed out that this should be `colmetadata`.
    meta = getmeta(tb)
    meta === nothing && throw(ArgumentError("File has no metadata"))
    haskey(tb, col) || throw(ArgumentError("File has no column :$col"))

    ret = Dict{String, Any}()

    format = meta.formats[col]
    if format != ""
        ret["variable_format"] = format
    end

    label = meta.labels[col]
    if label != ""
        ret["label"] = label
    end

    # ReadStat.jl does not handle value labels for SAS at this moment
    if meta.fileext != ".xpt" && meta.fileext != ".sas7bdat"
        val_label = get(meta.val_label_keys, col, nothing)
        # `nothing != ""` is `true`, so a column without an entry must be excluded
        # explicitly; otherwise `nothing` would be stored as the label name and the
        # lookup in `val_label_dict` below would throw.
        if val_label !== nothing && val_label != ""
            ret["value_label_name"] = val_label
            ret["value_labels"] = meta.val_label_dict[val_label]
        end
    end

    return ret
end

# Table-level accessors: each one forwards to the method of the same name applied to
# the metadata object returned by `getmeta(tb)`.
function varlabels(tb::ReadStatTable)
    return varlabels(getmeta(tb))
end

function varformats(tb::ReadStatTable)
    return varformats(getmeta(tb))
end

function val_label_keys(tb::ReadStatTable)
    return val_label_keys(getmeta(tb))
end
Expand Down
119 changes: 119 additions & 0 deletions test/readstat.jl
Expand Up @@ -41,6 +41,31 @@ end
@test first(ss, 40) == "ReadStatMeta:\n variable labels: Dict"
@test last(ss, 27) == "\n file extension: .dta"

@test DataAPI.hasmetadata(d)
meta = DataAPI.metadata(d)
@test meta isa Dict{String, Any}
@test meta ==
Dict("label" => "A test file",
"file_modified" => DateTime("2021-04-22T19:36:00"),
"file_extension" => ".dta")
for col in Tables.columnnames(d)
@test DataAPI.hasmetadata(d, col)
colmeta = DataAPI.metadata(d, col)
@test colmeta isa Dict{String, Any}
@test colmeta["label"] != ""
@test colmeta["variable_format"] != ""
end
@test DataAPI.metadata(d, :mylabl) ==
Dict("label" => "labeled", "variable_format" => "%16.0f",
"value_label_name" => "mylabl",
"value_labels" => Dict(2 => "Female", 1 => "Male"))
@test DataAPI.metadata(d, :myord) ==
Dict("label" => "ordinal", "variable_format" => "%16.0f",
"value_label_name" => "myord",
"value_labels" => Dict(2 => "medium", 3 => "high", 1 => "low"))
@test !DataAPI.hasmetadata(d, :col)
@test_throws ArgumentError DataAPI.metadata(d, :col)

df = DataFrame(d)
@test all(n->isequal(df[!, n], getproperty(d, n)), columnnames(d))
df = DataFrame(d, copycols=false)
Expand Down Expand Up @@ -116,6 +141,30 @@ end
3 │ c -1000.3 1960-01-01T00:00:00 1960-01-01T00:00:00 Male high 1582-10-14T00:00:00
4 │ d -1.4 1583-01-01T00:00:00 1583-01-01T00:00:00 Female low 1582-10-14T16:10:10
5 │ e 1000.3 missing missing Male low missing"""

@test DataAPI.hasmetadata(d)
meta = DataAPI.metadata(d)
@test meta isa Dict{String, Any}
@test meta ==
Dict("file_modified" => DateTime("2018-08-16T15:22:33"),
"file_extension" => ".sav")
for col in Tables.columnnames(d)
@test DataAPI.hasmetadata(d, col)
colmeta = DataAPI.metadata(d, col)
@test colmeta isa Dict{String, Any}
@test colmeta["label"] != ""
@test colmeta["variable_format"] != ""
end
@test DataAPI.metadata(d, :mylabl) ==
Dict("label" => "labeled", "variable_format" => "F8.2",
"value_label_name" => "labels0",
"value_labels" => Dict(2 => "Female", 1 => "Male"))
@test DataAPI.metadata(d, :myord) ==
Dict("label" => "ordinal", "variable_format" => "F8.2",
"value_label_name" => "labels1",
"value_labels" => Dict(2 => "medium", 3 => "high", 1 => "low"))
@test !DataAPI.hasmetadata(d, :col)
@test_throws ArgumentError DataAPI.metadata(d, :col)
end

@testset "readstat por" begin
Expand All @@ -131,6 +180,30 @@ end
3 │ c -1000.3 1960-01-01T00:00:00 1960-01-01T00:00:00 Male high 1582-10-14T00:00:00
4 │ d -1.4 1583-01-01T00:00:00 1583-01-01T00:00:00 Female low 1582-10-14T16:10:10
5 │ e 1000.3 missing missing Male low missing"""

@test DataAPI.hasmetadata(d)
meta = DataAPI.metadata(d)
@test meta isa Dict{String, Any}
@test meta ==
Dict("file_modified" => DateTime("2018-12-16T16:28:21"),
"file_extension" => ".por")
for col in Tables.columnnames(d)
@test DataAPI.hasmetadata(d, col)
colmeta = DataAPI.metadata(d, col)
@test colmeta isa Dict{String, Any}
@test colmeta["label"] != ""
@test colmeta["variable_format"] != ""
end
@test DataAPI.metadata(d, :MYLABL) ==
Dict("label" => "labeled", "variable_format" => "F8.2",
"value_label_name" => "labels0",
"value_labels" => Dict(2 => "Female", 1 => "Male"))
@test DataAPI.metadata(d, :MYORD) ==
Dict("label" => "ordinal", "variable_format" => "F8.2",
"value_label_name" => "labels1",
"value_labels" => Dict(2 => "medium", 3 => "high", 1 => "low"))
@test !DataAPI.hasmetadata(d, :col)
@test_throws ArgumentError DataAPI.metadata(d, :col)
end

@testset "readstat sas7bdat" begin
Expand All @@ -146,6 +219,29 @@ end
3 │ c -1000.3 1960-01-01 1960-01-01T00:00:00 1.0 3.0 1960-01-01T00:00:00
4 │ d -1.4 1583-01-01 1583-01-01T00:00:00 2.0 1.0 1960-01-01T16:10:10
5 │ e 1000.3 missing missing 1.0 1.0 missing"""

@test DataAPI.hasmetadata(d)
meta = DataAPI.metadata(d)
@test meta isa Dict{String, Any}
@test meta ==
Dict("file_modified" => DateTime("2018-08-16T15:21:52"),
"file_extension" => ".sas7bdat")
for col in Tables.columnnames(d)
@test DataAPI.hasmetadata(d, col)
colmeta = DataAPI.metadata(d, col)
@test colmeta isa Dict{String, Any}
@test !haskey(colmeta, "label")
@test colmeta["variable_format"] != ""
end
# ReadStat.jl does not handle value labels for SAS at this moment
@test DataAPI.metadata(d, :mylabl) == Dict("variable_format" => "BEST")
@test_broken haskey(DataAPI.metadata(d, :mylabl), "value_label_name")
@test_broken haskey(DataAPI.metadata(d, :mylabl), "value_labels")
@test DataAPI.metadata(d, :myord) == Dict("variable_format" => "BEST")
@test_broken haskey(DataAPI.metadata(d, :myord), "value_label_name")
@test_broken haskey(DataAPI.metadata(d, :myord), "value_labels")
@test !DataAPI.hasmetadata(d, :col)
@test_throws ArgumentError DataAPI.metadata(d, :col)
end

@testset "readstat xpt" begin
Expand All @@ -161,4 +257,27 @@ end
3 │ c -1000.3 1960-01-01 1960-01-01T00:00:00 1.0 3.0 1960-01-01T00:00:00
4 │ d -1.4 1583-01-01 1583-01-01T00:00:00 2.0 1.0 1960-01-01T16:10:10
5 │ e 1000.3 missing missing 1.0 1.0 missing"""

@test DataAPI.hasmetadata(d)
meta = DataAPI.metadata(d)
@test meta isa Dict{String, Any}
@test meta ==
Dict("file_modified" => DateTime("2018-08-14T08:55:46"),
"file_extension" => ".xpt")
for col in Tables.columnnames(d)
@test DataAPI.hasmetadata(d, col)
colmeta = DataAPI.metadata(d, col)
@test colmeta isa Dict{String, Any}
@test !haskey(colmeta, "label")
@test colmeta["variable_format"] != ""
end
# ReadStat.jl does not handle value labels for SAS at this moment
@test DataAPI.metadata(d, :MYLABL) == Dict("variable_format" => "BEST12")
@test_broken haskey(DataAPI.metadata(d, :MYLABL), "value_label_name")
@test_broken haskey(DataAPI.metadata(d, :MYLABL), "value_labels")
@test DataAPI.metadata(d, :MYORD) == Dict("variable_format" => "BEST12")
@test_broken haskey(DataAPI.metadata(d, :MYORD), "value_label_name")
@test_broken haskey(DataAPI.metadata(d, :MYORD), "value_labels")
@test !DataAPI.hasmetadata(d, :col)
@test_throws ArgumentError DataAPI.metadata(d, :col)
end
1 change: 1 addition & 0 deletions test/runtests.jl
Expand Up @@ -2,6 +2,7 @@ using Test
using ReadStatTables

using CategoricalArrays
using DataAPI
using DataFrames
using Dates
using ReadStat
Expand Down
19 changes: 19 additions & 0 deletions test/table.jl
Expand Up @@ -4,12 +4,22 @@
@test length(tb) == 0
@test isempty(tb)
@test sprint(show, MIME("text/plain"), tb) == "0×0 ReadStatTable"
@test !DataAPI.hasmetadata(tb)
@test !DataAPI.hasmetadata(tb, :col)
@test_throws ArgumentError DataAPI.metadata(tb)
@test_throws ArgumentError DataAPI.metadata(tb, :col)

tb = ReadStatTable(AbstractVector[[]], Symbol[:c])
@test size(tb) == (0, 1)
@test length(tb) == 1
@test isempty(tb)
@test sprint(show, MIME("text/plain"), tb) == "0×1 ReadStatTable"
@test !DataAPI.hasmetadata(tb)
@test !DataAPI.hasmetadata(tb, :col)
@test !DataAPI.hasmetadata(tb, :c)
@test_throws ArgumentError DataAPI.metadata(tb)
@test_throws ArgumentError DataAPI.metadata(tb, :col)
@test_throws ArgumentError DataAPI.metadata(tb, :c)

c1 = collect(1:10)
c2 = collect(10.0:-1.0:1.0)
Expand Down Expand Up @@ -58,4 +68,13 @@
@test filelabel(tb) === nothing
@test filetimestamp(tb) === nothing
@test fileext(tb) === nothing

@test !DataAPI.hasmetadata(tb)
@test !DataAPI.hasmetadata(tb, :col)
@test !DataAPI.hasmetadata(tb, :c1)
@test !DataAPI.hasmetadata(tb, :c2)
@test_throws ArgumentError DataAPI.metadata(tb)
@test_throws ArgumentError DataAPI.metadata(tb, :col)
@test_throws ArgumentError DataAPI.metadata(tb, :c1)
@test_throws ArgumentError DataAPI.metadata(tb, :c2)
end