Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

calibre sax json parser #11922

Merged
merged 9 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion base
51 changes: 51 additions & 0 deletions plugins/calibre.koplugin/main.lua
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ function Calibre:addToMainMenu(menu_items)
keep_menu_open = true,
sub_item_table = self:getWirelessMenuTable(),
},
{
text = _("JSON parser"),
keep_menu_open = true,
sub_item_table = self:getParserMenuTable(),
},
}
}
-- insert the metadata search
Expand Down Expand Up @@ -414,4 +419,50 @@ function Calibre:getWirelessMenuTable()
return t
end

--- Build the sub-menu used to choose the JSON parser implementation.
-- The choice is persisted in G_reader_settings under "calibre_json_parser";
-- when that key is absent the "Automatic" entry is shown as checked.
-- @treturn table list of menu item tables (text, help_text, checked_func, callback)
function Calibre:getParserMenuTable()
    -- Menu entry whose state is a concrete value stored in the setting.
    local function storedChoice(label, help, value)
        return {
            text = label,
            help_text = help,
            checked_func = function()
                return G_reader_settings:readSetting("calibre_json_parser") == value
            end,
            callback = function()
                G_reader_settings:saveSetting("calibre_json_parser", value)
            end,
        }
    end

    local items = {}

    -- "Automatic" is represented by the absence of the setting.
    items[#items + 1] = {
        text = _("Automatic"),
        help_text = _("The program will decide based on the size of the JSON file. Recommended"),
        checked_func = function()
            return G_reader_settings:hasNot("calibre_json_parser")
        end,
        callback = function()
            G_reader_settings:delSetting("calibre_json_parser")
        end,
    }
    items[#items + 1] = storedChoice(
        _("Fast"),
        _("Faster parsing, but may not take too kindly to malformed input files"),
        "fast"
    )
    items[#items + 1] = storedChoice(
        _("Safe"),
        _("Slower, but safer. Useful if you're experiencing problems with the other modes"),
        "safe"
    )
    items[#items + 1] = storedChoice(
        _("Legacy"),
        _("Fast, but requires more RAM, only recommended on modest library sizes (or beefier devices)"),
        "legacy"
    )

    return items
end


return Calibre
13 changes: 8 additions & 5 deletions plugins/calibre.koplugin/metadata.lua
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ local function slim(book, is_search)
return slim_book
end

-- this is the max file size we attempt to decode using json. For larger
-- files we want to attempt to manually parse the file to avoid OOM errors
local MAX_JSON_FILESIZE = 30 * 1000 * 1000
-- This is the max file size we attempt to decode using rapidjson.
-- For larger files we use a sax parser to avoid OOM errors
local MAX_JSON_FILESIZE = 50 * 1024 * 1024

--- find calibre files for a given dir
local function findCalibreFiles(dir)
Expand Down Expand Up @@ -121,8 +121,11 @@ function CalibreMetadata:loadBookList()
return {}
end
local books, err
if attr.size > MAX_JSON_FILESIZE then
books, err = parser.parseFile(self.metadata)
local impl = G_reader_settings:readSetting("calibre_json_parser") or attr.size > MAX_JSON_FILESIZE and "safe" or "fast"
if impl == "fast" then
books, err = rapidjson.load_calibre(self.metadata)
elseif impl == "safe" then
books, err = parser.parseFile(self.metadata)
else
books, err = rapidjson.load(self.metadata)
end
Expand Down
154 changes: 80 additions & 74 deletions plugins/calibre.koplugin/parser.lua
Original file line number Diff line number Diff line change
@@ -1,90 +1,96 @@
-- A parser for metadata.calibre
local util = require("util")
-- parse "metadata.calibre" files
local lj = require("lunajson")

-- removes leading and closing characters and converts hex-unicodes
local function replaceHexChars(s, n, j)
local l = string.len(s)
if string.sub(s, l, l) == "\"" then
s = string.sub(s, n, string.len(s)-1)
else
s = string.sub(s, n, string.len(s)-j)
end
s = string.gsub(s, "\\u([a-f0-9][a-f0-9][a-f0-9][a-f0-9])", function(w)
return util.unicodeCodepointToUtf8(tonumber(w, 16))
end)
return s
end
-- Keys whose value gets a fresh list in the current book record when a JSON
-- array starts for them (looked up by the `startarray` SAX callback).
local array_fields = {
authors = true,
tags = true,
series = true,
}

-- a couple of string helper functions for dealing with raw json strings
local function isEqual(str, key)
if str:sub(1, key:len() + 6) == string.format(" \"%s\"", key) then
return true
end
return false
end
-- Set of JSON keys the SAX `key` callback selects for capture; values of
-- any other key are not stored in the book record.
local required_fields = {
authors = true,
last_modified = true,
lpath = true,
series = true,
series_index = true,
size = true,
tags = true,
title = true,
uuid = true,
}

local function getValue(str, key)
if str == string.format(" \"%s\": null, ", key) then
return nil
local field
local t = {}
local function append(v)
-- Some fields *may* be arrays, so check whether we ran through startarray first or not
if t[field] then
table.insert(t[field], v)
else
return replaceHexChars(str, key:len() + 10, key == "series_index" and 2 or 3)
t[field] = v
field = nil
end
end

local jsonStr = getmetatable("")
jsonStr.__index["equals"] = isEqual
jsonStr.__index["value"] = getValue
local depth = 0
local result = {}
-- SAX callbacks passed to lunajson's file parser.
-- They build one book record at a time in `t`, push it onto `result` when
-- the depth-1 object closes, and use `field` to remember which wanted key
-- the next value belongs to (nil means "ignore the next value").
local sax = {
    startobject = function()
        depth = depth + 1
    end,
    endobject = function()
        -- Closing a depth-1 object completes the current book record.
        if depth == 1 then
            table.insert(result, t)
            t = {}
        end
        depth = depth - 1
    end,
    startarray = function()
        -- Only list-capable fields get a table; `append` then inserts into it.
        if field and array_fields[field] then
            t[field] = {}
        end
    end,
    endarray = function()
        field = nil
    end,
    key = function(s)
        -- Always overwrite `field`: leaving a previous wanted key in place
        -- (e.g. after a `null` value) would make the value of this unrelated
        -- key be stored under the old name. Keys inside nested objects
        -- (depth > 1) are not book-level fields and are never captured.
        if depth == 1 and required_fields[s] then
            field = s
        else
            field = nil
        end
    end,
    null = function()
        -- A null value stores nothing, but it still consumes the pending key.
        field = nil
    end,
    string = function(s)
        if field then
            append(s)
        end
    end,
    number = function(n)
        if field then
            append(n)
        end
    end,
    boolean = function(b)
        if field then
            append(b)
        end
    end,
}

-- Run lunajson's SAX file parser over `path`, feeding events to `sax`.
-- There is no error handling here: anything the parser raises propagates
-- to the caller. Returns nothing.
local function parse_unsafe(path)
    lj.newfileparser(path, sax).run()
end

local parser = {}

-- read metadata from file, line by line, and keep just the data we need
function parser.parseFile(file)
assert(type(file) == "string", "wrong type (expected a string")
local f, err = io.open(file, "rb")
if not f then
return nil, string.format("error parsing %s: %s", file, err)
end
f:close()
local add = function(t, line)
if type(t) ~= "table" or type(line) ~= "string" then
return {}
end
line = replaceHexChars(line, 8, 3)
table.insert(t, #t + 1, line)
return t
end
local books, book = {}, {}
local is_author, is_tag = false, false
for line in io.lines(file) do
if line == " }, " or line == " }" then
if type(book) == "table" then
table.insert(books, #books + 1, book)
end
book = {}
elseif line == " \"authors\": [" then
is_author = true
elseif line == " \"tags\": [" then
is_tag = true
elseif line == " ], " or line == " ]" then
is_author, is_tag = false, false
else
for _, key in ipairs({"title", "uuid", "lpath", "size",
"last_modified", "series", "series_index"})
do
if line:equals(key) then
book[key] = line:value(key)
break
end
end
end
if is_author then
book.authors = add(book.authors, line)
elseif is_tag then
book.tags = add(book.tags, line)
end
result = {}
local ok, err = pcall(parse_unsafe, file)
field = nil
if not ok then
return nil, err
end
return books
return result
end

return parser