Skip to content

Commit

Permalink
Slightly less crappy screenshot performance (#1206)
Browse files Browse the repository at this point in the history
* Support encoding PNGs in something other than RGBA
* Make screenshots slightly less crappy
Defer the actual copy to our usual blit methods
And do that in a pixel format closest to the input format
* Use calloc instead of malloc + memset when creating a BB
* Drop redundant cdefs
* Move strcoll wrapper shenanigans to ffi/util
  • Loading branch information
NiLuJe committed Oct 5, 2020
1 parent f6448ab commit 5cc289e
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 27 deletions.
1 change: 1 addition & 0 deletions ffi-cdecl/lodepng_decl.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ cdecl_func(lodepng_decode24)
cdecl_func(lodepng_decode_memory)
cdecl_func(lodepng_decode_file)
cdecl_func(lodepng_encode32_file)
cdecl_func(lodepng_encode_file)
cdecl_func(lodepng_state_init)
cdecl_func(lodepng_state_cleanup)
cdecl_func(lodepng_state_copy)
Expand Down
9 changes: 8 additions & 1 deletion ffi-cdecl/posix_decl.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <sys/mman.h>
#include <stropts.h>
//#include <stropts.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>
Expand Down Expand Up @@ -71,11 +71,18 @@ cdecl_func(basename) // NOTE: We'll want the GNU one (c.f., https://github.com/k
cdecl_func(dirname)

cdecl_func(malloc)
cdecl_func(calloc)
cdecl_func(free)
cdecl_func(memset)

cdecl_func(strdup)
cdecl_func(strndup)
cdecl_func(strcoll)
cdecl_func(strcmp)
cdecl_func(strcasecmp)

cdecl_const(F_OK)
cdecl_func(access)

cdecl_func(fopen)
cdecl_func(fclose)
Expand Down
136 changes: 113 additions & 23 deletions ffi/blitbuffer.lua
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,6 @@ void BB_invert_blit_from(BlitBuffer *dest, BlitBuffer *source, int dest_x, int d
int offs_x, int offs_y, int w, int h);
void BB_color_blit_from(BlitBuffer *dest, BlitBuffer *source, int dest_x, int dest_y,
int offs_x, int offs_y, int w, int h, Color8A *color);

void *malloc(int size);
void free(void *ptr);
]]

-- NOTE: This works-around a number of corner-cases which may end up with LuaJIT's optimizer blacklisting our inner loops,
Expand Down Expand Up @@ -1734,37 +1731,131 @@ function BB_mt.__index:viewport(x, y, w, h)
end

--[[
write blitbuffer contents to a PNG file (in a PNG pixel format as close as possible to the input one)
@param filename the name of the file to be created
--]]
local Png -- lazy load ffi/png
function BB_mt.__index:writePNG(filename, bgr)

-- Dump a 4-bit grayscale BB to a PNG file.
-- The pixels are first blitted into a scratch Y8 BB (no padding, no soft rotation),
-- which is then encoded as a single-component (grayscale) PNG.
-- @param filename the name of the file to be created
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails)
function BB4_mt.__index:writePNG(filename)
    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Convert to Y8, I'm not sure how 4-bit grayscale works in PNG...
    local bbdump = BB.new(w, h, TYPE_BB8, nil, w, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 1)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Dump an 8-bit grayscale BB to a PNG file, encoded as a single-component (grayscale) PNG.
-- @param filename the name of the file to be created
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails)
function BB8_mt.__index:writePNG(filename)
    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Create a copy of the input BB, but with no padding and no soft rotation.
    -- NOTE: We've tried feeding self.data directly to LodePNG when it would be possible (i.e., rota 0, w == pixel_stride),
    --       and it turned out to be hilariously slower. Cache thrashing?
    local bbdump = BB.new(w, h, TYPE_BB8, nil, w, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 1)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Dump an 8-bit grayscale + alpha BB to a PNG file, encoded as a two-component (gray + alpha) PNG.
-- @param filename the name of the file to be created
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails)
function BB8A_mt.__index:writePNG(filename)
    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Create a copy of the input BB, but with no padding and no soft rotation.
    -- NOTE: The scratch BB is the only buffer we need here: no extra malloc, which would never be used nor freed.
    local bbdump = BB.new(w, h, TYPE_BB8A, nil, w * 2, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 2)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Dump an RGB565 BB to a PNG file.
-- The pixels are first blitted into a scratch RGB24 BB (no padding, no soft rotation),
-- which is then encoded as a three-component (RGB) PNG.
-- @param filename the name of the file to be created
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails)
function BBRGB16_mt.__index:writePNG(filename)
    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- RGB565 is the worst, convert to RGB24
    local bbdump = BB.new(w, h, TYPE_BBRGB24, nil, w * 3, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 3)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Dump an RGB24 BB to a PNG file, encoded as a three-component (RGB) PNG.
-- @param filename the name of the file to be created
-- @param bgr if truthy, the work is handed over to writePNGFromBGR instead
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails),
--         or whatever writePNGFromBGR returns when bgr is set
function BBRGB24_mt.__index:writePNG(filename, bgr)
    -- If input is BGR, devolve straight away to the crap fallback...
    if bgr then return self:writePNGFromBGR(filename) end

    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Create a copy of the input BB, but with no padding and no soft rotation.
    local bbdump = BB.new(w, h, TYPE_BBRGB24, nil, w * 3, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 3)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Dump an RGB32 BB to a PNG file, encoded as a four-component (RGBA) PNG.
-- @param filename the name of the file to be created
-- @param bgr if truthy, the work is handed over to writePNGFromBGR instead
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails),
--         or whatever writePNGFromBGR returns when bgr is set
function BBRGB32_mt.__index:writePNG(filename, bgr)
    -- If input is BGR, devolve straight away to the crap fallback...
    if bgr then return self:writePNGFromBGR(filename) end

    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Create a copy of the input BB, but with no padding and no soft rotation.
    local bbdump = BB.new(w, h, TYPE_BBRGB32, nil, w * 4, w)
    bbdump:blitFrom(self)

    -- Hang on to the encoder's verdict instead of dropping it, so that callers can detect a failed dump.
    local ok, err = Png.encodeToFile(filename, ffi.cast("const unsigned char*", bbdump.data), w, h, 4)
    bbdump:free()
    debug.sethook(hook, mask, count)
    return ok, err
end

-- Crap manual fallback for when we have a BGR <-> RGB swap to handle...
-- Walks the BB pixel by pixel, writing each pixel's RGB24 color into a malloc'ed buffer
-- with the red and blue components swapped, and encodes that buffer as a three-component PNG.
-- @param filename the name of the file to be created
-- @return the results of Png.encodeToFile (`false, err_msg` when encoding fails)
function BB_mt.__index:writePNGFromBGR(filename)
    if not Png then Png = require("ffi/png") end
    -- Disable any debug hook while we work; keep its count too, so that it can be restored exactly as it was.
    local hook, mask, count = debug.gethook()
    debug.sethook()

    local w, h = self:getWidth(), self:getHeight()
    -- Three bytes per pixel, no padding between rows.
    local stride = w * 3
    local cdata = C.malloc(stride * h)
    local mem = ffi.cast("char*", cdata)
    for y = 0, h-1 do
        local offset = stride * y
        for x = 0, w-1 do
            local c = self:getPixel(x, y):getColorRGB24()
            -- NOTE: Thankfully, this crap fallback is only ever used on BGR fbs, so, no branching here...
            mem[offset] = c.b
            mem[offset + 1] = c.g
            mem[offset + 2] = c.r
            offset = offset + 3
        end
    end

    -- Encode exactly once, as 3 components per pixel, and keep the encoder's verdict for the caller.
    local ok, err = Png.encodeToFile(filename, mem, w, h, 3)
    C.free(cdata)
    debug.sethook(hook, mask, count)
    return ok, err
end
Expand Down Expand Up @@ -1818,9 +1909,8 @@ function BB.new(width, height, buffertype, dataptr, stride, pixel_stride)
end
bb:setType(buffertype)
if dataptr == nil then
dataptr = C.malloc(stride*height)
dataptr = C.calloc(stride*height, 1)
assert(dataptr, "cannot allocate memory for blitbuffer")
ffi.fill(dataptr, stride*height)
bb:setAllocated(1)
end
bb.data = ffi.cast(bb.data, dataptr)
Expand Down
1 change: 1 addition & 0 deletions ffi/lodepng_h.lua
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ unsigned int lodepng_decode24(unsigned char **, unsigned int *, unsigned int *,
unsigned int lodepng_decode_memory(unsigned char **, unsigned int *, unsigned int *, const unsigned char *, size_t, LodePNGColorType, unsigned int);
unsigned int lodepng_decode_file(unsigned char **, unsigned int *, unsigned int *, const char *, LodePNGColorType, unsigned int);
unsigned int lodepng_encode32_file(const char *, const unsigned char *, unsigned int, unsigned int);
unsigned int lodepng_encode_file(const char *, const unsigned char *, unsigned int, unsigned int, LodePNGColorType, unsigned int);
void lodepng_state_init(LodePNGState *);
void lodepng_state_cleanup(LodePNGState *);
void lodepng_state_copy(LodePNGState *, const LodePNGState *);
Expand Down
22 changes: 20 additions & 2 deletions ffi/png.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,26 @@ end

local Png = {}

function Png.encodeToFile(filename, mem, w, h)
local err = lodepng.lodepng_encode32_file(filename, mem, w, h)
function Png.encodeToFile(filename, mem, w, h, n)

-- We'll always want 8-bits per component
local bitdepth = 8

-- Devise the output color type based on the number of components passed
local colortype
if n == 1 then
colortype = lodepng.LCT_GREY
elseif n == 2 then
colortype = lodepng.LCT_GREY_ALPHA
elseif n == 3 then
colortype = lodepng.LCT_RGB
elseif n == 4 then
colortype = lodepng.LCT_RGBA
else
return false, "passed an invalid number of color components"
end

local err = lodepng.lodepng_encode_file(filename, mem, w, h, colortype, bitdepth)
if err ~= 0 then
local err_msg = lodepng.lodepng_error_text(err)
return false, err_msg
Expand Down
6 changes: 6 additions & 0 deletions ffi/posix_h.lua
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,16 @@ char *realpath(const char *restrict, char *restrict) __attribute__((nothrow, lea
char *basename(char *) __attribute__((nothrow, leaf));
char *dirname(char *) __attribute__((nothrow, leaf));
void *malloc(size_t) __attribute__((malloc, leaf, nothrow));
void *calloc(size_t, size_t) __attribute__((malloc, leaf, nothrow));
void free(void *) __attribute__((leaf, nothrow));
void *memset(void *, int, size_t) __attribute__((leaf, nothrow));
char *strdup(const char *) __attribute__((malloc, leaf, nothrow));
char *strndup(const char *, size_t) __attribute__((malloc, leaf, nothrow));
int strcoll(const char *, const char *) __attribute__((nothrow, leaf, pure));
int strcmp(const char *, const char *) __attribute__((pure, leaf, nothrow));
int strcasecmp(const char *, const char *) __attribute__((pure, leaf, nothrow));
static const int F_OK = 0;
int access(const char *, int) __attribute__((nothrow, leaf));
struct _IO_FILE *fopen(const char *restrict, const char *restrict);
int fclose(struct _IO_FILE *);
int printf(const char *, ...);
Expand Down
36 changes: 36 additions & 0 deletions ffi/util.lua
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,42 @@ function util.df(path)
tonumber(statvfs.f_bfree * statvfs.f_bsize)
end

--- Strict "sorts before" predicate built on C.strcoll.
-- string sort function respecting LC_COLLATE
-- @param str1 first string
-- @param str2 second string
-- @return true when C.strcoll orders str1 strictly before str2, false otherwise
local function strcoll(str1, str2)
    local order = C.strcoll(str1, str2)
    return order < 0
end

--- Self-replacing string comparator.
-- The first call picks the comparison backend, swaps util.strcoll for the real implementation,
-- and then answers through it; later calls go straight to that implementation.
-- @param str1 first string (may be nil)
-- @param str2 second string (may be nil)
-- @return true when str1 sorts strictly before str2 (nil sorts before any non-nil value)
function util.strcoll(str1, str2)
    -- The backend is only picked on first use, to avoid a circular require
    local compare = strcoll

    -- Some devices lack compiled locales (Hi, Kobo!), preventing strcoll from behaving sanely. See issue koreader/koreader#686
    local locale_archive = "/usr/lib/locale/locale-archive"
    if jit.os == "Linux" and C.access(locale_archive, C.F_OK) ~= 0 then
        -- Fall back to Lua's own string ordering
        compare = function(a, b)
            return a < b
        end
    end

    -- From now on, this closure *is* util.strcoll
    util.strcoll = function(a, b)
        if a == nil then
            -- nil comes first, unless both sides are nil
            return b ~= nil
        end
        if b == nil then
            return false
        end
        if DALPHA_SORT_CASE_INSENSITIVE then
            return compare(string.lower(a), string.lower(b))
        end
        return compare(a, b)
    end

    -- Answer this very first call through the freshly installed implementation
    return util.strcoll(str1, str2)
end

--- Wrapper for C.realpath.
function util.realpath(path)
local buffer = ffi.new("char[?]", C.PATH_MAX)
Expand Down
2 changes: 1 addition & 1 deletion spec/unit/png_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ describe("Png module", function()
end
end

ok = Png.encodeToFile(fn, mem, w, h)
ok = Png.encodeToFile(fn, mem, w, h, 4)
ffi.C.free(cdata)
assert.are.same(ok, true)

Expand Down

0 comments on commit 5cc289e

Please sign in to comment.