Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions experiments/svg_cairo.nim
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,8 @@ proc draw(img: ptr Context, node: XmlNode, ctxStack: var seq[Ctx]) =
let points = points.split(" ")
if points.len mod 2 != 0:
failInvalid()
for i in countup(0, points.len - 2, 2):
vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1])))
for i in 0 ..< points.len div 2:
vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1])))

if vecs.len == 0:
failInvalid()
Expand Down
4 changes: 2 additions & 2 deletions src/pixie/fileformats/svg.nim
Original file line number Diff line number Diff line change
Expand Up @@ -410,8 +410,8 @@ proc drawInternal(img: Image, node: XmlNode, ctxStack: var seq[Ctx]) =
let points = points.split(" ")
if points.len mod 2 != 0:
failInvalid()
for i in countup(0, points.len - 2, 2):
vecs.add(vec2(parseFloat(points[i]), parseFloat(points[i + 1])))
for i in 0 ..< points.len div 2:
vecs.add(vec2(parseFloat(points[i * 2]), parseFloat(points[i * 2 + 1])))

if vecs.len == 0:
failInvalid()
Expand Down
74 changes: 38 additions & 36 deletions src/pixie/images.nim
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@ proc newImage*(mask: Mask): Image {.raises: [PixieError].} =
result = newImage(mask.width, mask.height)
var i: int
when defined(amd64) and not defined(pixieNoSimd):
for _ in countup(0, mask.data.len - 16, 4):
var alphas = unpackAlphaValues(mm_loadu_si128(mask.data[i].addr))
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 8))
alphas = mm_or_si128(alphas, mm_srli_epi32(alphas, 16))
mm_storeu_si128(result.data[i].addr, alphas)
i += 4
for _ in 0 ..< mask.data.len div 16:
var alphas = mm_loadu_si128(mask.data[i].addr)
for j in 0 ..< 4:
var unpacked = unpackAlphaValues(alphas)
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 8))
unpacked = mm_or_si128(unpacked, mm_srli_epi32(unpacked, 16))
mm_storeu_si128(result.data[i + j * 4].addr, unpacked)
alphas = mm_srli_si128(alphas, 4)
i += 16

for i in i ..< mask.data.len:
let v = mask.data[i]
result.data[i] = rgbx(v, v, v, v)
for j in i ..< mask.data.len:
let v = mask.data[j]
result.data[j] = rgbx(v, v, v, v)

proc copy*(image: Image): Image {.raises: [PixieError].} =
## Copies the image data into a new image.
Expand Down Expand Up @@ -104,19 +107,19 @@ proc fillUnsafe*(
var i = start
when defined(amd64) and not defined(pixieNoSimd):
# When supported, SIMD fill until we run out of room
let m = mm_set1_epi32(cast[int32](rgbx))
for j in countup(i, start + len - 8, 8):
mm_storeu_si128(data[j].addr, m)
mm_storeu_si128(data[j + 4].addr, m)
let colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in 0 ..< len div 8:
mm_storeu_si128(data[i + 0].addr, colorVec)
mm_storeu_si128(data[i + 4].addr, colorVec)
i += 8
else:
when sizeof(int) == 8:
# Fill 8 bytes at a time when possible
let
u32 = cast[uint32](rgbx)
u64 = cast[uint64]([u32, u32])
for j in countup(i, start + len - 2, 2):
cast[ptr uint64](data[j].addr)[] = u64
for _ in 0 ..< len div 2:
cast[ptr uint64](data[i].addr)[] = u64
i += 2
# Fill whatever is left the slow way
for j in i ..< start + len:
Expand All @@ -135,10 +138,10 @@ proc isOneColor*(image: Image): bool {.raises: [].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let colorVec = mm_set1_epi32(cast[int32](color))
for j in countup(0, image.data.len - 8, 8):
for _ in 0 ..< image.data.len div 8:
let
values0 = mm_loadu_si128(image.data[j].addr)
values1 = mm_loadu_si128(image.data[j + 4].addr)
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
mask0 = mm_movemask_epi8(mm_cmpeq_epi8(values0, colorVec))
mask1 = mm_movemask_epi8(mm_cmpeq_epi8(values1, colorVec))
if mask0 != uint16.high.int or mask1 != uint16.high.int:
Expand All @@ -155,17 +158,17 @@ proc isTransparent*(image: Image): bool {.raises: [].} =

var i: int
when defined(amd64) and not defined(pixieNoSimd):
let transparent = mm_setzero_si128()
for j in countup(0, image.data.len - 16, 16):
let zeroVec = mm_setzero_si128()
for _ in 0 ..< image.data.len div 16:
let
values0 = mm_loadu_si128(image.data[j].addr)
values1 = mm_loadu_si128(image.data[j + 4].addr)
values2 = mm_loadu_si128(image.data[j + 8].addr)
values3 = mm_loadu_si128(image.data[j + 12].addr)
values0 = mm_loadu_si128(image.data[i + 0].addr)
values1 = mm_loadu_si128(image.data[i + 4].addr)
values2 = mm_loadu_si128(image.data[i + 8].addr)
values3 = mm_loadu_si128(image.data[i + 12].addr)
values01 = mm_or_si128(values0, values1)
values23 = mm_or_si128(values2, values3)
values = mm_or_si128(values01, values23)
mask = mm_movemask_epi8(mm_cmpeq_epi8(values, transparent))
mask = mm_movemask_epi8(mm_cmpeq_epi8(values, zeroVec))
if mask != uint16.high.int:
return false
i += 16
Expand Down Expand Up @@ -416,9 +419,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
let
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))
vOpacity = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)

for _ in countup(0, byteLen - 16, 16):
opacityVec = mm_slli_epi16(mm_set1_epi16(cast[int16](opacity)), 8)
for _ in 0 ..< byteLen div 16:
when type(target) is Image:
let index = i div 4
else:
Expand All @@ -433,8 +435,8 @@ proc applyOpacity*(target: Image | Mask, opacity: float32) {.raises: [].} =
valuesOdd = mm_and_si128(values, oddMask)

# values * opacity
valuesEven = mm_mulhi_epu16(valuesEven, vOpacity)
valuesOdd = mm_mulhi_epu16(valuesOdd, vOpacity)
valuesEven = mm_mulhi_epu16(valuesEven, opacityVec)
valuesOdd = mm_mulhi_epu16(valuesOdd, opacityVec)

# div 255
valuesEven = mm_srli_epi16(mm_mulhi_epu16(valuesEven, div255), 7)
Expand Down Expand Up @@ -465,21 +467,21 @@ proc invert*(target: Image | Mask) {.raises: [].} =
## Inverts all of the colors and alpha.
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let v255 = mm_set1_epi8(cast[int8](255))
let vec255 = mm_set1_epi8(cast[int8](255))

when type(target) is Image:
let byteLen = target.data.len * 4
else:
let byteLen = target.data.len

for _ in countup(0, byteLen - 16, 16):
for _ in 0 ..< byteLen div 16:
when type(target) is Image:
let index = i div 4
else:
let index = i

var values = mm_loadu_si128(target.data[index].addr)
values = mm_sub_epi8(v255, values)
values = mm_sub_epi8(vec255, values)
mm_storeu_si128(target.data[index].addr, values)

i += 16
Expand Down Expand Up @@ -568,7 +570,7 @@ proc newMask*(image: Image): Mask {.raises: [PixieError].} =

var i: int
when defined(amd64) and not defined(pixieNoSimd):
for _ in countup(0, image.data.len - 16, 16):
for _ in 0 ..< image.data.len div 16:
var
a = mm_loadu_si128(image.data[i + 0].addr)
b = mm_loadu_si128(image.data[i + 4].addr)
Expand Down Expand Up @@ -817,7 +819,7 @@ proc drawUber(
when type(a) is Image:
if blendMode.hasSimdBlender():
let blenderSimd = blendMode.blenderSimd()
for _ in countup(x, xMax - 16, 16):
for _ in 0 ..< (xMax - xMin) div 16:
let
srcPos = p + dx * x.float32 + dy * y.float32
sx = srcPos.x.int
Expand Down Expand Up @@ -847,7 +849,7 @@ proc drawUber(
else: # is a Mask
if blendMode.hasSimdMasker():
let maskerSimd = blendMode.maskerSimd()
for _ in countup(x, xMax - 16, 16):
for _ in 0 ..< (xMax - xMin) div 16:
let
srcPos = p + dx * x.float32 + dy * y.float32
sx = srcPos.x.int
Expand Down
3 changes: 1 addition & 2 deletions src/pixie/internal.nim
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ proc toPremultipliedAlpha*(data: var seq[ColorRGBA | ColorRGBX]) {.raises: [].}
notAlphaMask = mm_set1_epi32(0x00ffffff)
oddMask = mm_set1_epi16(cast[int16](0xff00))
div255 = mm_set1_epi16(cast[int16](0x8081))

for _ in countup(i, data.len - 4, 4):
for _ in 0 ..< data.len div 4:
var
color = mm_loadu_si128(data[i].addr)
alpha = mm_and_si128(color, alphaMask)
Expand Down
10 changes: 5 additions & 5 deletions src/pixie/masks.nim
Original file line number Diff line number Diff line change
Expand Up @@ -246,12 +246,12 @@ proc ceil*(mask: Mask) {.raises: [].} =
var i: int
when defined(amd64) and not defined(pixieNoSimd):
let
vZero = mm_setzero_si128()
vMax = mm_set1_epi32(cast[int32](uint32.high))
for _ in countup(0, mask.data.len - 16, 16):
zeroVec = mm_setzero_si128()
vec255 = mm_set1_epi32(cast[int32](uint32.high))
for _ in 0 ..< mask.data.len div 16:
var values = mm_loadu_si128(mask.data[i].addr)
values = mm_cmpeq_epi8(values, vZero)
values = mm_andnot_si128(values, vMax)
values = mm_cmpeq_epi8(values, zeroVec)
values = mm_andnot_si128(values, vec255)
mm_storeu_si128(mask.data[i].addr, values)
i += 16

Expand Down
14 changes: 7 additions & 7 deletions src/pixie/paths.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1258,10 +1258,10 @@ proc computeCoverages(
var i = fillStart
when defined(amd64) and not defined(pixieNoSimd):
let sampleCoverageVec = mm_set1_epi8(cast[int8](sampleCoverage))
for j in countup(i, fillStart + fillLen - 16, 16):
var coverage = mm_loadu_si128(coverages[j - startX].addr)
for _ in 0 ..< fillLen div 16:
var coverage = mm_loadu_si128(coverages[i - startX].addr)
coverage = mm_add_epi8(coverage, sampleCoverageVec)
mm_storeu_si128(coverages[j - startX].addr, coverage)
mm_storeu_si128(coverages[i - startX].addr, coverage)
i += 16
for j in i ..< fillStart + fillLen:
coverages[j - startX] += sampleCoverage
Expand Down Expand Up @@ -1296,7 +1296,7 @@ proc fillCoverage(
vec255 = mm_set1_epi32(cast[int32](uint32.high))
zeroVec = mm_setzero_si128()
colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(x, startX + coverages.len - 16, 16):
for _ in 0 ..< coverages.len div 16:
let
index = image.dataIndex(x, y)
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
Expand Down Expand Up @@ -1386,7 +1386,7 @@ proc fillCoverage(
let
maskerSimd = blendMode.maskerSimd()
zeroVec = mm_setzero_si128()
for _ in countup(x, startX + coverages.len - 16, 16):
for _ in 0 ..< coverages.len div 16:
let
index = mask.dataIndex(x, y)
coverage = mm_loadu_si128(coverages[x - startX].unsafeAddr)
Expand Down Expand Up @@ -1448,7 +1448,7 @@ proc fillHits(
let
blenderSimd = blendMode.blenderSimd()
colorVec = mm_set1_epi32(cast[int32](rgbx))
for _ in countup(fillStart, fillLen - 16, 16):
for _ in 0 ..< fillLen div 16:
let index = image.dataIndex(x, y)
for i in 0 ..< 4:
let backdrop = mm_loadu_si128(image.data[index + i * 4].addr)
Expand Down Expand Up @@ -1497,7 +1497,7 @@ proc fillHits(
let
maskerSimd = blendMode.maskerSimd()
valueVec = mm_set1_epi8(cast[int8](255))
for _ in countup(fillStart, fillLen - 16, 16):
for _ in 0 ..< fillLen div 16:
let backdrop = mm_loadu_si128(mask.data[mask.dataIndex(x, y)].addr)
mm_storeu_si128(
mask.data[mask.dataIndex(x, y)].addr,
Expand Down
4 changes: 3 additions & 1 deletion tests/benchmark_blends.nim
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ when defined(amd64) and not defined(pixieNoSimd):
reset()

timeIt "blendNormal [simd]":
  # Benchmarks the SIMD normal blend over the whole backdrop buffer.
  # Each iteration loads one 128-bit vector from `backdrop` and `source`
  # (four elements, matching the `i += 4` stride), blends them, and stores
  # the result back into `backdrop`.
  #
  # The trip count is the number of whole 4-element chunks. A bound of
  # `i < backdrop.data.len - 4` would stop one chunk early and leave the
  # last full chunk unblended, unlike `countup(0, len - 4, 4)`.
  var i: int
  for _ in 0 ..< backdrop.data.len div 4:
    let
      b = mm_loadu_si128(backdrop.data[i].addr)
      s = mm_loadu_si128(source.data[i].addr)
    mm_storeu_si128(backdrop.data[i].addr, blendNormalSimd(b, s))
    i += 4