From 9a784b182289ade2ea131d61f086806bb43c64fc Mon Sep 17 00:00:00 2001
From: Eric Jensen
Date: Fri, 21 Oct 2022 16:15:36 -0400
Subject: [PATCH 1/5] Improve the speed of comparing memory buffers by using a
 workaround to a missed compiler optimization

---
 Sources/SnapshotTesting/Snapshotting/NSImage.swift |  2 +-
 Sources/SnapshotTesting/Snapshotting/UIImage.swift | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/Sources/SnapshotTesting/Snapshotting/NSImage.swift b/Sources/SnapshotTesting/Snapshotting/NSImage.swift
index 620556e6c..9b1989f6d 100644
--- a/Sources/SnapshotTesting/Snapshotting/NSImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/NSImage.swift
@@ -104,7 +104,7 @@ private func compare(_ old: NSImage, _ new: NSImage, precision: Float, perceptua
   let newRep = NSBitmapImageRep(cgImage: newerCgImage).bitmapData!
   let byteCountThreshold = Int((1 - precision) * Float(byteCount))
   var differentByteCount = 0
-  for offset in 0..<byteCount {
+  fastForEach(in: 0..<byteCount) { offset in
     if oldRep[offset] != newRep[offset] {
       differentByteCount += 1
     }
diff --git a/Sources/SnapshotTesting/Snapshotting/UIImage.swift b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
--- a/Sources/SnapshotTesting/Snapshotting/UIImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
@@ ... @@
   var differentByteCount = 0
-  for offset in 0..<byteCount {
+  fastForEach(in: 0..<byteCount) { offset in
     if oldRep[offset] != newRep[offset] {
       differentByteCount += 1
     }
@@ ... @@
   }
 }
 #endif
+
+/// When the compiler doesn't have optimizations enabled, like in test targets, a `while` loop is significantly faster than a `for` loop
+/// for iterating through the elements of a memory buffer. Details can be found in [SR-6983](https://github.com/apple/swift/issues/49531#issuecomment-1108286654)
+func fastForEach(in range: Range<Int>, _ body: (Int) -> Void) {
+  var index = range.lowerBound
+  while index < range.upperBound {
+    body(index)
+    index += 1
+  }
+}
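
A note on the workaround: at -Onone (the default for test targets), a `for offset in range` loop goes through Range's IteratorProtocol machinery and is dramatically slower than a hand-written `while` loop over the same indices, which the optimizer would normally make equivalent. The two loop shapes side by side (illustrative sketch, not part of the patch):

    // Count differing bytes two ways; only the loop style differs.
    func countDiffsForIn(_ old: [UInt8], _ new: [UInt8]) -> Int {
      var count = 0
      for offset in 0..<old.count { // iterator-based; slow at -Onone (SR-6983)
        if old[offset] != new[offset] { count += 1 }
      }
      return count
    }

    func countDiffsWhile(_ old: [UInt8], _ new: [UInt8]) -> Int {
      var count = 0
      var offset = 0
      while offset < old.count { // plain index arithmetic; fast even at -Onone
        if old[offset] != new[offset] { count += 1 }
        offset += 1
      }
      return count
    }

`fastForEach` packages the second shape behind a closure so call sites keep the `for`-like reading.
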
From ad051b1833ae6b0190406bbbf0d433aabc06467b Mon Sep 17 00:00:00 2001
From: Eric Jensen
Date: Mon, 26 Sep 2022 22:07:58 -0400
Subject: [PATCH 2/5] Remove Metal usage since some virtualized environments
 don’t support it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the CoreImage operations that require Metal with CPU-based calculations
---
 .../Snapshotting/UIImage.swift | 120 ++++++++----------
 1 file changed, 52 insertions(+), 68 deletions(-)

diff --git a/Sources/SnapshotTesting/Snapshotting/UIImage.swift b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
index 9415136c4..7bf74253d 100644
--- a/Sources/SnapshotTesting/Snapshotting/UIImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
@@ -169,83 +169,67 @@ private func diff(_ old: UIImage, _ new: UIImage) -> UIImage {
 #endif
 
 #if os(iOS) || os(tvOS) || os(macOS)
-import CoreImage.CIKernel
-import MetalPerformanceShaders
+import Accelerate.vImage
+import CoreImage
 
-@available(iOS 10.0, tvOS 10.0, macOS 10.13, *)
 func perceptuallyCompare(_ old: CIImage, _ new: CIImage, pixelPrecision: Float, perceptualPrecision: Float) -> String? {
+  // Calculate the deltaE values. Each pixel is a value between 0-100.
+  // 0 means no difference, 100 means completely opposite.
   let deltaOutputImage = old.applyingFilter("CILabDeltaE", parameters: ["inputImage2": new])
-  let thresholdOutputImage: CIImage
-  do {
-    thresholdOutputImage = try ThresholdImageProcessorKernel.apply(
-      withExtent: new.extent,
-      inputs: [deltaOutputImage],
-      arguments: [ThresholdImageProcessorKernel.inputThresholdKey: (1 - perceptualPrecision) * 100]
-    )
-  } catch {
-    return "Newly-taken snapshot's data could not be loaded. \(error)"
-  }
-  var averagePixel: Float = 0
+  // Setting the working color space and output color space to NSNull disables color management. This is appropriate when the output
+  // of the operations is computational instead of an image intended to be displayed.
   let context = CIContext(options: [.workingColorSpace: NSNull(), .outputColorSpace: NSNull()])
-  context.render(
-    thresholdOutputImage.applyingFilter("CIAreaAverage", parameters: [kCIInputExtentKey: new.extent]),
-    toBitmap: &averagePixel,
-    rowBytes: MemoryLayout<Float>.size,
-    bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
-    format: .Rf,
-    colorSpace: nil
-  )
-  let actualPixelPrecision = 1 - averagePixel
-  guard actualPixelPrecision < pixelPrecision else { return nil }
+  guard let buffer = deltaOutputImage.render(extent: deltaOutputImage.extent, context: context) else {
+    return "Newly-taken snapshot could not be processed."
+  }
+  defer { buffer.free() }
+  let deltaThreshold = (1 - perceptualPrecision) * 100
+  var failingPixelCount: Int = 0
   var maximumDeltaE: Float = 0
-  context.render(
-    deltaOutputImage.applyingFilter("CIAreaMaximum", parameters: [kCIInputExtentKey: new.extent]),
-    toBitmap: &maximumDeltaE,
-    rowBytes: MemoryLayout<Float>.size,
-    bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
-    format: .Rf,
-    colorSpace: nil
-  )
-  let actualPerceptualPrecision = 1 - maximumDeltaE / 100
-  if pixelPrecision < 1 {
-    return """
-    Actual image precision \(actualPixelPrecision) is less than required \(pixelPrecision)
-    Actual perceptual precision \(actualPerceptualPrecision) is less than required \(perceptualPrecision)
-    """
-  } else {
-    return "Actual perceptual precision \(actualPerceptualPrecision) is less than required \(perceptualPrecision)"
+  // rowBytes must be a multiple of 8, so vImage_Buffer pads the end of each row with bytes to meet the multiple of 8 requirement.
+  // We must do 2D iteration of the vImage_Buffer in order to avoid loading the padding garbage bytes at the end of each row.
+  fastForEach(in: 0..<Int(deltaOutputImage.extent.height)) { line in
+    let lineOffset = line * buffer.rowBytes
+    fastForEach(in: 0..<Int(deltaOutputImage.extent.width)) { column in
+      let columnOffset = lineOffset + column * MemoryLayout<Float>.size
+      let deltaE = buffer.data.load(fromByteOffset: columnOffset, as: Float.self)
+      if deltaE > deltaThreshold {
+        failingPixelCount += 1
+      }
+      maximumDeltaE = max(maximumDeltaE, deltaE)
+    }
   }
+  let failingPixelPercent = Float(failingPixelCount) / Float(deltaOutputImage.extent.width * deltaOutputImage.extent.height)
+  let actualPixelPrecision = 1 - failingPixelPercent
+  guard actualPixelPrecision < pixelPrecision else { return nil }
+  // The actual perceptual precision is the perceptual precision of the pixel with the highest DeltaE.
+  // DeltaE is in a 0-100 scale, so we need to divide by 100 to transform it to a percentage.
+  let minimumPerceptualPrecision = 1 - maximumDeltaE / 100
+  return """
+  Actual image precision \(actualPixelPrecision) is less than required \(pixelPrecision)
+  Minimum perceptual precision \(minimumPerceptualPrecision) is less than required \(perceptualPrecision)
+  """
 }
 
-// Copied from https://developer.apple.com/documentation/coreimage/ciimageprocessorkernel
-@available(iOS 10.0, tvOS 10.0, macOS 10.13, *)
-final class ThresholdImageProcessorKernel: CIImageProcessorKernel {
-  static let inputThresholdKey = "thresholdValue"
-  static let device = MTLCreateSystemDefaultDevice()
-
-  override class func process(with inputs: [CIImageProcessorInput]?, arguments: [String: Any]?, output: CIImageProcessorOutput) throws {
-    guard
-      let device = device,
-      let commandBuffer = output.metalCommandBuffer,
-      let input = inputs?.first,
-      let sourceTexture = input.metalTexture,
-      let destinationTexture = output.metalTexture,
-      let thresholdValue = arguments?[inputThresholdKey] as? Float else {
-      return
-    }
-
-    let threshold = MPSImageThresholdBinary(
-      device: device,
-      thresholdValue: thresholdValue,
-      maximumValue: 1.0,
-      linearGrayColorTransform: nil
-    )
-
-    threshold.encode(
-      commandBuffer: commandBuffer,
-      sourceTexture: sourceTexture,
-      destinationTexture: destinationTexture
-    )
-  }
-}
+extension CIImage {
+  func render(extent: CGRect, context: CIContext, format: CIFormat = CIFormat.Rh) -> vImage_Buffer? {
+    // Some hardware configurations (virtualized CPU renderers) do not support 32-bit float output formats,
+    // so use a compatible 16-bit float format and convert the output value to 32-bit floats.
+    guard var buffer16 = try? vImage_Buffer(width: Int(extent.width), height: Int(extent.height), bitsPerPixel: 16) else { return nil }
+    defer { buffer16.free() }
+    context.render(
+      self,
+      toBitmap: buffer16.data,
+      rowBytes: buffer16.rowBytes,
+      bounds: extent,
+      format: format,
+      colorSpace: nil
+    )
+    guard
+      var buffer32 = try? vImage_Buffer(width: Int(buffer16.width), height: Int(buffer16.height), bitsPerPixel: 32),
+      vImageConvert_Planar16FtoPlanarF(&buffer16, &buffer32, 0) == kvImageNoError
+    else { return nil }
+    return buffer32
+  }
+}
 #endif
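
Two details of the CPU-based replacement are worth spelling out. First, the context renders into a 16-bit half-float buffer (CIFormat.Rh) because some virtualized renderers reject 32-bit float output, and vImageConvert_Planar16FtoPlanarF then widens the data to Float. Second, vImage_Buffer.rowBytes may exceed width * MemoryLayout<Float>.size, so iterating the buffer as one flat run of bytes would read row-end padding as if it were pixel data; the nested loops visit exactly width x height pixels. A reduced sketch of the padded-row walk (assumes an already-rendered planar Float buffer; not part of the patch):

    import Accelerate

    // Find the largest component in a planar 32-bit float vImage buffer,
    // skipping the padding bytes vImage may add at the end of each row.
    func maxComponent(in buffer: vImage_Buffer) -> Float {
      var maximum: Float = 0
      var line = 0
      while line < Int(buffer.height) {
        let lineOffset = line * buffer.rowBytes // byte offset of this row
        var column = 0
        while column < Int(buffer.width) {
          let value = buffer.data.load(
            fromByteOffset: lineOffset + column * MemoryLayout<Float>.size,
            as: Float.self
          )
          maximum = max(maximum, value)
          column += 1
        }
        line += 1
      }
      return maximum
    }
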
From 8d402bf1ef731d784172496768aab17d51f77d80 Mon Sep 17 00:00:00 2001
From: Eric Jensen
Date: Wed, 14 Dec 2022 23:20:29 -0500
Subject: [PATCH 3/5] Re-add the Metal-based image comparison

Check for support before using it and fall back to CPU computation if Metal is
not supported
---
 .../Snapshotting/UIImage.swift | 139 +++++++++++++++---
 1 file changed, 116 insertions(+), 23 deletions(-)

diff --git a/Sources/SnapshotTesting/Snapshotting/UIImage.swift b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
index 7bf74253d..bb576d109 100644
--- a/Sources/SnapshotTesting/Snapshotting/UIImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
@@ -170,49 +170,101 @@ private func diff(_ old: UIImage, _ new: UIImage) -> UIImage {
 
 #if os(iOS) || os(tvOS) || os(macOS)
 import Accelerate.vImage
-import CoreImage
+import CoreImage.CIKernel
+import MetalPerformanceShaders
 
 func perceptuallyCompare(_ old: CIImage, _ new: CIImage, pixelPrecision: Float, perceptualPrecision: Float) -> String? {
   // Calculate the deltaE values. Each pixel is a value between 0-100.
   // 0 means no difference, 100 means completely opposite.
-  let deltaOutputImage = old.applyingFilter("CILabDeltaE", parameters: ["inputImage2": new])
+  let deltaOutputImage = old.applyingLabDeltaE(new)
   // Setting the working color space and output color space to NSNull disables color management. This is appropriate when the output
   // of the operations is computational instead of an image intended to be displayed.
   let context = CIContext(options: [.workingColorSpace: NSNull(), .outputColorSpace: NSNull()])
-  guard let buffer = deltaOutputImage.render(extent: deltaOutputImage.extent, context: context) else {
-    return "Newly-taken snapshot could not be processed."
-  }
-  defer { buffer.free() }
   let deltaThreshold = (1 - perceptualPrecision) * 100
-  var failingPixelCount: Int = 0
+  let actualPixelPrecision: Float
   var maximumDeltaE: Float = 0
-  // rowBytes must be a multiple of 8, so vImage_Buffer pads the end of each row with bytes to meet the multiple of 8 requirement.
-  // We must do 2D iteration of the vImage_Buffer in order to avoid loading the padding garbage bytes at the end of each row.
-  fastForEach(in: 0..<Int(deltaOutputImage.extent.height)) { line in
-    let lineOffset = line * buffer.rowBytes
-    fastForEach(in: 0..<Int(deltaOutputImage.extent.width)) { column in
-      let columnOffset = lineOffset + column * MemoryLayout<Float>.size
-      let deltaE = buffer.data.load(fromByteOffset: columnOffset, as: Float.self)
-      if deltaE > deltaThreshold {
-        failingPixelCount += 1
-      }
-      maximumDeltaE = max(maximumDeltaE, deltaE)
-    }
-  }
-  let failingPixelPercent = Float(failingPixelCount) / Float(deltaOutputImage.extent.width * deltaOutputImage.extent.height)
-  let actualPixelPrecision = 1 - failingPixelPercent
+
+  // Metal is supported by all iOS/tvOS devices (2013 models or later) and Macs (2012 models or later).
+  // Older devices do not support iOS/tvOS 13 and macOS 10.15, which are the minimum versions of swift-snapshot-testing.
+  // However, some virtualized hardware does not have a GPU and therefore does not support Metal.
+  // In this case, macOS falls back to a CPU-based OpenGL ES renderer that silently fails when a Metal command is issued.
+  // We need to check for Metal device support and fall back to CPU-based vImage buffer iteration.
+  if ThresholdImageProcessorKernel.isSupported {
+    // Fast path - Metal processing
+    guard
+      let thresholdOutputImage = try? deltaOutputImage.applyingThreshold(deltaThreshold),
+      let averagePixel = thresholdOutputImage.applyingAreaAverage().renderSingleValue(in: context)
+    else {
+      return "Newly-taken snapshot's data could not be processed."
+    }
+    actualPixelPrecision = 1 - averagePixel
+    if actualPixelPrecision < pixelPrecision {
+      maximumDeltaE = deltaOutputImage.applyingAreaMaximum().renderSingleValue(in: context) ?? 0
+    }
+  } else {
+    // Slow path - CPU-based vImage buffer iteration
+    guard let buffer = deltaOutputImage.render(in: context) else {
+      return "Newly-taken snapshot could not be processed."
+    }
+    defer { buffer.free() }
+    var failingPixelCount: Int = 0
+    // rowBytes must be a multiple of 8, so vImage_Buffer pads the end of each row with bytes to meet the multiple of 8 requirement.
+    // We must do 2D iteration of the vImage_Buffer in order to avoid loading the padding garbage bytes at the end of each row.
+    fastForEach(in: 0..<Int(deltaOutputImage.extent.height)) { line in
+      let lineOffset = line * buffer.rowBytes
+      fastForEach(in: 0..<Int(deltaOutputImage.extent.width)) { column in
+        let columnOffset = lineOffset + column * MemoryLayout<Float>.size
+        let deltaE = buffer.data.load(fromByteOffset: columnOffset, as: Float.self)
+        if deltaE > deltaThreshold {
+          failingPixelCount += 1
+          if deltaE > maximumDeltaE {
+            maximumDeltaE = deltaE
+          }
+        }
+      }
+    }
+    let failingPixelPercent = Float(failingPixelCount) / Float(deltaOutputImage.extent.width * deltaOutputImage.extent.height)
+    actualPixelPrecision = 1 - failingPixelPercent
+  }
+
   guard actualPixelPrecision < pixelPrecision else { return nil }
   // The actual perceptual precision is the perceptual precision of the pixel with the highest DeltaE.
   // DeltaE is in a 0-100 scale, so we need to divide by 100 to transform it to a percentage.
-  let minimumPerceptualPrecision = 1 - maximumDeltaE / 100
+  let minimumPerceptualPrecision = 1 - min(maximumDeltaE / 100, 1)
   return """
-  Actual image precision \(actualPixelPrecision) is less than required \(pixelPrecision)
-  Minimum perceptual precision \(minimumPerceptualPrecision) is less than required \(perceptualPrecision)
+  The percentage of pixels that match \(actualPixelPrecision) is less than required \(pixelPrecision)
+  The lowest perceptual color precision \(minimumPerceptualPrecision) is less than required \(perceptualPrecision)
   """
 }
 
 extension CIImage {
-  func render(extent: CGRect, context: CIContext, format: CIFormat = CIFormat.Rh) -> vImage_Buffer? {
+  func applyingLabDeltaE(_ other: CIImage) -> CIImage {
+    applyingFilter("CILabDeltaE", parameters: ["inputImage2": other])
+  }
+
+  func applyingThreshold(_ threshold: Float) throws -> CIImage {
+    try ThresholdImageProcessorKernel.apply(
+      withExtent: extent,
+      inputs: [self],
+      arguments: [ThresholdImageProcessorKernel.inputThresholdKey: threshold]
+    )
+  }
+
+  func applyingAreaAverage() -> CIImage {
+    applyingFilter("CIAreaAverage", parameters: [kCIInputExtentKey: extent])
+  }
+
+  func applyingAreaMaximum() -> CIImage {
+    applyingFilter("CIAreaMaximum", parameters: [kCIInputExtentKey: extent])
+  }
+
+  func renderSingleValue(in context: CIContext) -> Float? {
+    guard let buffer = render(in: context) else { return nil }
+    defer { buffer.free() }
+    return buffer.data.load(fromByteOffset: 0, as: Float.self)
+  }
+
+  func render(in context: CIContext, format: CIFormat = CIFormat.Rh) -> vImage_Buffer? {
     // Some hardware configurations (virtualized CPU renderers) do not support 32-bit float output formats,
     // so use a compatible 16-bit float format and convert the output value to 32-bit floats.
     guard var buffer16 = try? vImage_Buffer(width: Int(extent.width), height: Int(extent.height), bitsPerPixel: 16) else { return nil }
@@ -232,6 +284,47 @@ extension CIImage {
     return buffer32
   }
 }
+
+// Copied from https://developer.apple.com/documentation/coreimage/ciimageprocessorkernel
+final class ThresholdImageProcessorKernel: CIImageProcessorKernel {
+  static let inputThresholdKey = "thresholdValue"
+  static let device = MTLCreateSystemDefaultDevice()
+
+  static var isSupported: Bool {
+    #if targetEnvironment(simulator)
+    guard #available(iOS 14.0, tvOS 14.0, *) else {
+      // The MPSSupportsMTLDevice method throws an exception on iOS/tvOS simulators older than 14.0
+      return false
+    }
+    #endif
+    return MPSSupportsMTLDevice(device)
+  }
+
+  override class func process(with inputs: [CIImageProcessorInput]?, arguments: [String: Any]?, output: CIImageProcessorOutput) throws {
+    guard
+      let device = device,
+      let commandBuffer = output.metalCommandBuffer,
+      let input = inputs?.first,
+      let sourceTexture = input.metalTexture,
+      let destinationTexture = output.metalTexture,
+      let thresholdValue = arguments?[inputThresholdKey] as? Float else {
+      return
+    }
+
+    let threshold = MPSImageThresholdBinary(
+      device: device,
+      thresholdValue: thresholdValue,
+      maximumValue: 1.0,
+      linearGrayColorTransform: nil
+    )
+
+    threshold.encode(
+      commandBuffer: commandBuffer,
+      sourceTexture: sourceTexture,
+      destinationTexture: destinationTexture
+    )
+  }
+}
 #endif
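
Both branches above compute the same pair of statistics. On the Metal path, MPSImageThresholdBinary maps every deltaE pixel above the threshold to 1.0 and every other pixel to 0.0, so averaging the result with CIAreaAverage yields the failing-pixel fraction in a single one-pixel readback: for a 10x10 image in which 2 pixels exceed the threshold, the average is 2/100 = 0.02 and the pixel precision is 1 - 0.02 = 0.98. The CPU branch arrives at the same 0.98 by counting failing pixels directly.
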
From 88fb50f301b39ed4173c8f960a04ebf88c56dd53 Mon Sep 17 00:00:00 2001
From: Eric Jensen
Date: Wed, 29 Nov 2023 15:53:03 -0500
Subject: [PATCH 4/5] Update logic to determine if a Metal kernel is supported
 on the device

---
 Sources/SnapshotTesting/Snapshotting/UIImage.swift | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Sources/SnapshotTesting/Snapshotting/UIImage.swift b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
index e75ebf223..fe2cf2ced 100644
--- a/Sources/SnapshotTesting/Snapshotting/UIImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
@@ -319,9 +319,12 @@
   static let device = MTLCreateSystemDefaultDevice()
 
   static var isSupported: Bool {
+    guard let device = device else {
+      return false
+    }
     #if targetEnvironment(simulator)
-    guard let device = device, #available(iOS 14.0, tvOS 14.0, *) else {
-      // The MPSSupportsMTLDevice method throws an exception on iOS/tvOS simulators older than 14.0
+    guard #available(iOS 14.0, tvOS 14.0, *) else {
+      // The MPSSupportsMTLDevice method throws an exception on iOS/tvOS simulators < 14.0
       return false
     }
     #endif
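
The effect of the reordering: the nil-device check now runs for all build targets, not just simulator builds, so a host with no Metal device (for example, a virtualized CI box without a GPU) is rejected before MPSSupportsMTLDevice or the availability check is ever consulted. A condensed sketch of the resulting logic (illustrative; mirrors the patched isSupported above):

    import Metal
    import MetalPerformanceShaders

    func metalKernelIsSupported(_ device: MTLDevice?) -> Bool {
      // 1. Bail out when no Metal device exists at all.
      guard let device = device else { return false }
      #if targetEnvironment(simulator)
      // 2. MPSSupportsMTLDevice throws an exception on iOS/tvOS simulators < 14.0.
      guard #available(iOS 14.0, tvOS 14.0, *) else { return false }
      #endif
      // 3. Ask MetalPerformanceShaders whether this device can run MPS kernels.
      return MPSSupportsMTLDevice(device)
    }
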
From 7871eb7238e9b6e191f31fe9ae4d91a28f8ce916 Mon Sep 17 00:00:00 2001
From: Eric Jensen
Date: Wed, 29 Nov 2023 15:59:38 -0500
Subject: [PATCH 5/5] Use the maintainers' preferred method of using a while
 loop

---
 .../Snapshotting/UIImage.swift | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/Sources/SnapshotTesting/Snapshotting/UIImage.swift b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
index fe2cf2ced..ea36c1a6e 100644
--- a/Sources/SnapshotTesting/Snapshotting/UIImage.swift
+++ b/Sources/SnapshotTesting/Snapshotting/UIImage.swift
@@ -236,10 +236,19 @@
       var failingPixelCount: Int = 0
       // rowBytes must be a multiple of 8, so vImage_Buffer pads the end of each row with bytes to meet the multiple of 8 requirement.
       // We must do 2D iteration of the vImage_Buffer in order to avoid loading the padding garbage bytes at the end of each row.
+      //
+      // NB: We are purposely using a verbose 'while' loop instead of a 'for in' loop. When the
+      // compiler doesn't have optimizations enabled, like in test targets, a `while` loop is
+      // significantly faster than a `for` loop for iterating through the elements of a memory
+      // buffer. Details can be found in [SR-6983](https://github.com/apple/swift/issues/49531)
       let componentStride = MemoryLayout<Float>.stride
-      fastForEach(in: 0..<Int(deltaOutputImage.extent.height)) { line in
+      var line = 0
+      while line < Int(deltaOutputImage.extent.height) {
+        defer { line += 1 }
         let lineOffset = line * buffer.rowBytes
-        fastForEach(in: 0..<Int(deltaOutputImage.extent.width)) { column in
+        var column = 0
+        while column < Int(deltaOutputImage.extent.width) {
+          defer { column += 1 }
           let columnOffset = lineOffset + column * componentStride
           let deltaE = buffer.data.load(fromByteOffset: columnOffset, as: Float.self)
           if deltaE > deltaThreshold {
@@ -361,13 +370,3 @@
   }
 }
 #endif
-
-/// When the compiler doesn't have optimizations enabled, like in test targets, a `while` loop is significantly faster than a `for` loop
-/// for iterating through the elements of a memory buffer. Details can be found in [SR-6983](https://github.com/apple/swift/issues/49531#issuecomment-1108286654)
-func fastForEach(in range: Range<Int>, _ body: (Int) -> Void) {
-  var index = range.lowerBound
-  while index < range.upperBound {
-    body(index)
-    index += 1
-  }
-}
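
Taken together, the series preserves the documented meaning of the precision parameters: perceptualPrecision is converted to a deltaE threshold via (1 - perceptualPrecision) * 100, so 0.98 tolerates a per-pixel deltaE of up to 2 (roughly a just-noticeable color difference), while precision/pixelPrecision bounds the fraction of pixels allowed to exceed that threshold. A hypothetical test exercising the patched comparison through the library's UIImage strategy (the image name is a placeholder):

    import SnapshotTesting
    import UIKit
    import XCTest

    final class LogoTests: XCTestCase {
      func testLogo() {
        let logo = UIImage(named: "logo")! // placeholder asset under test
        // Up to 1% of pixels may differ, each by at most deltaE 2.
        assertSnapshot(
          matching: logo,
          as: .image(precision: 0.99, perceptualPrecision: 0.98)
        )
      }
    }
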