diff --git a/Boyer-Moore-Horspool/BoyerMooreHorspool.swift b/Boyer-Moore-Horspool/BoyerMooreHorspool.swift index 481b7d483..4ddd1b52e 100644 --- a/Boyer-Moore-Horspool/BoyerMooreHorspool.swift +++ b/Boyer-Moore-Horspool/BoyerMooreHorspool.swift @@ -11,23 +11,31 @@ extension String { // use it a few times and it's expensive to calculate. let patternLength = pattern.count guard patternLength > 0, patternLength <= self.count else { return nil } - + + // This points at the last character in the pattern. + let p = pattern.index(before: pattern.endIndex) + let lastChar = pattern[p] + // Make the skip table. This table determines how far we skip ahead // when a character from the pattern is found. var skipTable = [Character: Int]() for (i, c) in pattern.enumerated() { - skipTable[c] = patternLength - i - 1 + // In Horspool version we gonna skip ahead full pattern length + // when it's last character from the pattern + if usingHorspoolImprovement, i == (patternLength - 1) { + if skipTable[c] == nil { + skipTable[c] = patternLength + } + } else { + skipTable[c] = patternLength - i - 1 + } } - - // This points at the last character in the pattern. - let p = pattern.index(before: pattern.endIndex) - let lastChar = pattern[p] - + // The pattern is scanned right-to-left, so skip ahead in the string by // the length of the pattern. (Minus 1 because startIndex already points // at the first character in the source string.) var i = index(startIndex, offsetBy: patternLength - 1) - + // This is a helper function that steps backwards through both strings // until we find a character that doesn’t match, or until we’ve reached // the beginning of the pattern. @@ -41,24 +49,22 @@ extension String { } return j } - + // The main loop. Keep going until the end of the string is reached. while i < endIndex { let c = self[i] - + // Does the current character match the last character from the pattern? if c == lastChar { - + // There is a possible match. Do a brute-force search backwards. if let k = backwards() { return k } - + if !usingHorspoolImprovement { // If no match, we can only safely skip one character ahead. i = index(after: i) } else { - // Ensure to jump at least one character (this is needed because the first - // character is in the skipTable, and `skipTable[lastChar] = 0`) - let jumpOffset = max(skipTable[c] ?? patternLength, 1) + let jumpOffset = skipTable[c] ?? patternLength i = index(i, offsetBy: jumpOffset, limitedBy: endIndex) ?? endIndex } } else { diff --git a/Knuth-Morris-Pratt/KnuthMorrisPratt.swift b/Knuth-Morris-Pratt/KnuthMorrisPratt.swift index 81bc65278..4e20e2eec 100644 --- a/Knuth-Morris-Pratt/KnuthMorrisPratt.swift +++ b/Knuth-Morris-Pratt/KnuthMorrisPratt.swift @@ -8,58 +8,61 @@ import Foundation -extension String { - - func indexesOf(ptnr: String) -> [Int]? { - - let text = Array(self.characters) - let pattern = Array(ptnr.characters) - - let textLength: Int = text.count - let patternLength: Int = pattern.count - - guard patternLength > 0 else { - return nil - } - - var suffixPrefix: [Int] = [Int](repeating: 0, count: patternLength) - var textIndex: Int = 0 - var patternIndex: Int = 0 - var indexes: [Int] = [Int]() +func indents(_ pattern: [String.Element]) -> [Int]? { + guard pattern.count > 0 else { return nil } + let length = pattern.count - /* Pre-processing stage: computing the table for the shifts (through Z-Algorithm) */ - let zeta = ZetaAlgorithm(ptnr: ptnr) + var indents = Array(repeating: 0, count: length) + var i = 1 + var j = 0 - for patternIndex in (1 ..< patternLength).reversed() { - textIndex = patternIndex + zeta![patternIndex] - 1 - suffixPrefix[textIndex] = zeta![patternIndex] + while i < length { + if pattern[i] == pattern[j] { + indents[i] = j + 1 + i += 1 + j += 1 + + } else if j == 0 { + indents[i] = 0 + i += 1 + + } else { + j = indents[j - 1] + } } - /* Search stage: scanning the text for pattern matching */ - textIndex = 0 - patternIndex = 0 - - while textIndex + (patternLength - patternIndex - 1) < textLength { - - while patternIndex < patternLength && text[textIndex] == pattern[patternIndex] { - textIndex = textIndex + 1 - patternIndex = patternIndex + 1 - } - - if patternIndex == patternLength { - indexes.append(textIndex - patternIndex) - } - - if patternIndex == 0 { - textIndex = textIndex + 1 - } else { - patternIndex = suffixPrefix[patternIndex - 1] - } - } + return indents +} + +extension String { - guard !indexes.isEmpty else { - return nil + func indices(of pattern: String) -> [Index] { + let ptrn = Array(pattern) + let str = Array(self) + guard let indents = indents(ptrn) else { return [] } + + var indices = [Index]() + var k = 0 + var l = 0 + + while k < str.count { + + if str[k] == ptrn[l] { + k += 1 + l += 1 + if l == ptrn.count { + let index = self.index(startIndex, offsetBy: k - ptrn.count) + indices.append(index) + l = 0 + } + + } else if l == 0 { + k += 1 + } else { + l = indents[l - 1] + } + } + + return indices } - return indexes - } } diff --git a/Rabin-Karp/rabin-karp.swift b/Rabin-Karp/rabin-karp.swift index c337de1a7..2aafb5f75 100644 --- a/Rabin-Karp/rabin-karp.swift +++ b/Rabin-Karp/rabin-karp.swift @@ -20,95 +20,63 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -struct Constants { - static let hashMultiplier = 69069 -} - -precedencegroup PowerPrecedence { higherThan: MultiplicationPrecedence } -infix operator ** : PowerPrecedence -func ** (radix: Int, power: Int) -> Int { - return Int(pow(Double(radix), Double(power))) -} -func ** (radix: Double, power: Int) -> Double { - return pow(radix, Double(power)) -} - -extension Character { - var asInt: Int { - let s = String(self).unicodeScalars - return Int(s[s.startIndex].value) +public func hash(_ string: String) -> Int { + return string.reduce(0) { (result: Int, character: Character) in + result &+ character.hashValue } } -// Find first position of pattern in the text using Rabin Karp algorithm -public func search(text: String, pattern: String) -> Int { - // convert to array of ints - let patternArray = pattern.characters.flatMap { $0.asInt } - let textArray = text.characters.flatMap { $0.asInt } - - if textArray.count < patternArray.count { - return -1 - } - - let patternHash = hash(array: patternArray) - var endIdx = patternArray.count - 1 - let firstChars = Array(textArray[0...endIdx]) - let firstHash = hash(array: firstChars) +public func nextHash(prevHash: Int, dropped: Character, added: Character) -> Int { + return prevHash &- dropped.hashValue &+ added.hashValue +} +public func search(text: String, pattern: String) -> [String.Index] { + if text.count < pattern.count { return [] } + + var indices = [String.Index]() + + let patternHash = hash(pattern) + let patternLength = pattern.count - 1 + let offset = text.index(text.startIndex, offsetBy: patternLength) + let firstText = String(text[...offset]) + let firstHash = hash(firstText) + if patternHash == firstHash { - // Verify this was not a hash collison - if firstChars == patternArray { - return 0 + if firstText == pattern { + indices.append(text.startIndex) } } - + + let start = text.index(after: text.startIndex) + let end = text.index(text.endIndex, offsetBy: -patternLength) var prevHash = firstHash - // Now slide the window across the text to be searched - for idx in 1...(textArray.count - patternArray.count) { - endIdx = idx + (patternArray.count - 1) - let window = Array(textArray[idx...endIdx]) - let windowHash = nextHash( - prevHash: prevHash, - dropped: textArray[idx - 1], - added: textArray[endIdx], - patternSize: patternArray.count - 1 - ) - - if windowHash == patternHash { - if patternArray == window { - return idx - } + var i = start + + while i != end { + let terminator = text.index(i, offsetBy: patternLength) + let window = text[i...terminator] + let prev = text.index(before: i) + let windowHash = nextHash(prevHash: prevHash, dropped: text[prev], added: text[terminator]) + + if windowHash == patternHash, + pattern == window { + indices.append(i) } - + prevHash = windowHash + + i = text.index(after: i) } - - return -1 -} - -public func hash(array: Array) -> Double { - var total: Double = 0 - var exponent = array.count - 1 - for i in array { - total += Double(i) * (Double(Constants.hashMultiplier) ** exponent) - exponent -= 1 - } - - return Double(total) -} - -public func nextHash(prevHash: Double, dropped: Int, added: Int, patternSize: Int) -> Double { - let oldHash = prevHash - (Double(dropped) * - (Double(Constants.hashMultiplier) ** patternSize)) - return Double(Constants.hashMultiplier) * oldHash + Double(added) + + return indices } // TESTS assert(search(text:"The big dog jumped over the fox", - pattern:"ump") == 13, "Invalid index returned") + pattern:"ump") == 13, "Invalid index returned") assert(search(text:"The big dog jumped over the fox", - pattern:"missed") == -1, "Invalid index returned") + pattern:"missed") == -1, "Invalid index returned") assert(search(text:"The big dog jumped over the fox", - pattern:"T") == 0, "Invalid index returned") + pattern:"T") == 0, "Invalid index returned")