From 3e4e3b02d4fe4c0f6ca73751e3af18a048aa63d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20D=C3=B6ring?= Date: Sun, 4 Nov 2018 20:52:20 +0100 Subject: [PATCH 1/4] WIP --- lib/pure/unicode.nim | 7 ++- lib/std/wordwrap.nim | 124 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 lib/std/wordwrap.nim diff --git a/lib/pure/unicode.nim b/lib/pure/unicode.nim index c0c1ba778695..ec3cb9899e27 100644 --- a/lib/pure/unicode.nim +++ b/lib/pure/unicode.nim @@ -213,6 +213,10 @@ proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = result = "" fastToUTF8Copy(c, result, 0, false) +proc add*(s: var string; c: Rune) = + let pos = s.len + fastToUTF8Copy(c, s, pos, false) + proc `$`*(rune: Rune): string = ## Converts a Rune to a string rune.toUTF8 @@ -220,7 +224,8 @@ proc `$`*(rune: Rune): string = proc `$`*(runes: seq[Rune]): string = ## Converts a sequence of Runes to a string result = "" - for rune in runes: result.add(rune.toUTF8) + for rune in runes: + result.add rune proc runeOffset*(s: string, pos:Natural, start: Natural = 0): int = ## Returns the byte position of unicode character diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim new file mode 100644 index 000000000000..ce5598de8d6e --- /dev/null +++ b/lib/std/wordwrap.nim @@ -0,0 +1,124 @@ +import unicode + +proc addSubstrExcl(self: var string, str: string; a,b: int) = + ## equivalent to ``self.add str.substr(a,b-1)``. Exclusive upper bound. + if a > b: + echo a, " ", b + assert a <= b + + if a < b: + let idx = self.len + self.setLen idx + b - a + copyMem(self[idx].addr, str[a].unsafeAddr, b - a) + +let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает uiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. 
Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт." + +proc wordWrap*(s: string, maxLineWidth = 80, + splitLongWords = true, + newLine = "\n"): string = + + var currentWordLength: int = 0 + var currentWord: string = newStringOfCap(32) + var currentLineLength: int = 0 + var currentWordLineEndMark: int = -1 + var currentWordLengthAtLineEnd: int = -1 + + template handleWhitespace(): untyped = + if currentWord.len > 0: + + if currentLineLength + 1 + currentWordLength > maxLineWidth: + var splitWord = splitLongWords + if splitLongWords: + # arbitrary minimum length of words to split + splitWord = currentWordLength > maxLineWidth div 2 + if currentWordLengthAtLineEnd <= 3: + # does the current word fit in the next line? + if currentWordLength <= maxLineWidth: + splitWord = false + + if splitWord: + result.addSubstrExcl(currentWord, 0, currentWordLineEndMark) + result.add newLine + result.addSubstrExcl(currentWord, currentWordLineEndMark, currentWord.len) + currentLineLength = currentWordLength - currentWordLengthAtLineEnd + else: + result.add newLine + currentLineLength = 0 + + if currentLineLength > 0: + result.add ' ' + currentLineLength += 1 + + result.add currentWord + currentLineLength += currentWordLength + + currentWord.setlen 0 + currentWordLength = 0 + + currentWordLineEndMark = -1 + + for rune in s.runes: + if rune.isWhiteSpace: + handleWhitespace() + else: + currentWord.add rune + inc currentWordLength + + if splitLongWords: + # the word reached the end of the current line + if currentLineLength + 1 + currentWordLength == maxLineWidth: + assert(currentWordLineEndMark == -1) + currentWordLineEndMark = currentWord.len + currentWordLengthAtLineEnd = currentWordLength + + # the word reached the end of the next line + if currentWordLength - currentWordLengthAtLineEnd == maxLineWidth: + # superlong word, stop being smart. 
+ result.addSubstrExcl(currentWord, 0, currentWordLineEndMark) + result.add newLine + + currentWord. +currentWordLineEndMark() + currentWordLength = currentWordLength - currentWordLengthAtLineEnd + + handleWhitespace() + currentWordLineEndMark = maxLineWidth + + handleWhitespace() + +echo wordWrap(tmp) + + +import strutils + +echo strutils.wordWrap(tmp, splitLongWords=true) +echo strutils.wordWrap(tmp, splitLongWords=false) + + # result = newStringOfCap(s.len + s.len shr 6) + # var spaceLeft = maxLineWidth + # var lastSep = "" + # for word, isSep in tokenize(s, seps): + # if isSep: + # lastSep = word + # spaceLeft = spaceLeft - len(word) + # continue + # if len(word) > spaceLeft: + # if splitLongWords and len(word) > maxLineWidth: + # result.add(substr(word, 0, spaceLeft-1)) + # var w = spaceLeft + # var wordLeft = len(word) - spaceLeft + # while wordLeft > 0: + # result.add(newLine) + # var L = min(maxLineWidth, wordLeft) + # spaceLeft = maxLineWidth - L + # result.add(substr(word, w, w+L-1)) + # inc(w, L) + # dec(wordLeft, L) + # else: + # spaceLeft = maxLineWidth - len(word) + # result.add(newLine) + # result.add(word) + # else: + # spaceLeft = spaceLeft - len(word) + # result.add(lastSep & word) + # lastSep.setLen(0) From 3c9cee76bf202a4bd082f27ae0d4287a661978d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20D=C3=B6ring?= Date: Tue, 6 Nov 2018 08:39:07 +0100 Subject: [PATCH 2/4] wordwrap stuff --- lib/std/wordwrap.nim | 109 +++++++++---------------------------------- 1 file changed, 23 insertions(+), 86 deletions(-) diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim index ce5598de8d6e..ff33a2ba96f9 100644 --- a/lib/std/wordwrap.nim +++ b/lib/std/wordwrap.nim @@ -1,18 +1,5 @@ import unicode -proc addSubstrExcl(self: var string, str: string; a,b: int) = - ## equivalent to ``self.add str.substr(a,b-1)``. Exclusive upper bound. 
- if a > b: - echo a, " ", b - assert a <= b - - if a < b: - let idx = self.len - self.setLen idx + b - a - copyMem(self[idx].addr, str[a].unsafeAddr, b - a) - -let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает uiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт." - proc wordWrap*(s: string, maxLineWidth = 80, splitLongWords = true, newLine = "\n"): string = @@ -20,30 +7,15 @@ proc wordWrap*(s: string, maxLineWidth = 80, var currentWordLength: int = 0 var currentWord: string = newStringOfCap(32) var currentLineLength: int = 0 - var currentWordLineEndMark: int = -1 var currentWordLengthAtLineEnd: int = -1 + var longWordMode = false template handleWhitespace(): untyped = if currentWord.len > 0: if currentLineLength + 1 + currentWordLength > maxLineWidth: - var splitWord = splitLongWords - if splitLongWords: - # arbitrary minimum length of words to split - splitWord = currentWordLength > maxLineWidth div 2 - if currentWordLengthAtLineEnd <= 3: - # does the current word fit in the next line? 
- if currentWordLength <= maxLineWidth: - splitWord = false - - if splitWord: - result.addSubstrExcl(currentWord, 0, currentWordLineEndMark) - result.add newLine - result.addSubstrExcl(currentWord, currentWordLineEndMark, currentWord.len) - currentLineLength = currentWordLength - currentWordLengthAtLineEnd - else: - result.add newLine - currentLineLength = 0 + result.add newLine + currentLineLength = 0 if currentLineLength > 0: result.add ' ' @@ -55,70 +27,35 @@ proc wordWrap*(s: string, maxLineWidth = 80, currentWord.setlen 0 currentWordLength = 0 - currentWordLineEndMark = -1 - for rune in s.runes: if rune.isWhiteSpace: handleWhitespace() else: + if splitLongWords and currentWordLength >= maxLineWidth: + handleWhitespace() + currentWord.add rune inc currentWordLength - if splitLongWords: - # the word reached the end of the current line - if currentLineLength + 1 + currentWordLength == maxLineWidth: - assert(currentWordLineEndMark == -1) - currentWordLineEndMark = currentWord.len - currentWordLengthAtLineEnd = currentWordLength + handleWhitespace() - # the word reached the end of the next line - if currentWordLength - currentWordLengthAtLineEnd == maxLineWidth: - # superlong word, stop being smart. - result.addSubstrExcl(currentWord, 0, currentWordLineEndMark) - result.add newLine - currentWord. 
-currentWordLineEndMark() - currentWordLength = currentWordLength - currentWordLengthAtLineEnd +when isMainModule: + import strutils - handleWhitespace() - currentWordLineEndMark = maxLineWidth - handleWhitespace() + proc checkLineLength(arg: string): void = + for line in splitlines(arg): + var numRunes = 0 + for rune in runes(line): + numRunes += 1 + + assert numRunes <= 80 + + let longlongword = "abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε" + + checkLineLength(longlongword.wordWrap) + + let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает xxxuiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт." 
-echo wordWrap(tmp) - - -import strutils - -echo strutils.wordWrap(tmp, splitLongWords=true) -echo strutils.wordWrap(tmp, splitLongWords=false) - - # result = newStringOfCap(s.len + s.len shr 6) - # var spaceLeft = maxLineWidth - # var lastSep = "" - # for word, isSep in tokenize(s, seps): - # if isSep: - # lastSep = word - # spaceLeft = spaceLeft - len(word) - # continue - # if len(word) > spaceLeft: - # if splitLongWords and len(word) > maxLineWidth: - # result.add(substr(word, 0, spaceLeft-1)) - # var w = spaceLeft - # var wordLeft = len(word) - spaceLeft - # while wordLeft > 0: - # result.add(newLine) - # var L = min(maxLineWidth, wordLeft) - # spaceLeft = maxLineWidth - L - # result.add(substr(word, w, w+L-1)) - # inc(w, L) - # dec(wordLeft, L) - # else: - # spaceLeft = maxLineWidth - len(word) - # result.add(newLine) - # result.add(word) - # else: - # spaceLeft = spaceLeft - len(word) - # result.add(lastSep & word) - # lastSep.setLen(0) + checkLineLength(tmp.wordWrap) From dd1b991b25a574f9facec13599f4dd01961fc3f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20D=C3=B6ring?= Date: Thu, 8 Nov 2018 09:46:12 +0100 Subject: [PATCH 3/4] add doc string --- lib/std/wordwrap.nim | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim index ff33a2ba96f9..3e3a4609c5c8 100644 --- a/lib/std/wordwrap.nim +++ b/lib/std/wordwrap.nim @@ -3,6 +3,10 @@ import unicode proc wordWrap*(s: string, maxLineWidth = 80, splitLongWords = true, newLine = "\n"): string = + ## This function breaks all words that reach over `maxLineWidth` + ## measured in number of runes. When `splitLongWords` is `true` + ## words that are longer than `maxLineWidth` are splitted. Multiple spaces and newlines are converted to a single space. All + ## whitespace is treated equally. Non-breaking whitespace is ignored. 
var currentWordLength: int = 0 var currentWord: string = newStringOfCap(32) From dda196ef7800c1da3b948e63587941157a99113e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arne=20D=C3=B6ring?= Date: Thu, 8 Nov 2018 10:08:52 +0100 Subject: [PATCH 4/4] fill region --- lib/std/wordwrap.nim | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/std/wordwrap.nim b/lib/std/wordwrap.nim index 3e3a4609c5c8..85cde6f0d696 100644 --- a/lib/std/wordwrap.nim +++ b/lib/std/wordwrap.nim @@ -5,8 +5,10 @@ proc wordWrap*(s: string, maxLineWidth = 80, newLine = "\n"): string = ## This function breaks all words that reach over `maxLineWidth` ## measured in number of runes. When `splitLongWords` is `true` - ## words that are longer than `maxLineWidth` are splitted. Multiple spaces and newlines are converted to a single space. All - ## whitespace is treated equally. Non-breaking whitespace is ignored. + ## words that are longer than `maxLineWidth` are split. Multiple + ## spaces and newlines are converted to a single space. All + ## whitespace is treated equally. Non-breaking whitespace is + ## ignored. var currentWordLength: int = 0 var currentWord: string = newStringOfCap(32)