From 90a388afa9e3314a59ae6f20c1eb47b03faa5ade Mon Sep 17 00:00:00 2001 From: spencer kelly Date: Tue, 23 Jan 2024 16:26:03 -0500 Subject: [PATCH] fix clauses issues --- scratch.js | 8 +++----- src/1-one/match/methods/termMethods.js | 4 ++-- src/3-three/chunker/api/clauses.js | 17 ++++++++++------- .../clause.ignore.js => three/clause.test.js} | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) rename tests/{_ignore/clause.ignore.js => three/clause.test.js} (98%) diff --git a/scratch.js b/scratch.js index 118aafe21..2c8c7c875 100644 --- a/scratch.js +++ b/scratch.js @@ -6,11 +6,9 @@ import nlp from './src/three.js' // let doc = nlp('one two three four five. one three four') // doc.before('three four').debug() - -let m = nlp("i said, 'did you have to do that' and then left, like nothing happened (which it didn't).") - .clauses() - .debug() -console.log(m.eq(1).text()) +let doc = nlp(`...and my butt smells, and i like to kiss my own butt`) +doc.match('@hasEllipses').debug() +// let m = doc.clauses() //.debug() let arr = [ // 'I left the window open for fresh air.', diff --git a/src/1-one/match/methods/termMethods.js b/src/1-one/match/methods/termMethods.js index 71df13afd..545f63fea 100644 --- a/src/1-one/match/methods/termMethods.js +++ b/src/1-one/match/methods/termMethods.js @@ -11,7 +11,7 @@ const hasDash = / [-–—]{1,3} / /** search the term's 'post' punctuation */ const hasPost = (term, punct) => term.post.indexOf(punct) !== -1 /** search the term's 'pre' punctuation */ -const hasPre = (term, punct) => term.pre.indexOf(punct) !== -1 +// const hasPre = (term, punct) => term.pre.indexOf(punct) !== -1 const methods = { /** does it have a quotation symbol? */ @@ -25,7 +25,7 @@ const methods = { /** does it end with a question mark? */ hasQuestionMark: term => hasPost(term, '?') || hasPost(term, '¿'), /** is there a ... at the end? */ - hasEllipses: term => hasPost(term, '..') || hasPost(term, '…') || hasPre(term, '..') || hasPre(term, '…'), + hasEllipses: term => hasPost(term, '..') || hasPost(term, '…'), /** is there a semicolon after term word? */ hasSemicolon: term => hasPost(term, ';'), /** is there a colon after term word? */ diff --git a/src/3-three/chunker/api/clauses.js b/src/3-three/chunker/api/clauses.js index 982d7392f..1a0d2f57a 100644 --- a/src/3-three/chunker/api/clauses.js +++ b/src/3-three/chunker/api/clauses.js @@ -10,7 +10,7 @@ const byComma = function (doc) { if (m.growRight('. .').wordCount() === 1) { return false } - let more = m.grow(".") // grow by 1 word in either direction + let more = m.grow('.') // grow by 1 word in either direction more = more.ifNo('@hasComma @hasComma') //fun, cool... more = more.ifNo('@hasComma (and|or) .') //cool, and fun more = more.ifNo('(#City && @hasComma) #Country') //'toronto, canada' @@ -70,13 +70,18 @@ const clauses = function (n) { found = found.splitBefore('as (though|if)') found = found.splitBefore('(til|until)') + // it is cool but it is .. + let m = found.match('#Verb .* [but] .* #Verb', 0) + if (m.found) { + found = found.splitBefore(m) + } // it is cool and it is .. // let conjunctions = found.if('#Copula #Adjective #Conjunction (#Pronoun|#Determiner) #Verb').match('#Conjunction') // found = found.splitBefore(conjunctions) - // // if it is this then that - // let condition = found.if('if .{2,9} then .').match('then') - // found = found.splitBefore(condition) + // if it is this then that + let condition = found.if('if .{2,9} then .').match('then') + found = found.splitBefore(condition) // // misc clause partitions // found = found.splitBefore('as well as .') @@ -87,14 +92,12 @@ const clauses = function (n) { // found = found.splitAfter('@hasSemicolon') // found = found.splitAfter('@hasDash') - // // + // // // found = found.splitBefore('which (were|are|will)') // // he said [...] // found = found.splitAfter('#Noun (said|say|says)') - - // passive voice verb - '.. which was robbed is empty' // let passive = found.match('#Noun (which|that) (was|is) #Adverb? #PastTense #Adverb?') // if (passive.found) { diff --git a/tests/_ignore/clause.ignore.js b/tests/three/clause.test.js similarity index 98% rename from tests/_ignore/clause.ignore.js rename to tests/three/clause.test.js index a34988eb6..f6edb5101 100644 --- a/tests/_ignore/clause.ignore.js +++ b/tests/three/clause.test.js @@ -1,5 +1,5 @@ import test from 'tape' -import nlp from '../three/_lib.js' +import nlp from './_lib.js' const here = '[three/clause] ' test('clauses-parentheses:', function (t) {