Skip to content

Commit

Permalink
assume indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
spencermountain committed Nov 8, 2021
1 parent 784130c commit c4c3916
Show file tree
Hide file tree
Showing 15 changed files with 56 additions and 40 deletions.
1 change: 1 addition & 0 deletions .jshintrc
Expand Up @@ -3,6 +3,7 @@
"node": true,
"asi": true,
"undef": true,
"-W138": true,
"globals": {
"self": true
}
Expand Down
2 changes: 1 addition & 1 deletion lib/switches/clues/person.js
Expand Up @@ -15,7 +15,7 @@ export default {
// PresentTense: p, //bob seems
},
ownTags: {
// ProperNoun: p, //capital letter
ProperNoun: p, //capital letter
},
beforeWords: {
hi: p,
Expand Down
1 change: 1 addition & 0 deletions lib/switches/index.js
Expand Up @@ -34,4 +34,5 @@ Object.keys(switches).forEach(k => {
fallback: switches[k].fallback,
}
})

export default switches
13 changes: 5 additions & 8 deletions scratch.js
Expand Up @@ -3,7 +3,7 @@ import nlp from './src/three.js'
// import nlp from './builds/compromise.cjs'
// import text from '/Users/spencer/mountain/compromise/scripts/perf/flame/_sotu-text.js'

nlp.verbose('tagger')
// nlp.verbose('tagger')


// bug!
Expand Down Expand Up @@ -44,11 +44,7 @@ let txt
// txt= 'Thursday works for me.'
// txt= 'She is going to be a kinesiologist, sports injury therapist.'
// txt= 'Does that work?'
txt = 'Bob Bowen'
// txt= 'Drew Fossum'
// txt = 'Drew Fossum'
// txt= 'bob k'
// txt= "You're right, Sue."
txt = "right, Sue."
// txt= 'Day One Interviews Day Two Interviews'
// txt= 'Sue and Jeff --'
// txt= 'Has Liz finished with gathering the documents?'
Expand All @@ -74,8 +70,10 @@ txt = 'Bob Bowen'
// txt= 'I had a conversation with the woman running this place in April 2010.'
// txt= 'Great Cookies, Cakes, and Customer Service'
// txt= 'May, 2009.'
txt = 'you guys don\'t know'

let doc = nlp(txt).debug()
let doc = nlp(`John Smith and Jack were walking`)
doc.insertAfter('drugs')

// bug 1
// txt = `out-lived`
Expand All @@ -87,7 +85,6 @@ let doc = nlp(txt).debug()




/*
Expand Down
3 changes: 2 additions & 1 deletion scripts/pack.js
Expand Up @@ -40,6 +40,7 @@ const steps = [
}
})
})

return switches
},
},
Expand Down Expand Up @@ -68,6 +69,6 @@ steps.forEach(obj => {

//get filesize
const stats = fs.statSync(obj.path)
let size = (stats['size'] / 1000.0).toFixed(1)
let size = (stats.size / 1000.0).toFixed(1)
console.log(` - ${obj.label} is ` + size + 'k\n')
})
2 changes: 1 addition & 1 deletion src/1-one/change/api/insert.js
Expand Up @@ -26,7 +26,7 @@ const insert = function (str, view, prepend) {
ptrs = ptrs.map(a => [a[0]])
let doc = view.toView(ptrs)
// try to tag them, too
doc.compute(['preTagger', 'contractions', 'postTagger', 'index'])
doc.compute(view.world.hooks)
return doc
}

Expand Down
20 changes: 14 additions & 6 deletions src/2-two/contraction/compute/contractions/index.js
Expand Up @@ -9,20 +9,28 @@ import isPossessive from './isPossessive.js'
const byApostrophe = /'/
const numDash = /^[0-9][^-–—]*[-–—].*?[0-9]/

const reTag = function (terms, i, world) {
const reTag = function (terms, i, world, view) {
const preTagger = world.compute.preTagger
// just re-tag neighbourhood
let start = i < 2 ? 0 : i - 2
let slice = terms.slice(start, i + 3)
preTagger([slice], world)
let slice = terms//terms.slice(start, i + 3)
slice = [slice]

let tmp = view.clone()
tmp.document = slice
tmp.compute('index', 'tagger')
// tmp.compute(world.hooks)
// console.log(world.hooks)

preTagger(slice, world)
}

// const isArray = function (arr) {
// return Object.prototype.toString.call(arr) === '[object Array]'
// }

//really easy ones
const contractions = (document = [], world) => {
const contractions = (document = [], world, view) => {
const { model, methods } = world
let list = model.two.contractions || []
document.forEach((terms, n) => {
Expand Down Expand Up @@ -85,7 +93,7 @@ const contractions = (document = [], world) => {
// actually insert the new terms
if (words) {
splice(document, [n, i], words, hint)
reTag(terms, i, world)
reTag(terms, i, world, view)
return true
}
// '44-2'
Expand All @@ -95,7 +103,7 @@ const contractions = (document = [], world) => {
hint = ['Value', 'Conjunction', 'Value']
splice(document, [n, i], words, hint)
methods.one.setTag(terms, 'NumberRange', world)
reTag(terms, i, world)
reTag(terms, i, world, view)
return true
}
}
Expand Down
9 changes: 8 additions & 1 deletion src/2-two/preTagger/compute/2nd-pass/01-case.js
Expand Up @@ -13,9 +13,16 @@ const romanNumValid = /^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/
// if it's an unknown titlecase word, it's a propernoun
const checkCase = function (terms, i, model) {
let term = terms[i]
// console.log(`=-=-=-= ${term.text} -=-=-=-`)
if (!term.index) {
console.log('error')
process.exit()
// term.index = []
}
let index = term.index[1]
let str = term.text //need case info
// titlecase and not first word of sentence
if (i !== 0 && titleCase.test(str) === true && hasNumber.test(str) === false) {
if (index !== 0 && titleCase.test(str) === true && hasNumber.test(str) === false) {
if (notProper.find(tag => term.tags.has(tag))) {
return null
}
Expand Down
4 changes: 2 additions & 2 deletions src/2-two/preTagger/model/switches/_data.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 20 additions & 16 deletions src/2-two/preTagger/model/switches/index.js
Expand Up @@ -14,6 +14,25 @@ const titleCase = function (obj) {
}, {})
}

/**
 * Build the 'presentPlural' switch as a modified copy of another switch
 * (called with `nounVerb` in this file).
 *
 * - the copy's fallback becomes 'PresentTense'
 * - in every clue table, 'Infinitive' becomes 'PresentTense' and any other
 *   value becomes 'Plural'
 * - the word list is rebuilt with an 's' appended to each word
 *
 * The input switch and its nested tables are left untouched.
 *
 * @param {object} from - switch to copy; must have a `words` object
 * @returns {object} the new switch
 */
const copySwitch = function (from) {
  const clueTables = ['beforeTags', 'afterTags', 'beforeWords', 'afterWords', 'ownTags']
  const copy = Object.assign({}, from)
  copy.fallback = 'PresentTense'
  for (const table of clueTables) {
    // copy each table so the remapping below never touches the source switch
    const remapped = Object.assign({}, from[table])
    for (const clue of Object.keys(remapped)) {
      remapped[clue] = remapped[clue] === 'Infinitive' ? 'PresentTense' : 'Plural'
    }
    copy[table] = remapped
  }
  // 'walk' -> 'walks'
  copy.words = Object.keys(copy.words).reduce((all, str) => {
    all[str + 's'] = true
    return all
  }, {})
  return copy
}

// unpack our lexicon of ambiguous-words
// (found in ./lib/switches/)
// add compressed word-data
Expand All @@ -28,22 +47,7 @@ Object.keys(switches).forEach(k => {
switches[k].ownTags = titleCase(switches[k].ownTags)
})

// make a copy of nounVerb called 'presentPlural'
const presentPlural = Object.assign({}, switches.nounVerb)
presentPlural.fallback = 'PresentTense'
let keys = ['beforeTags', 'afterTags', 'beforeWords', 'afterWords', 'ownTags']
keys.forEach(k => {
presentPlural[k] = Object.assign({}, switches.nounVerb[k])
Object.keys(presentPlural[k]).forEach(key => {
presentPlural[k][key] = presentPlural[k][key] === 'Infinitive' ? 'PresentTense' : 'Plural'
})
})
let words = {}
Object.keys(presentPlural.words).forEach(str => {
words[str + 's'] = true
})
presentPlural.words = words
switches.presentPlural = presentPlural
switches.presentPlural = copySwitch(switches.nounVerb)

// random ad-hoc changes -
// 'was time' vs 'was working'
Expand Down
1 change: 0 additions & 1 deletion src/3-three/chunker/api/api.js
Expand Up @@ -6,7 +6,6 @@ import getChunks from './chunks.js'

const chunker = function (View) {
View.prototype.chunks = function () {
this.compute('index')
return getChunks(this)
}

Expand Down
1 change: 0 additions & 1 deletion src/3-three/chunker/api/parentheses.js
Expand Up @@ -11,7 +11,6 @@ const findEnd = function (terms, i) {
}

const find = function () {
this.compute('index')
let ptrs = []
this.docs.forEach(terms => {
let isOpen = false
Expand Down
1 change: 0 additions & 1 deletion src/3-three/chunker/api/quotations.js
Expand Up @@ -38,7 +38,6 @@ const findEnd = function (terms, i) {
}

const find = function () {
this.compute('index')
let ptrs = []
this.docs.forEach(terms => {
let isOpen = false
Expand Down
1 change: 0 additions & 1 deletion src/3-three/sentences/questions.js
Expand Up @@ -59,7 +59,6 @@ const isQuestion = function (doc) {

const findQuestions = function (view) {
const hasQ = /\?/
view.compute('index')
const { document } = view
return view.filter(m => {
let terms = m.docs[0]
Expand Down
1 change: 1 addition & 0 deletions src/nlp.js
Expand Up @@ -15,6 +15,7 @@ const nlp = function (input, lex) {
//assume ./01-tokenize is installed
let document = methods.one.tokenize(input, world)
let doc = new View(document)
doc.world = world
doc.compute(hooks)
return doc
}
Expand Down

0 comments on commit c4c3916

Please sign in to comment.