Skip to content

Commit

Permalink
probabililty operator as % and weighted, allow alias arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigopivi committed Jun 24, 2019
1 parent f5457ed commit 9aa3de3
Show file tree
Hide file tree
Showing 6 changed files with 490 additions and 85 deletions.
17 changes: 13 additions & 4 deletions parser/chatito.pegjs
Expand Up @@ -4,7 +4,7 @@ Start = (ImportFile/TopLevelStatement/CommentLine)+
TopLevelStatement = od:(IntentDefinition/SlotDefinition/AliasDefinition) { return od; }

// ============= Probability operator =============
ProbabilityOperatorDefinition = "*[" probability:BasicKeywordLiteral "]" { return probability; }
ProbabilityOperatorDefinition = "*[" probability:Number percent:"%"? "]" { return `${probability}${percent ||''}`; }
// ============= Entities =============
EntityOpt = "?"
EntityBody = "[" value:EntityKeywordLiteral "]" { return value }
Expand Down Expand Up @@ -45,11 +45,12 @@ SlotDefinition = EOL? o:EntitySlotDefinition EOL
{ return { type: o.type, key: o.value, args: o.args, location: o.location, inner: s, variation: o.variation } }

// Alias
EntityAliasDefinition = "~" value:EntityBody { return { value: value, type: "AliasDefinition", location: location() } }
EntityAliasDefinition = "~" value:EntityBody args:EntityArguments?
{ return { value: value, type: "AliasDefinition", location: location(), args: args } }
OptionalAlias = "~" op:EntityOptionalBody { return { value: op.value, type: "Alias", opt: op.opt } }
AliasDefinition = EOL? o:EntityAliasDefinition EOL
Indent s:IntentAndSlotInnerStatements Dedent
{ return { type: o.type, key: o.value, location: o.location, inner: s } }
{ return { type: o.type, key: o.value, location: o.location, inner: s, args: o.args } }

// ============= Indentation =============
Samedent "correct indentation" = s:" "* &{ return s.length === level * STEP; }
Expand All @@ -69,7 +70,15 @@ BasicKeywordLiteral "entity name" = v:(t:((!"\r\n")(!"\n")(!"]") .) { return t.j
EntityKeywordLiteral "entity name" = v:(t:((!"\r\n")(!"\n")(!"]")(!"?") .) { return t.join(""); })+ { return v.join(""); }
SlotKeywordLiteral "entity name" = v:(t:((!"\r\n")(!"\n")(!"#")(!"]")(!"?") .) { return t.join(""); })+ { return v.join(""); }

Integer "integer" = [0-9]+ { return parseInt(text(), 10); }
// Number
// Matches an unsigned decimal literal: an integer part optionally followed by a
// fractional part. The grammar has no sign or exponent. The action converts the
// whole matched text with parseFloat, so the rule yields a JavaScript number.
Number "number" = int frac? { return parseFloat(text()); }
// The literal "." that introduces the fractional part.
DecimalPoint = "."
// A single non-zero digit; used as the first digit of a multi-digit integer part.
Digit1_9 = [1-9]
// Any single decimal digit.
Digit0_9 = [0-9]
// Fractional part: a decimal point followed by one or more digits.
frac = DecimalPoint Digit0_9+
// Integer part: either a lone "0" or a non-zero digit followed by any digits.
// Because `zero` is tried first and consumes only "0", an input such as "01"
// matches just the leading "0" here.
int = zero / (Digit1_9 Digit0_9*)
// The literal digit "0".
zero = "0"

EOS "end of sentence" = EOL / EOF
EOL "end of line "= (EOLNonWindows/EOLWindows)+
EOLNonWindows "non windows end of line" = "\n"
Expand Down
9 changes: 4 additions & 5 deletions spec.md
Expand Up @@ -126,7 +126,7 @@ From the output perspective, a slot is the tag that is added the relevant words
```

Slot entities referenced within sentences, can have `?` symbol at the end of the reference name. (e.g.: @[name?]).
In that context, the `?` symbol means that the slot combination is optional, and could be omitted at generation.
In that context, the `?` symbol means that the slot combination is optional, and could be omitted at generation. The probability of being omitted is determined by the number of sentences defined for the entity: if the entity defines only one sentence, the probability of generating an empty string is 50%; if the entity defines two sentences, the probability of being omitted is 33.3333%; and so on.

Slots provide a particular property at their definitions called variations.

Expand Down Expand Up @@ -166,8 +166,7 @@ Alias are just variations of a word and does not generate any tag. By default if
hey
```

Same as with slots, alias references can contain a `?` symbol at the end of the reference name. (e.g.: ~[hi?]).
In that context, the `?` symbol means that the alias combination is optional, and could be omitted at generation.
Same as with slots, alias references can be omitted using a `?` symbol at the end of the reference name. (e.g.: ~[hi?]).

When an alias is referenced inside a slot definition, and it is the only token of the slot sentence, by default the generator will tag the generated alias value as a `synonym` of the alias key name.

Expand Down Expand Up @@ -259,7 +258,7 @@ For `even` distribution using the previous example:

#### 2.2.1 - Sentence probability operator

The sentence probability operator is defined by the `*[` symbol at the start of a sentence, followed by the probability value and `]`. The probability value may be expressed in two ways: as a plain number (considered a weighted probability, e.g.: `1`) or as a percentage value (a number ending with `%`, e.g.: `33.3333%`). Once an entity defines a probability as either a weight or a percentage, all the other sentences for that entity should use the same type. Inconsistencies when declaring an entity's sentence probability values should be considered an input error.
The sentence probability operator is defined by the `*[` symbol at the start of a sentence, followed by the probability value and `]`. The probability value may be expressed in two ways: as a plain number (considered a weighted probability, e.g.: `1`) or as a percentage value (a number ending with `%`, e.g.: `33.3333%`). Once an entity defines a probability as either a weight or a percentage, all the other sentences for that entity should use the same type. Inconsistencies when declaring an entity's sentence probability values should be considered an input error. If the value is not a valid integer, float or percentage value, the input should be treated as plain text and not as a sentence probability definition.

NOTE: If the probability value is a percentage, then the sum of all sentence probability operators declared inside the entity definition should never exceed 100.

Expand All @@ -281,7 +280,7 @@ The previous example, declares `20%` probabilties for the first sentence. This w
| sentence 3 | 400 | 40% | 35.5556% (400*80/900) |


Here is the same example for probabilty value is a weight:
When the probability value is a weight and the distribution is regular, the weight is multiplied by the maximum number of combinations for that sentence; if the distribution is even, the weight itself is the actual weighted probability. E.g.:

```
%[intent with a maximum of 1k combinations]
Expand Down
51 changes: 37 additions & 14 deletions src/main.ts
Expand Up @@ -70,6 +70,7 @@ const getVariationsFromEntity = async <T>(
const variationKey = ed.variation ? `#${ed.variation}` : '';
const cacheKey = `${ed.type}-${ed.key}${variationKey}`;
let cacheStats = cache.get(cacheKey) as IStatCache;
let probabilityTypeDefined: 'w' | '%' | null = null;
if (!cacheStats) {
// if the entity is not cache, create an empty cache for it
const counts: IChatitoCache[] = [];
Expand All @@ -89,18 +90,32 @@ const getVariationsFromEntity = async <T>(
indexesOfSentencesWithNullProbability.push(definedSentenceProbabilities.length);
definedSentenceProbabilities.push(null);
} else {
const prob = parseInt(c.probability || '', 10);
if (!Number.isInteger(prob)) {
throw new Error(`Probability "${c.probability}" must be an integer value. At ${cacheKey}`);
const p = c.probability || '';
const isPercent = p.slice(-1) === '%';
const setenceProbabilityType = isPercent ? '%' : 'w';
if (probabilityTypeDefined === null) {
probabilityTypeDefined = setenceProbabilityType;
} else if (setenceProbabilityType !== probabilityTypeDefined) {
throw new Error(`All probability definitions for "${cacheKey}" must be of the same type.`);
}
if (prob < 1 || prob > 100) {
throw new Error(`Probability "${c.probability}" must be from 1 to 100. At ${cacheKey}`);
const prob = parseFloat(isPercent ? p.slice(0, -1) : p);
if (isPercent) {
if (!Number.isInteger(prob)) {
throw new Error(`Probability "${p}" must be an integer or float number. At ${cacheKey}`);
}
if (prob <= 0 || prob > 100) {
throw new Error(`Probability "${p}" must be greater than 0 up to 100. At ${cacheKey}`);
}
} else if (setenceProbabilityType === 'w') {
if (prob <= 0) {
throw new Error(`Probability weight "${p}" must be greater than 0. At ${cacheKey}`);
}
}
sumOfTotalProbabilitiesDefined += prob;
definedSentenceProbabilities.push(prob);
}
}
if (sumOfTotalProbabilitiesDefined && sumOfTotalProbabilitiesDefined > 100) {
if (probabilityTypeDefined === '%' && sumOfTotalProbabilitiesDefined && sumOfTotalProbabilitiesDefined > 100) {
throw new Error(
`The sum of sentence probabilities (${sumOfTotalProbabilitiesDefined}) for an entity can't be higher than 100%. At ${cacheKey}`
);
Expand All @@ -110,11 +125,20 @@ const getVariationsFromEntity = async <T>(
indexesOfSentencesWithNullProbability.map(i => maxCounts[i]).reduce((p, n) => (p || 0) + (n || 0), 0) || 0;
// calculate the split of remaining probability for sentences that don't define them
// const realProbabilities = maxCounts.map(m => (m * 100) / sumOfTotalMax);
const probabilities = definedSentenceProbabilities.map((p, i) =>
p === null
? (((maxCounts[i] * 100) / totalMaxCountsToShareBetweenNullProbSent) * (100 - sumOfTotalProbabilitiesDefined)) / 100
: p
);
let probabilities: number[];
if (probabilityTypeDefined === '%') {
// if probabilityTypeDefined is percentual, then calculate each sentence chances in percent
probabilities = definedSentenceProbabilities.map((p, i) =>
p === null
? (((maxCounts[i] * 100) / totalMaxCountsToShareBetweenNullProbSent) * (100 - sumOfTotalProbabilitiesDefined)) / 100
: p
);
} else if (probabilityTypeDefined === 'w') {
// if probabilityTypeDefined is weighted, then multiply the weight by max counts
probabilities = definedSentenceProbabilities.map((p, i) => (p === null ? maxCounts[i] : maxCounts[i] * p));
} else {
probabilities = maxCounts;
}
const currentEntityCache: IStatCache = { counts, maxCounts, optional, probabilities };
cache.set(cacheKey, currentEntityCache);
cacheStats = cache.get(cacheKey) as IStatCache;
Expand Down Expand Up @@ -228,10 +252,9 @@ export const datasetFromAST = async (
entity = operatorDefinitions.Slot;
} else if (od.type === 'AliasDefinition') {
entity = operatorDefinitions.Alias;
} else if (od.type === 'Comment' || od.type === 'ImportFile') {
return; // skip comments
} else {
throw new Error(`Unknown definition definition for ${od.type}`);
// type is 'Comment' or 'ImportFile'
return; // skip comments
}
const odKey = od.variation ? `${od.key}#${od.variation}` : od.key;
if (entity[odKey]) {
Expand Down

0 comments on commit 9aa3de3

Please sign in to comment.