Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,28 @@ exports[`getPatternFlyMcpResources should return multiple organized facets: prop
]
`;

exports[`mutateKeyWordsMap should handle filtering keywords map, blocklist is prioritized over exception for split tokens 1`] = `
[
"cli",
"tooling",
"component cli tooling",
]
`;

exports[`mutateKeyWordsMap should handle filtering keywords map, exception keeps length token when not blocked 1`] = `
[
"cli",
"guidelines",
"cli guidelines",
]
`;

exports[`mutateKeyWordsMap should handle filtering keywords map, word length filter combined with blocklist 1`] = `
[
"cli or guidelines",
]
`;

exports[`setCategoryDisplayLabel should normalize categories and apply linking markdown, accessibility 1`] = `"Accessibility"`;

exports[`setCategoryDisplayLabel should normalize categories and apply linking markdown, design 1`] = `"Design Guidelines"`;
Expand Down
9 changes: 8 additions & 1 deletion src/__tests__/docs.filterWords.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { INDEX_BLOCKLIST_WORDS, INDEX_NOISE_WORDS } from '../docs.filterWords';
import { INDEX_BLOCKLIST_WORDS, INDEX_EXCEPTION_WORDS, INDEX_NOISE_WORDS } from '../docs.filterWords';

describe('INDEX_BLOCKLIST_WORDS', () => {
it('should be defined and contain words', () => {
Expand All @@ -7,6 +7,13 @@ describe('INDEX_BLOCKLIST_WORDS', () => {
});
});

describe('INDEX_EXCEPTION_WORDS', () => {
  // The exception list is only useful when it actually holds entries, so check that the
  // export exists first and then require a non-empty list. A `>= 0` length check can
  // never fail and would not verify the "contain words" half of this test.
  it('should be defined and contain words', () => {
    expect(INDEX_EXCEPTION_WORDS).toBeDefined();
    expect(INDEX_EXCEPTION_WORDS.length).toBeGreaterThan(0);
  });
});

describe('INDEX_NOISE_WORDS', () => {
it('should be defined and contain words', () => {
expect(INDEX_NOISE_WORDS.length).toBeGreaterThanOrEqual(0);
Expand Down
45 changes: 45 additions & 0 deletions src/__tests__/patternFly.getResources.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
setCategoryDisplayLabel,
getPatternFlyComponentSchema,
getPatternFlyComponentNames,
mutateKeyWordsMap,
getPatternFlyMcpResources
} from '../patternFly.getResources';

Expand Down Expand Up @@ -96,6 +97,50 @@ describe('getPatternFlyComponentNames', () => {
});
});

describe('mutateKeyWordsMap', () => {
  // Table of scenarios: each case provides the keyword params and optional filter
  // settings. `settings: undefined` lets the function fall back to its default settings.
  const filterCases = [
    {
      description: 'blocklist is prioritized over exception for split tokens',
      params: { keyword: 'component cli tooling', name: 'resource', version: 'v1' },
      settings: {
        blockList: ['component'],
        exceptionList: ['component', 'cli']
      }
    },
    {
      description: 'exception keeps length token when not blocked',
      params: { keyword: 'cli guidelines', name: 'resource', version: 'v1' },
      settings: undefined
    },
    {
      description: 'word length filter combined with blocklist',
      params: { keyword: 'cli or guidelines', name: 'resource', version: 'v1' },
      settings: {
        blockList: ['cli', 'guidelines'],
        lengthFilter: 2
      }
    }
  ];

  it.each(filterCases)('should handle filtering keywords map, $description', ({ params, settings }) => {
    const indexedKeywords = new Map();

    mutateKeyWordsMap(indexedKeywords, params, settings);

    // Only the indexed keys are snapshotted; the per-keyword values are not asserted here.
    const indexedKeys = Object.keys(Object.fromEntries(indexedKeywords));

    expect(indexedKeys).toMatchSnapshot();
  });
});

describe('getPatternFlyMcpResources', () => {
it('should return multiple organized facets', async () => {
const result = await getPatternFlyMcpResources();
Expand Down
10 changes: 9 additions & 1 deletion src/docs.filterWords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@
*/
const INDEX_BLOCKLIST_WORDS = ['patternfly', 'component', 'components', 'documentation', 'example', 'examples'];

/**
 * Technical terms and acronyms that should be exempt from length and noise filtering.
 *
 * Entries must be lowercase: consumers compare them against lowercased keywords with
 * `Array.prototype.includes` and do not normalize the list itself.
 *
 * Being listed here does not override the blocklist. When a multi-word keyword is split
 * for indexing, a word that is on the blocklist is skipped even if it also appears here.
 *
 * @note If "AI" starts producing noisy or overly broad matches in search, remove it from this
 * list and consider adding it to the noise words or blocklist.
 */
const INDEX_EXCEPTION_WORDS = ['cli', 'css', 'ai', 'rtl', 'ltr'];

/**
* Noise words that are common and do not add significant value to search results.
*/
Expand Down Expand Up @@ -112,4 +120,4 @@ const INDEX_NOISE_WORDS = [
'you'
];

export { INDEX_BLOCKLIST_WORDS, INDEX_NOISE_WORDS };
export { INDEX_BLOCKLIST_WORDS, INDEX_EXCEPTION_WORDS, INDEX_NOISE_WORDS };
59 changes: 52 additions & 7 deletions src/patternFly.getResources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ import {
type PatternFlyMcpDocsCatalogEntry,
type PatternFlyMcpDocsCatalogDoc
} from './docs.embedded';
import { INDEX_BLOCKLIST_WORDS, INDEX_NOISE_WORDS } from './docs.filterWords';
import {
INDEX_BLOCKLIST_WORDS,
INDEX_EXCEPTION_WORDS,
INDEX_NOISE_WORDS
} from './docs.filterWords';

/**
* Derive the component schema type from @patternfly/patternfly-component-schemas
Expand Down Expand Up @@ -300,17 +304,34 @@ const getPatternFlyComponentNames = async (contextPathOverride?: string): Promis
getPatternFlyComponentNames.memo = memo(getPatternFlyComponentNames);

/**
* Filter keywords by removing noise words.
* Filter keywords using the exception list and noise-word rules.
*
* - Words are kept that match the `exceptionList`.
* - Words are removed that match the `filterList`, or that start or end with a `filterList`
*   word whose length differs from theirs by at most `distanceMatch` characters.
* - All other words are kept by default.
*
* @param keywordsMap - Available keywords by resource name.
* @param settings - Settings object
* @param settings.exceptionList - List of words to exempt from filtering.
* @param settings.filterList - List of words to filter out from keywords.
* @param settings.distanceMatch - Allowed length gap in characters between a keyword and a
* filter word.
*/
const filterKeywords = (keywordsMap: PatternFlyMcpKeywordsMap, { filterList = INDEX_NOISE_WORDS } = {}) => {
const filterKeywords = (
keywordsMap: PatternFlyMcpKeywordsMap,
{ exceptionList = INDEX_EXCEPTION_WORDS, filterList = INDEX_NOISE_WORDS, distanceMatch = 3 } = {}
) => {
const filteredKeywords: PatternFlyMcpKeywordsMap = new Map();

for (const [keyword, versionMap] of keywordsMap) {
const updatedKeyword = keyword.toLowerCase().trim();

// Exception match, never filter these out.
if (exceptionList.includes(updatedKeyword)) {
filteredKeywords.set(keyword, versionMap);
continue;
}

const isVariant = filterList.some(word => {
const updatedWord = word.toLowerCase().trim();

Expand All @@ -320,7 +341,7 @@ const filterKeywords = (keywordsMap: PatternFlyMcpKeywordsMap, { filterList = IN
}

// Related match, is filterList word related?
if (Math.abs(updatedKeyword.length - updatedWord.length) <= 3) {
if (Math.abs(updatedKeyword.length - updatedWord.length) <= distanceMatch) {
return updatedKeyword.startsWith(updatedWord) || updatedKeyword.endsWith(updatedWord);
}

Expand All @@ -336,7 +357,17 @@ const filterKeywords = (keywordsMap: PatternFlyMcpKeywordsMap, { filterList = IN
};

/**
* Update the keywords map with the given keyword.
* Mutate the `keywordsMap` with the given normalized keyword.
*
* - The normalized keyword is always indexed.
* - When the normalized keyword has multiple words, each word is also indexed unless:
* - They are on the `blockList`.
* - Their character length fails the `lengthFilter` and they are not on the `exceptionList`.
*
* @note Future updates for this function should consider returning a new Map
* instead of mutating.
*
* @internal Exposed for testing only. Not recommended for general use.
*
* @param keywordsMap - Available keywords by resource name.
* @param params - Params object
Expand All @@ -345,11 +376,14 @@ const filterKeywords = (keywordsMap: PatternFlyMcpKeywordsMap, { filterList = IN
* @param params.version - Version of the resource associated with the keyword.
* @param settings - Settings object
* @param settings.blockList - List of words to block from indexing.
* @param settings.exceptionList - List of words to exempt from the `lengthFilter`. `blockList`
* words are prioritized over `exceptionList` words.
* @param settings.lengthFilter - Word length filter for reducing keyword noise.
*/
const mutateKeyWordsMap = (
keywordsMap: PatternFlyMcpKeywordsMap,
{ keyword, name, version }: { keyword: string, name: string, version: string },
{ blockList = INDEX_BLOCKLIST_WORDS } = {}
{ blockList = INDEX_BLOCKLIST_WORDS, exceptionList = INDEX_EXCEPTION_WORDS, lengthFilter = 3 } = {}
) => {
const normalizedKeyword = keyword.toLowerCase().trim();
const initialSplit = normalizedKeyword.split(' ').filter(Boolean);
Expand Down Expand Up @@ -378,7 +412,13 @@ const mutateKeyWordsMap = (
const splitKeywords = initialSplit.map(word => word.trim().replace(/[()|"'<>@#!,.;:]/g, ''));

for (const word of splitKeywords) {
if (word.length <= 3 || blockList.find(blockedWord => blockedWord === word.toLowerCase())) {
const lowerWord = word.toLowerCase();

if (blockList.includes(lowerWord)) {
continue;
}

if (word.length <= lengthFilter && !exceptionList.includes(lowerWord)) {
continue;
}

Expand Down Expand Up @@ -482,6 +522,10 @@ const getPatternFlyMcpResources = async (contextPathOverride?: string): Promise<

mutateKeyWordsMap(rawKeywordsMap, { keyword: name, name, version });

if (entry.displayName) {
mutateKeyWordsMap(rawKeywordsMap, { keyword: entry.displayName, name, version });
}

if (entry.category) {
mutateKeyWordsMap(rawKeywordsMap, { keyword: entry.category, name, version });
}
Expand Down Expand Up @@ -562,6 +606,7 @@ export {
getPatternFlyComponentSchema,
getPatternFlyMcpResources,
getPatternFlyComponentNames,
mutateKeyWordsMap,
setCategoryDisplayLabel,
type PatternFlyMcpComponentNames,
type PatternFlyMcpComponentNamesByVersion,
Expand Down
Loading