Skip to content

Commit fd4f711

Browse files
committed
Add Ticket Archive to Knowledge Base #7252
1 parent 0d9052c commit fd4f711

3 files changed

Lines changed: 104 additions & 45 deletions

File tree

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Ticket: Add Ticket Archive to Knowledge Base
2+
3+
GH ticket id: #7252
4+
5+
**Assignee:** Gemini
6+
**Status:** To Do
7+
8+
## Description
9+
10+
To make the context of all past work searchable, the archived tickets will be integrated into the AI Knowledge Base. This will allow developers and agents to query for the history and rationale behind previous changes.
11+
12+
## Scope of Work
13+
14+
1. **Enhance `createKnowledgeBase.mjs`:**
15+
- The script will be modified to glob for and process all markdown files within the `.github/ISSUE_ARCHIVE/` directory and its sub-directories.
16+
- These files will be chunked and assigned a new content `type` of `ticket`.
17+
18+
2. **Enhance `queryKnowledgeBase.mjs`:**
19+
- The `--type` command-line option will be updated to accept `ticket` as a new valid value.
20+
- Type-based filtering will be handled directly within the database query using a `where` clause, replacing the previous, less efficient post-query filtering in JavaScript.
21+
- The scoring algorithm will be adjusted to apply a penalty to `ticket` type results in general queries (`--type all`) to reduce noise, so that tickets rank lower there and surface primarily when explicitly queried via `--type ticket`.

buildScripts/ai/createKnowledgeBase.mjs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,46 @@ class CreateKnowledgeBase {
194194
}
195195
}
196196

197-
console.log(`Processed ${releaseChunks} release note chunks. Total chunks: ${apiChunks + guideChunks + releaseChunks}.`);
197+
console.log(`Processed ${releaseChunks} release note chunks.`);
198+
199+
// 4. Process ticket archives
200+
const ticketArchivePath = path.resolve(process.cwd(), '.github/ISSUE_ARCHIVE');
201+
let ticketChunks = 0;
202+
203+
if (await fs.pathExists(ticketArchivePath)) {
204+
const releaseVersions = await fs.readdir(ticketArchivePath);
205+
206+
for (const version of releaseVersions) {
207+
const versionPath = path.join(ticketArchivePath, version);
208+
if ((await fs.stat(versionPath)).isDirectory()) {
209+
const ticketFiles = await fs.readdir(versionPath);
210+
211+
for (const file of ticketFiles) {
212+
if (file.endsWith('.md')) {
213+
const filePath = path.join(versionPath, file);
214+
const content = await fs.readFile(filePath, 'utf-8');
215+
const titleMatch = content.match(/^# Ticket: (.*)/m);
216+
const chunkName = titleMatch ? titleMatch[1] : file.replace('.md', '');
217+
218+
const chunk = {
219+
type : 'ticket',
220+
name : chunkName,
221+
content: content,
222+
source : filePath
223+
};
198224

199-
// 4. End the stream
225+
chunk.hash = createContentHash(chunk);
226+
writeStream.write(JSON.stringify(chunk) + '\n');
227+
ticketChunks++;
228+
}
229+
}
230+
}
231+
}
232+
}
233+
234+
console.log(`Processed ${ticketChunks} ticket chunks. Total chunks: ${apiChunks + guideChunks + releaseChunks + ticketChunks}.`);
235+
236+
// 5. End the stream
200237
writeStream.end();
201238
console.log(`Knowledge base creation complete. Saved to ${outputPath}`);
202239
}

buildScripts/ai/queryKnowledgeBase.mjs

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -78,39 +78,28 @@ class QueryKnowledgeBase {
7878
}
7979

8080
const queryEmbedding = await model.embedContent(query);
81-
const results = await collection.query({
82-
queryEmbeddings: [queryEmbedding.embedding.values],
83-
nResults : 100 // Increased to get a wider net for filtering
84-
});
81+
const queryLower = query.toLowerCase();
8582

86-
// 2. Filter results by content type if specified
83+
let whereClause = {};
8784
if (type && type !== 'all') {
88-
results.metadatas[0] = results.metadatas[0].filter(metadata => {
89-
const source = metadata.source || '';
90-
switch (type) {
91-
case 'blog':
92-
return source.includes('/learn/blog/');
93-
case 'guide':
94-
return source.includes('/learn/guides/');
95-
case 'src':
96-
return source.includes('/src/');
97-
case 'example':
98-
return source.includes('/examples/');
99-
case 'release':
100-
return source.includes('/.github/RELEASE_NOTES/');
101-
default:
102-
return true;
103-
}
104-
});
85+
whereClause = {type: type};
86+
}
87+
88+
const queryOptions = {
89+
queryEmbeddings: [queryEmbedding.embedding.values],
90+
nResults : 100 // Get a wider net for filtering
91+
};
92+
93+
if (Object.keys(whereClause).length > 0) {
94+
queryOptions.where = whereClause;
10595
}
10696

97+
const results = await collection.query(queryOptions);
98+
10799
// 3. Process results with the enhanced scoring algorithm
108100
if (results.metadatas?.length > 0 && results.metadatas[0].length > 0) {
109-
const sourceScores = {};
110-
const queryLower = query.toLowerCase();
111-
const queryWords = queryLower.replace(/[^a-zA-Z ]/g, '').split(' ').filter(w => w.length > 2);
112-
const mainKeyword = queryWords[queryWords.length - 1] || '';
113-
const keywordSingular = mainKeyword.endsWith('s') ? mainKeyword.slice(0, -1) : mainKeyword;
101+
const sourceScores = {};
102+
const queryWords = queryLower.replace(/[^a-zA-Z ]/g, '').split(' ').filter(w => w.length > 2);
114103

115104
results.metadatas[0].forEach((metadata, index) => {
116105
if (!metadata.source || metadata.source === 'unknown') return;
@@ -121,26 +110,38 @@ class QueryKnowledgeBase {
121110
const sourcePathLower = sourcePath.toLowerCase();
122111
const fileName = sourcePath.split('/').pop().toLowerCase();
123112
const nameLower = (metadata.name || '').toLowerCase();
124-
const keyword = keywordSingular;
125-
126-
if (keyword) {
127-
if (sourcePathLower.includes(`/${keyword}/`)) score += 40;
128-
if (fileName.includes(keyword)) score += 30;
129-
if (metadata.type === 'class' && nameLower.includes(keyword)) score += 20;
130-
if (metadata.className && metadata.className.toLowerCase().includes(keyword)) score += 20;
131-
if (metadata.type === 'guide') {
132-
// Guides are the most authoritative source for how-to information.
133-
score += metadata.isBlog === 'true' ? 5 : 50;
134-
if (nameLower.includes(keyword)) score += 50;
113+
114+
queryWords.forEach(queryWord => {
115+
const keyword = queryWord;
116+
const keywordSingular = keyword.endsWith('s') ? keyword.slice(0, -1) : keyword;
117+
118+
if (keywordSingular.length > 2) { // Only apply boosts for meaningful keywords
119+
if (sourcePathLower.includes(`/${keywordSingular}/`)) score += 40;
120+
if (fileName.includes(keywordSingular)) score += 30;
121+
if (metadata.type === 'class' && nameLower.includes(keywordSingular)) score += 20;
122+
if (metadata.className && metadata.className.toLowerCase().includes(keywordSingular)) score += 20;
123+
if (metadata.type === 'guide') {
124+
// Guides are the most authoritative source for how-to information.
125+
score += metadata.isBlog === 'true' ? 5 : 50;
126+
if (nameLower.includes(keywordSingular)) score += 50;
127+
}
128+
const nameParts = nameLower.split('.');
129+
if (nameParts.includes(keywordSingular)) score += 30;
135130
}
136-
if (metadata.type === 'release') {
137-
score -= 50; // Penalize release notes in general queries
131+
});
132+
133+
if (metadata.type === 'ticket') {
134+
if (type === 'all') {
135+
score -= 70;
138136
}
139-
if (fileName.endsWith('base.mjs')) score += 20;
140-
const nameParts = nameLower.split('.');
141-
if (nameParts.includes(keyword)) score += 30;
142137
}
143138

139+
// Logic for release and base.mjs, applies once per chunk
140+
if (metadata.type === 'release') {
141+
score -= 50; // Penalize release notes in general queries
142+
}
143+
if (fileName.endsWith('base.mjs')) score += 20;
144+
144145
// Boost exact matches for version-like queries
145146
if (metadata.type === 'release' && queryLower.startsWith('v') && nameLower === queryLower) {
146147
score += 1000; // Strong boost for exact version match

0 commit comments

Comments
 (0)