Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
"test:integration": "vitest run tests/integration/*.test.ts",
"test:mock": "vitest run tests/mock/*.test.ts",
"crawl-servers": "tsx src/data/mcp_servers_crawler.ts --url https://raw.githubusercontent.com/modelcontextprotocol/servers/refs/heads/main/README.md",
"update-server-types": "tsx src/data/mcp_server_crawler_result_updater.ts",
"crawl-servers-postprocess": "tsx src/data/mcp_server_crawler_result_updater.ts",
"clean-duplicates": "tsx src/data/clean_duplicate.ts",
"process_categories": "tsx src/data/process_categories.ts",
"process_locales": "tsx src/data/process_locales.ts",
"process_githubinfo": "tsx src/data/process_githubinfo.ts"
"process_githubinfo": "tsx src/data/process_githubinfo.ts --batch_size 200"
},
"dependencies": {
"axios": "^1.6.0",
Expand Down
2 changes: 1 addition & 1 deletion server/src/data/mcp_servers_official_list.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"metadata": {
"totalServers": 444,
"extractedAt": "2025-04-27T15:31:45.593Z",
"extractedAt": "2025-04-29T04:06:45.722Z",
"sourceUrl": "https://raw.githubusercontent.com/modelcontextprotocol/servers/refs/heads/main/README.md",
"baseRepoUrl": "https://github.com/modelcontextprotocol/servers",
"defaultBranch": "main",
Expand Down
2 changes: 1 addition & 1 deletion server/src/data/process_githubinfo.log.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"lastProcessed": "2025-04-28T08:45:46.926Z",
"lastProcessed": "2025-04-29T07:41:12.835Z",
"processedFiles": [
"0006b282-ac88-4c32-b76c-02476e972a04_githubprojects",
"003f3571-5f97-4f84-b126-f09b89e4247e_amadeus",
Expand Down
72 changes: 60 additions & 12 deletions server/src/data/process_githubinfo.ts
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,23 @@ import path from 'path';
import { fileURLToPath } from 'url';
import { fetchGithubInfo, extractGithubRepoInfo } from '../lib/githubEnrichment.js';

/**
 * Extracts the `--batch_size` option from a list of command line arguments.
 *
 * Accepted forms are `--batch_size <n>` and `--batch_size=<n>`, where `<n>` is
 * a strictly positive decimal integer. When the option appears several times,
 * the last valid occurrence wins; an invalid occurrence is reported on stderr
 * and leaves any previously accepted value untouched.
 *
 * @param argv - Arguments without the node executable and script path
 *               (i.e. `process.argv.slice(2)`).
 * @returns The requested batch size, or `null` when no valid value was given
 *          (meaning "process all records").
 */
function parseBatchSize(argv: readonly string[]): number | null {
  const flag = '--batch_size';
  let batchSize: number | null = null;

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i] ?? '';
    let rawValue: string | undefined;
    let valueIsNextArg = false;

    if (arg === flag) {
      rawValue = argv[i + 1];
      valueIsNextArg = true;
    } else if (arg.startsWith(`${flag}=`)) {
      rawValue = arg.slice(flag.length + 1);
    } else {
      continue;
    }

    // The flag was the last argument: report it instead of ignoring it silently.
    if (rawValue === undefined) {
      console.error(`Missing value for ${flag}. Will process all records.`);
      continue;
    }

    // Require the whole token to be digits. parseInt() would accept inputs
    // such as "200abc" or "1.5" by stopping at the first non-digit character.
    const trimmed = rawValue.trim();
    const parsed = /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;

    if (Number.isSafeInteger(parsed) && parsed > 0) {
      batchSize = parsed;
      if (valueIsNextArg) {
        i++; // Skip the next argument as it's the value
      }
    } else {
      // The rejected token is deliberately NOT skipped, so it is still
      // examined as a potential option on the next iteration.
      console.error(`Invalid batch size: ${rawValue}. Will process all records.`);
    }
  }

  return batchSize;
}

// Parse command line arguments
const args = process.argv.slice(2);

// Null means process all records
const BATCH_SIZE: number | null = parseBatchSize(args);

// Get the directory name in ESM
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
Expand All @@ -34,11 +51,23 @@ function ensureDirectoryExists(dirPath: string): void {
}

// Function to load the log file or create it if it doesn't exist
function loadProcessedLog(): ProcessedLog {
function loadProcessedLog(allFilesCount: number): ProcessedLog {
if (fs.existsSync(LOG_FILE)) {
try {
const logContent = fs.readFileSync(LOG_FILE, 'utf8');
return JSON.parse(logContent) as ProcessedLog;
const logData = JSON.parse(logContent) as ProcessedLog;

// Check if we've already processed all files and should start fresh
if (logData.processedFiles.length >= allFilesCount) {
console.log(`Log file shows all ${logData.processedFiles.length} files already processed. Starting fresh.`);
return {
lastProcessed: new Date().toISOString(),
processedFiles: [],
errors: {}
};
}

return logData;
} catch (error) {
console.warn(`Error reading log file, creating a new one: ${error}`);
}
Expand Down Expand Up @@ -153,21 +182,22 @@ async function updateGithubInfoInFile(filePath: string): Promise<boolean> {
async function processAllFiles(): Promise<void> {
console.log('Starting GitHub info update process...');
console.log(`Looking for JSON files in: ${SPLIT_DIR}`);
console.log(BATCH_SIZE ? `Batch size set to: ${BATCH_SIZE}` : `Processing all remaining records`);

// Get all JSON files from split directory (only in the root, not in language subdirectories)
const allFiles = fs.readdirSync(SPLIT_DIR)
.filter(file => file.endsWith('.json') && fs.statSync(path.join(SPLIT_DIR, file)).isFile());

console.log(`Found ${allFiles.length} total JSON files in root directory`);

// Load processed log
const processedLog = loadProcessedLog();
const processedLog = loadProcessedLog(allFiles.length);
console.log(`Loaded processing log. Last run: ${processedLog.lastProcessed}`);
console.log(`Previously processed ${processedLog.processedFiles.length} files`);

// Setup handlers to save progress on interruption
setupShutdownHandlers(processedLog);

// Get all JSON files from split directory (only in the root, not in language subdirectories)
const allFiles = fs.readdirSync(SPLIT_DIR)
.filter(file => file.endsWith('.json') && fs.statSync(path.join(SPLIT_DIR, file)).isFile());

console.log(`Found ${allFiles.length} total JSON files in root directory`);

// Filter out already processed files
const filesToProcess = allFiles.filter(file => {
const hubId = getHubIdFromFilename(file);
Expand All @@ -181,11 +211,17 @@ async function processAllFiles(): Promise<void> {
return;
}

// Process each file
for (const [index, file] of filesToProcess.entries()) {
// Limit the number of files to process based on batch size if provided
const filesToProcessInThisBatch = BATCH_SIZE ? filesToProcess.slice(0, BATCH_SIZE) : filesToProcess;
console.log(BATCH_SIZE
? `Processing batch of ${filesToProcessInThisBatch.length} files (limited by batch size ${BATCH_SIZE})`
: `Processing all ${filesToProcessInThisBatch.length} remaining files`);

// Process each file in the batch
for (const [index, file] of filesToProcessInThisBatch.entries()) {
try {
const hubId = getHubIdFromFilename(file);
console.log(`Processing file ${index + 1}/${filesToProcess.length}: ${file} (hubId: ${hubId})`);
console.log(`Processing file ${index + 1}/${filesToProcess.length} ${BATCH_SIZE ? `(batch_size: ${BATCH_SIZE})` : ''}: ${file} (hubId: ${hubId})`);
const filePath = path.join(SPLIT_DIR, file);

// Update GitHub info in the main file
Expand Down Expand Up @@ -227,6 +263,18 @@ async function processAllFiles(): Promise<void> {
} else {
console.log('GitHub info update process completed successfully!');
}

// Report on overall progress
console.log(`Processed ${filesToProcessInThisBatch.length} files ${BATCH_SIZE ? 'in this batch' : ''}.`);
console.log(`Total progress: ${processedLog.processedFiles.length}/${allFiles.length} files processed.`);

if (processedLog.processedFiles.length < allFiles.length) {
console.log(BATCH_SIZE
? `Run the script again to process the next batch.`
: `Some files may have been skipped due to errors. Check the log file for details.`);
} else {
console.log(`All files have been processed. The log file will be reset on next run.`);
}
}

// Execute the main function
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 493,
"githubStars": 502,
"downloadCount": 414,
"createdAt": "2025-02-17T22:30:26.383193Z",
"updatedAt": "2025-04-22T03:59:36Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": true,
"githubStars": 309,
"githubStars": 310,
"downloadCount": 912,
"createdAt": "2025-02-17T22:27:37.384353Z",
"updatedAt": "2025-03-20T17:17:38Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 169,
"githubStars": 170,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:20.399027+00:00",
"updatedAt": "2025-03-10T05:10:04Z",
Expand All @@ -25,6 +25,6 @@
"isReferenceServer": false,
"isCommunityServer": true,
"githubLatestCommit": "fe3b6a8808116a5d08adfe9197f8391a6e359e77",
"githubForks": 24,
"githubForks": 25,
"licenseType": "MIT"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 39,
"githubStars": 40,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:23.366219+00:00",
"updatedAt": "2025-03-13T05:56:46Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@
"isReferenceServer": false,
"isCommunityServer": true,
"githubLatestCommit": "ba2d6d946f0334305f8a888d47338f55222f4af1",
"githubForks": 1,
"githubForks": 2,
"licenseType": null
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 18,
"githubStars": 17,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:23.366219+00:00",
"updatedAt": "2025-04-27T21:48:21Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"tags": [],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 2,
"githubStars": 3,
"downloadCount": 0,
"createdAt": "2025-04-27T14:08:34.434Z",
"updatedAt": "2025-04-28T00:48:02Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 41714,
"githubStars": 42020,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:19.654593+00:00",
"updatedAt": "2025-04-27T13:54:22Z",
Expand All @@ -25,6 +25,6 @@
"isReferenceServer": false,
"isCommunityServer": true,
"githubLatestCommit": "de1abc85a7ddbe408fffc00f783c7e9f1a69b6b3",
"githubForks": 4574,
"githubForks": 4612,
"licenseType": "MIT"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": true,
"githubStars": 1297,
"githubStars": 1303,
"downloadCount": 390,
"createdAt": "2025-02-17T22:27:20.529942Z",
"updatedAt": "2025-04-25T21:14:26Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": true,
"githubStars": 41714,
"githubStars": 42020,
"downloadCount": 1086,
"createdAt": "2025-02-18T05:44:51.508867Z",
"updatedAt": "2025-04-27T13:54:22Z",
Expand All @@ -25,6 +25,6 @@
"isReferenceServer": true,
"isCommunityServer": false,
"githubLatestCommit": "de1abc85a7ddbe408fffc00f783c7e9f1a69b6b3",
"githubForks": 4574,
"githubForks": 4612,
"licenseType": "MIT"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": true,
"githubStars": 7101,
"githubStars": 7119,
"downloadCount": 0,
"createdAt": "2025-03-15T07:59:28.714502+00:00",
"updatedAt": "2025-04-27T08:18:13Z",
Expand All @@ -25,6 +25,6 @@
"isReferenceServer": false,
"isCommunityServer": true,
"githubLatestCommit": "bf2c0261311c2fb4b2175e378080c8fa66c52289",
"githubForks": 718,
"githubForks": 723,
"licenseType": "AGPL-3.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 2051,
"githubStars": 2056,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:22.149254+00:00",
"updatedAt": "2025-04-25T22:03:05Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 30,
"githubStars": 31,
"downloadCount": 0,
"createdAt": "2025-04-26T13:41:51.295Z",
"updatedAt": "2025-04-09T19:07:12Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 40,
"githubStars": 39,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:22.634233+00:00",
"updatedAt": "2025-01-29T07:16:04Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 12,
"githubStars": 11,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:24.291233+00:00",
"updatedAt": "2025-01-09T07:29:20Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"tags": [],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 870,
"githubStars": 871,
"downloadCount": 0,
"createdAt": "2025-04-27T14:08:34.434Z",
"updatedAt": "2025-04-24T17:55:44Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 60,
"githubStars": 63,
"downloadCount": 0,
"createdAt": "2025-03-24T02:03:10.166753+00:00",
"updatedAt": "2025-04-02T11:25:52Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 80,
"githubStars": 81,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:22.634233+00:00",
"updatedAt": "2025-02-01T02:13:16Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 46,
"githubStars": 47,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:25.041553+00:00",
"updatedAt": "2025-03-06T22:15:45Z",
Expand All @@ -25,6 +25,6 @@
"isReferenceServer": false,
"isCommunityServer": true,
"githubLatestCommit": "26d5fbc7acb53394c0cd3037b0387ea096bf251e",
"githubForks": 11,
"githubForks": 12,
"licenseType": "Apache-2.0"
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"tags": [],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 212,
"githubStars": 214,
"downloadCount": 0,
"createdAt": "2025-04-27T14:08:34.434Z",
"updatedAt": "2025-03-22T20:17:47Z",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
],
"requiresApiKey": false,
"isRecommended": false,
"githubStars": 40,
"githubStars": 41,
"downloadCount": 0,
"createdAt": "2025-03-17T08:29:22.634233+00:00",
"updatedAt": "2024-12-05T07:13:54Z",
Expand Down
Loading