-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Parallelize the GithubRepoLoader
- Loading branch information
Showing 3 changed files with 227 additions and 30 deletions.
There are no files selected for viewing
93 changes: 93 additions & 0 deletions
93
langchain/src/document_loaders/tests/example_data/github_api_responses.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import { GithubFile } from "../../web/github.js"; | ||
|
||
// Canned responses used as fixtures when fetch is stubbed in GithubRepoLoader tests. | ||
// - getRepoFiles: directory listings, keyed by a numeric string ("0", "1", "2"). | ||
// - getFileContents: the text body returned for any file-content request. | ||
export const GithubLoaderApis = { | ||
// Each key holds one listing (an array of GithubFile entries). The recursion test in | ||
// this commit looks listings up by the number of listing requests served so far. | ||
getRepoFiles: { | ||
// Listing "0": one file (foo.txt) and one directory (dir1). | ||
0: [ | ||
{ | ||
name: "foo.txt", | ||
path: "foo.txt", | ||
type: "file", | ||
size: 50, | ||
// Every entry uses the same placeholder url; the test's fetch stub matches on | ||
// the "githubfilecontent.com" substring. | ||
url: "https://githubfilecontent.com", | ||
html_url: "", | ||
sha: "", | ||
git_url: "", | ||
download_url: "", | ||
_links: { | ||
self: "", | ||
git: "", | ||
html: "", | ||
}, | ||
}, | ||
{ | ||
name: "dir1", | ||
path: "dir1", | ||
type: "dir", | ||
size: 50, | ||
url: "https://githubfilecontent.com", | ||
html_url: "", | ||
sha: "", | ||
git_url: "", | ||
download_url: "", | ||
_links: { | ||
self: "", | ||
git: "", | ||
html: "", | ||
}, | ||
}, | ||
], | ||
// Listing "1": a single nested directory (dir1/dir1_1). | ||
1: [ | ||
{ | ||
name: "dir1_1", | ||
path: "dir1/dir1_1", | ||
type: "dir", | ||
size: 50, | ||
url: "https://githubfilecontent.com", | ||
html_url: "", | ||
sha: "", | ||
git_url: "", | ||
download_url: "", | ||
_links: { | ||
self: "", | ||
git: "", | ||
html: "", | ||
}, | ||
}, | ||
], | ||
// Listing "2": two files under dir1/dir1_1 (a .txt file and a .md file). | ||
2: [ | ||
{ | ||
name: "nested_file.txt", | ||
path: "dir1/dir1_1/nested_file.txt", | ||
type: "file", | ||
size: 50, | ||
url: "https://githubfilecontent.com", | ||
html_url: "", | ||
sha: "", | ||
git_url: "", | ||
download_url: "", | ||
_links: { | ||
self: "", | ||
git: "", | ||
html: "", | ||
}, | ||
}, | ||
{ | ||
name: "EXAMPLE.md", | ||
path: "dir1/dir1_1/EXAMPLE.md", | ||
type: "file", | ||
size: 50, | ||
url: "https://githubfilecontent.com", | ||
html_url: "", | ||
sha: "", | ||
git_url: "", | ||
download_url: "", | ||
_links: { | ||
self: "", | ||
git: "", | ||
html: "", | ||
}, | ||
}, | ||
], | ||
// The assertion lets callers index the listings with an arbitrary string key. | ||
} as Record<string, GithubFile[]>, | ||
getFileContents: "this is a file full of stuff", | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { jest, test } from "@jest/globals"; | ||
import { GithubFile, GithubRepoLoader } from "../web/github.js"; | ||
import { GithubLoaderApis } from "./example_data/github_api_responses.js"; | ||
|
||
// Exercises GithubRepoLoader with recursive: true against a stubbed global.fetch that | ||
// serves the canned listings and file contents from GithubLoaderApis. | ||
describe("GithubRepoLoader recursion", () => { | ||
// Number of directory-listing requests served so far; selects the next canned listing. | ||
let callCount = 0; | ||
beforeAll(() => { | ||
// Replace the global fetch with a mock for the whole suite. | ||
global.fetch = jest.fn().mockImplementation((url) => { | ||
// Default payload is the listing for the current call count; both branches below | ||
// overwrite it, so it only survives for URLs matching neither pattern. | ||
let responseData: GithubFile[] | string = | ||
GithubLoaderApis.getRepoFiles[callCount.toString()]; | ||
|
||
// Listing request: serve the next canned listing and advance the counter. | ||
// NOTE(review): listings are chosen by call order, not by the requested path, so | ||
// this presumably relies on the loader requesting root, dir1, then dir1/dir1_1 in | ||
// that order - confirm this still holds with the parallelized loader. | ||
if ((url as string).includes("https://api.github.com/repos")) { | ||
responseData = GithubLoaderApis.getRepoFiles[callCount.toString()]; | ||
callCount += 1; | ||
// File-content request: matches the placeholder url used by every fixture entry. | ||
} else if ((url as string).includes("githubfilecontent.com")) { | ||
responseData = GithubLoaderApis.getFileContents; | ||
} | ||
// Minimal Response-like object exposing only ok, json() and text(). | ||
return Promise.resolve({ | ||
ok: true, | ||
json: () => Promise.resolve(responseData), | ||
// NOTE(review): text() always resolves this hard-coded literal (the same value as | ||
// GithubLoaderApis.getFileContents) regardless of which URL was requested. | ||
text: () => Promise.resolve("this is a file full of stuff"), | ||
}); | ||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
}) as any; | ||
}); | ||
|
||
// Suite teardown: clear mocks and reset the listing counter. | ||
// NOTE(review): global.fetch is not restored to its original value here. | ||
afterAll(() => { | ||
jest.clearAllMocks(); | ||
callCount = 0; | ||
}); | ||
|
||
test("Test recursion with GithubRepoLoader", async () => { | ||
const loader = new GithubRepoLoader( | ||
"https://github.com/hwchase17/langchainjs", | ||
{ | ||
branch: "main", | ||
recursive: true, | ||
unknown: "warn", | ||
ignorePaths: ["*.md"], | ||
} | ||
); | ||
|
||
// The fixtures contain three files (foo.txt, nested_file.txt, EXAMPLE.md); only two | ||
// documents are expected, presumably because ignorePaths filters out EXAMPLE.md. | ||
const documents = await loader.load(); | ||
expect(documents.length).toBe(2); | ||
// toEqual on an array is order-sensitive: foo.txt must precede the nested file. | ||
expect(documents.map((doc) => doc.metadata.source)).toEqual([ | ||
"foo.txt", | ||
"dir1/dir1_1/nested_file.txt", | ||
]); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters