-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
wikipedia_query_run.ts
181 lines (154 loc) · 4.85 KB
/
wikipedia_query_run.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import { Tool } from "@langchain/core/tools";
/**
 * Optional settings accepted by the WikipediaQueryRun constructor. Any
 * field that is left unset keeps the default declared on the class.
 */
export interface WikipediaQueryRunParams {
  /** URL of the Wikipedia API endpoint that every request is sent to. */
  baseUrl?: string;
  /** Maximum number of characters kept from the combined summaries. */
  maxDocContentLength?: number;
  /** Maximum number of search hits whose page extract is fetched. */
  topKResults?: number;
}
/**
 * A single query-string value as accepted by `buildUrl`: a primitive that
 * can be stringified, or `undefined` / `null`.
 */
type UrlParameterValue = string | number | boolean | null | undefined;
/**
 * Bag of URL query parameters: string keys mapped to the primitive values
 * above.
 */
type UrlParameters = Record<string, UrlParameterValue>;
/**
 * Shape of the JSON body read from a `list=search` request to the
 * Wikipedia API. Only the fields this file uses are declared.
 */
interface SearchResults {
  query: {
    /** Search hits in the order returned; only the title is consumed. */
    search: { title: string }[];
  };
}
/**
 * A single page entry as read from a `prop=extracts` response of the
 * Wikipedia API.
 */
interface Page {
  /** Page title. */
  title: string;
  /** Text extract of the page; this is what the tool returns as content. */
  extract: string;
  /** Numeric identifier of the page. */
  pageid: number;
  /** Presumably the MediaWiki namespace number - confirm against the API docs. */
  ns: number;
}
/**
 * Shape of the JSON body read from a page (`prop=extracts`) request to the
 * Wikipedia API.
 */
interface PageResult {
  batchcomplete: string;
  query: {
    /** Page entries keyed by a string id; the class reads the first key. */
    pages: { [pageId: string]: Page };
  };
}
/**
 * Class for interacting with and fetching data from the Wikipedia API. It
 * extends the Tool class.
 *
 * A query is first sent to the search endpoint (`list=search`). For at most
 * `topKResults` hits the plain-text extract of the page is then fetched
 * (`prop=extracts`), and the formatted summaries are joined and truncated
 * to `maxDocContentLength` characters.
 * @example
 * ```typescript
 * const wikipediaQuery = new WikipediaQueryRun({
 *   topKResults: 3,
 *   maxDocContentLength: 4000,
 * });
 * const result = await wikipediaQuery.call("Langchain");
 * ```
 */
export class WikipediaQueryRun extends Tool {
  static lc_name() {
    return "WikipediaQueryRun";
  }

  name = "wikipedia-api";

  description =
    "A tool for interacting with and fetching data from the Wikipedia API.";

  /** Maximum number of search hits whose page extract is fetched. */
  protected topKResults = 3;

  /** Maximum number of characters of the combined summaries returned. */
  protected maxDocContentLength = 4000;

  /** Endpoint that every API request is sent to. */
  protected baseUrl = "https://en.wikipedia.org/w/api.php";

  /**
   * @param params Optional overrides; unset fields keep the class defaults.
   */
  constructor(params: WikipediaQueryRunParams = {}) {
    super();
    this.topKResults = params.topKResults ?? this.topKResults;
    this.maxDocContentLength =
      params.maxDocContentLength ?? this.maxDocContentLength;
    this.baseUrl = params.baseUrl ?? this.baseUrl;
  }

  /**
   * Searches Wikipedia for `query` and summarizes the top hits.
   * @param query The search text.
   * @returns The summaries joined by blank lines and truncated to
   * `maxDocContentLength` characters, or a fixed "not found" message when
   * no hit yields an extract.
   * @throws Error if one of the HTTP requests returns a non-OK status.
   */
  async _call(query: string): Promise<string> {
    const searchResults = await this._fetchSearchResults(query);
    // A payload without `query.search` (e.g. an API error object) is
    // treated as "no hits" instead of crashing with a TypeError.
    const hits = searchResults.query?.search ?? [];
    const limit = Math.min(this.topKResults, hits.length);
    const summaries: string[] = [];
    // Pages are fetched one at a time, in search-rank order.
    for (let i = 0; i < limit; i += 1) {
      const page = hits[i].title;
      const pageDetails = await this._fetchPage(page, true);
      // Missing pages come back as an entry without `extract`; skip them
      // rather than emitting "Summary: undefined".
      if (
        pageDetails !== undefined &&
        typeof pageDetails.extract === "string"
      ) {
        summaries.push(`Page: ${page}\nSummary: ${pageDetails.extract}`);
      }
    }
    if (summaries.length === 0) {
      return "No good Wikipedia Search Result was found";
    }
    return summaries.join("\n\n").slice(0, this.maxDocContentLength);
  }

  /**
   * Fetches the content of a specific Wikipedia page. It returns the
   * extracted content as a string.
   * @param page The specific Wikipedia page to fetch its content.
   * @param redirect A boolean value to indicate whether to redirect or not.
   * @returns The extracted content of the specific Wikipedia page as a string.
   * @throws Error if the request fails or the response contains no extract
   * for the page (for example because the page does not exist).
   */
  public async content(page: string, redirect = true): Promise<string> {
    try {
      const result = await this._fetchPage(page, redirect);
      if (result === undefined || typeof result.extract !== "string") {
        // Keeps the Promise<string> contract instead of resolving to
        // undefined for a missing page.
        throw new Error("no extract was returned for this page");
      }
      return result.extract;
    } catch (error) {
      throw new Error(`Failed to fetch content for page "${page}": ${error}`);
    }
  }

  /**
   * Builds a URL for the Wikipedia API using the provided parameters.
   * Entries whose value is `undefined` are left out; every other value is
   * stringified.
   * @param parameters The parameters to be used in building the URL.
   * @returns A string representing the built URL.
   */
  protected buildUrl<P extends UrlParameters>(parameters: P): string {
    const nonUndefinedParams: [string, string][] = Object.entries(parameters)
      .filter(([, value]) => value !== undefined)
      .map(([key, value]) => [key, `${value}`]);
    const searchParams = new URLSearchParams(nonUndefinedParams);
    return `${this.baseUrl}?${searchParams}`;
  }

  /**
   * Runs a `list=search` query against the API.
   * @param query The search text.
   * @returns The parsed JSON body; `query` may be absent in the payload.
   * @throws Error if the HTTP status is not OK.
   */
  private async _fetchSearchResults(
    query: string
  ): Promise<Partial<SearchResults>> {
    const url = this.buildUrl({
      action: "query",
      list: "search",
      srsearch: query,
      format: "json",
    });
    const response = await fetch(url);
    if (!response.ok) throw new Error("Network response was not ok");
    const data: Partial<SearchResults> = await response.json();
    return data;
  }

  /**
   * Fetches the plain-text extract entry for one page title.
   * @param page Title of the page to fetch.
   * @param redirect Whether the API should resolve redirects.
   * @returns The first page entry of the response, or `undefined` when the
   * response contains no pages. The entry may lack `extract`.
   * @throws Error if the HTTP status is not OK.
   */
  private async _fetchPage(
    page: string,
    redirect: boolean
  ): Promise<Partial<Page> | undefined> {
    const url = this.buildUrl({
      action: "query",
      prop: "extracts",
      explaintext: "true",
      // MediaWiki boolean parameters are true whenever they are present,
      // whatever their value, so the flag must be omitted to disable it.
      // buildUrl drops undefined entries.
      redirects: redirect ? "1" : undefined,
      format: "json",
      titles: page,
    });
    const response = await fetch(url);
    if (!response.ok) throw new Error("Network response was not ok");
    const data: Partial<PageResult> = await response.json();
    const pages: Record<string, Partial<Page>> = data.query?.pages ?? {};
    const [firstPage] = Object.values(pages);
    return firstPage;
  }
}