-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
searchapi.ts
178 lines (158 loc) · 5.26 KB
/
searchapi.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import { Document } from "@langchain/core/documents";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { BaseDocumentLoader } from "@langchain/core/document_loaders/base";
// JSON-compatible value types. They are mutually recursive: a JSONValue may
// itself contain JSONObject and JSONArray values.

/** A JSON scalar: string, number, boolean, or null. */
type JSONPrimitive = string | number | boolean | null;
/** Any JSON value: a scalar, an object, or an array. */
type JSONValue = JSONPrimitive | JSONObject | JSONArray;
/** A JSON object: string keys mapped to JSON values. */
interface JSONObject {
[key: string]: JSONValue;
}
/** A JSON array: a list whose elements are JSON values. */
interface JSONArray extends Array<JSONValue> {}
/**
* SearchApiParameters Type Definition.
*
* An open-ended map of parameter names to JSON values. SearchApiLoader reads
* the `apiKey` entry in its constructor and excludes it from the request URL;
* every other entry whose value is not null or undefined is sent as a
* query-string parameter.
*
* For more parameters and supported search engines, refer to the
* documentation of the specific engine:
* Google - https://www.searchapi.io/docs/google
* Google News - https://www.searchapi.io/docs/google-news
* Google Scholar - https://www.searchapi.io/docs/google-scholar
* YouTube Transcripts - https://www.searchapi.io/docs/youtube-transcripts
* and others.
*
*/
type SearchApiParameters = {
[key: string]: JSONValue;
};
/**
 * Class representing a document loader for loading search results from
 * the SearchApi. It extends the BaseDocumentLoader class.
 *
 * The API key is read from the `apiKey` parameter or, when that is
 * undefined, from the `SEARCHAPI_API_KEY` environment variable. All other
 * parameters are forwarded as query-string parameters; `engine` defaults to
 * `"google"` when the caller does not supply one.
 * @example
 * ```typescript
 * const loader = new SearchApiLoader({
 *   q: "{query}",
 *   apiKey: "{apiKey}",
 *   engine: "google",
 * });
 * const docs = await loader.load();
 * ```
 */
export class SearchApiLoader extends BaseDocumentLoader {
  private readonly apiKey: string;

  private readonly parameters: SearchApiParameters;

  /**
   * @param params - Search parameters. `apiKey` is consumed here; the rest
   * are sent to the API by {@link buildUrl}.
   * @throws An error if no API key is available, or if `apiKey` is not a string.
   */
  constructor(params: SearchApiParameters) {
    super();
    const { apiKey = getEnvironmentVariable("SEARCHAPI_API_KEY") } = params;
    // Check for a missing key BEFORE checking its type: an absent key is
    // `undefined`, which would otherwise be reported as a type error and hide
    // the actionable "not set" message.
    if (apiKey === undefined || apiKey === null || apiKey === "") {
      throw new Error(
        "SearchApi API key not set. You can set it as SEARCHAPI_API_KEY in your .env file, or pass it to SearchApi."
      );
    }
    if (typeof apiKey !== "string") {
      throw new Error("Invalid type for apiKey. Expected string.");
    }
    this.apiKey = apiKey;
    // Shallow copy so later changes to the caller's object do not affect us.
    this.parameters = { ...params };
  }

  /**
   * Builds the URL for the SearchApi search request.
   *
   * Defaults (`engine`, `api_key`) come first so that caller-supplied
   * parameters with the same names override them. The camelCase `apiKey`
   * entry and entries whose value is null or undefined are omitted.
   * This method does not modify the loader's stored parameters.
   * @returns The URL for the search request.
   */
  public buildUrl(): string {
    const merged: SearchApiParameters = {
      engine: "google",
      api_key: this.apiKey,
      ...this.parameters,
    };
    const searchParams = new URLSearchParams();
    for (const [key, value] of Object.entries(merged)) {
      if (key === "apiKey" || value === undefined || value === null) {
        continue;
      }
      searchParams.append(key, `${value}`);
    }
    return `https://www.searchapi.io/api/v1/search?${searchParams.toString()}`;
  }

  /**
   * Extracts documents from the provided output.
   *
   * For `"transcripts"` the `text` fields of all entries are joined with
   * newlines into a single Document. For every other response type each
   * entry becomes its own Document holding the entry serialized as JSON.
   * @param output - The output to extract documents from. A non-array value
   * is treated as a single entry.
   * @param responseType - The type of the response to extract documents from.
   * @returns An array of Documents.
   */
  private extractDocuments(output: unknown, responseType: string): Document[] {
    const results = Array.isArray(output) ? output : [output];
    if (responseType === "transcripts") {
      // Optional chaining: a null/undefined entry contributes an empty line
      // instead of throwing a TypeError.
      const pageContent = results.map((result) => result?.text).join("\n");
      return [
        new Document({
          pageContent,
          metadata: { source: "SearchApi", responseType },
        }),
      ];
    }
    return results.map(
      (result) =>
        new Document({
          pageContent: JSON.stringify(result),
          // A fresh metadata object per document so they can be mutated
          // independently.
          metadata: { source: "SearchApi", responseType },
        })
    );
  }

  /**
   * Processes the response data from the SearchApi search request and converts it into an array of Documents.
   *
   * Only the known result sections are read, in a fixed order. Sections that
   * are absent, null or undefined are skipped.
   * @param data - The response data from the SearchApi search request.
   * @returns An array of Documents.
   */
  public processResponseData(data: Record<string, unknown>): Document[] {
    const responseTypes = [
      "answer_box",
      "shopping_results",
      "knowledge_graph",
      "organic_results",
      "transcripts",
    ];
    const documents: Document[] = [];
    for (const responseType of responseTypes) {
      const section = data[responseType];
      if (section === undefined || section === null) {
        continue;
      }
      documents.push(...this.extractDocuments(section, responseType));
    }
    return documents;
  }

  /**
   * Fetches the data from the provided URL and returns it as a JSON object.
   * @param url - The URL to fetch data from.
   * @returns A promise that resolves to the fetched data as a JSON object.
   * @throws An error if the body is not valid JSON, if the body carries an
   * `error` field, if the HTTP status is not successful, or if the body is
   * not a JSON object.
   */
  private async fetchData(url: string): Promise<Record<string, unknown>> {
    const response = await fetch(url);
    let data: unknown;
    try {
      data = await response.json();
    } catch {
      // E.g. an HTML error page from a gateway: report the status rather
      // than a bare JSON SyntaxError.
      throw new Error(
        `Failed to load search results from SearchApi due to: invalid JSON response (HTTP ${response.status})`
      );
    }
    const body =
      typeof data === "object" && data !== null
        ? (data as Record<string, unknown>)
        : undefined;
    if (body?.error) {
      const reason =
        typeof body.error === "string"
          ? body.error
          : JSON.stringify(body.error);
      throw new Error(
        `Failed to load search results from SearchApi due to: ${reason}`
      );
    }
    if (!response.ok || body === undefined) {
      throw new Error(
        `Failed to load search results from SearchApi due to: unexpected response (HTTP ${response.status})`
      );
    }
    return body;
  }

  /**
   * Loads the search results from the SearchApi.
   * @returns An array of Documents representing the search results.
   * @throws An error if the search results could not be loaded or processed.
   */
  public async load(): Promise<Document[]> {
    const url = this.buildUrl();
    const data = await this.fetchData(url);
    try {
      return this.processResponseData(data);
    } catch (error) {
      console.error(error);
      throw new Error(
        `Failed to process search results from SearchApi: ${error}`
      );
    }
  }
}