-
Notifications
You must be signed in to change notification settings - Fork 380
/
Copy pathCSVReader.ts
56 lines (52 loc) · 1.91 KB
/
CSVReader.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import type { ParseConfig } from "papaparse";
import Papa from "papaparse";
import { Document } from "../Node.js";
import { FileReader } from "./type.js";
/**
 * CSV reader backed by papaparse.
 *
 * Parses CSV content, turns every parsed row into one line of text by joining
 * its column values, and returns either a single combined Document or one
 * Document per row.
 */
export class PapaCSVReader extends FileReader {
  private readonly concatRows: boolean;
  private readonly colJoiner: string;
  private readonly rowJoiner: string;
  private readonly papaConfig?: ParseConfig;

  /**
   * Constructs a new reader.
   * @param concatRows - Whether to concatenate all rows into one Document. When false, a Document is created for each row. Defaults to true.
   * @param colJoiner - Separator used to join the column values of a row. Defaults to ", ".
   * @param rowJoiner - Separator used to join rows. Only used when `concatRows` is true. Defaults to "\n".
   * @param papaConfig - Optional configuration passed through unchanged to `Papa.parse`.
   */
  constructor(
    concatRows: boolean = true,
    colJoiner: string = ", ",
    rowJoiner: string = "\n",
    papaConfig?: ParseConfig,
  ) {
    super();
    this.concatRows = concatRows;
    this.colJoiner = colJoiner;
    this.rowJoiner = rowJoiner;
    this.papaConfig = papaConfig;
  }

  /**
   * Parses CSV content and converts it into Documents.
   * @param fileContent - Raw CSV bytes; decoded as UTF-8 before parsing.
   * @returns A single Document holding all rows joined by `rowJoiner` when
   * `concatRows` is true, otherwise one Document per parsed row.
   */
  async loadDataAsContent(fileContent: Buffer): Promise<Document[]> {
    const result = Papa.parse(fileContent.toString("utf-8"), this.papaConfig);
    const textList: string[] = result.data.map(
      (row: unknown[] | Record<string, unknown>) =>
        // Object.values covers both row shapes: plain arrays, and objects
        // keyed by column name when header row mode is enabled.
        // Each value is stringified with String(), so non-string values
        // appear in their default string form.
        Object.values(row)
          .map((value) => String(value))
          .join(this.colJoiner),
    );

    if (this.concatRows) {
      return [new Document({ text: textList.join(this.rowJoiner) })];
    }
    return textList.map((text) => new Document({ text }));
  }
}