diff --git a/packages/core/src/Node.ts b/packages/core/src/Node.ts index 8d8d045ab..99f01d2db 100644 --- a/packages/core/src/Node.ts +++ b/packages/core/src/Node.ts @@ -1,5 +1,6 @@ import { createSHA256, path, randomUUID } from "@llamaindex/env"; import _ from "lodash"; +import { Settings } from "./Settings.js"; export enum NodeRelationship { SOURCE = "SOURCE", @@ -208,7 +209,14 @@ export class TextNode extends BaseNode { getContent(metadataMode: MetadataMode = MetadataMode.NONE): string { const metadataStr = this.getMetadataStr(metadataMode).trim(); - return `${metadataStr}\n\n${this.text}`.trim(); + const fullText = `${metadataStr}\n\n${this.text}`.trim(); + if (Settings.chunkSize) { + if (fullText.length > Settings.chunkSize) { + console.warn(`Content ${this.id_} is too long, truncating`); + return this.text.slice(0, Settings.chunkSize); + } + } + return fullText; } getMetadataStr(metadataMode: MetadataMode): string { diff --git a/packages/core/src/readers/SimpleDirectoryReader.edge.ts b/packages/core/src/readers/SimpleDirectoryReader.edge.ts index a26eae4f9..0a96d558c 100644 --- a/packages/core/src/readers/SimpleDirectoryReader.edge.ts +++ b/packages/core/src/readers/SimpleDirectoryReader.edge.ts @@ -1,5 +1,5 @@ -import { fs, path } from "@llamaindex/env"; -import { Document, type Metadata } from "../Node.js"; +import { path } from "@llamaindex/env"; +import { Document } from "../Node.js"; import { walk } from "../storage/FileSystem.js"; import { TextFileReader } from "./TextFileReader.js"; import type { BaseReader } from "./type.js"; @@ -85,7 +85,7 @@ export class SimpleDirectoryReader implements BaseReader { continue; } - const fileDocs = await reader.loadData(filePath, fs); + const fileDocs = await reader.loadData(filePath); fileDocs.forEach(addMetaData(filePath)); // Observer can still cancel addition of the resulting docs from this file @@ -123,8 +123,8 @@ export class SimpleDirectoryReader implements BaseReader { } } -function addMetaData(filePath: string): (doc: Document) => void { - return (doc: Document) => { +function addMetaData(filePath: string): (doc: Document) => void { + return (doc: Document) => { doc.metadata["file_path"] = path.resolve(filePath); doc.metadata["file_name"] = path.basename(filePath); }; diff --git a/packages/core/tests/Embedding.test.ts b/packages/core/tests/Embedding.test.ts index ab863ead1..a70297c97 100644 --- a/packages/core/tests/Embedding.test.ts +++ b/packages/core/tests/Embedding.test.ts @@ -1,8 +1,4 @@ -import { - OpenAIEmbedding, - SimilarityType, - similarity, -} from "llamaindex/embeddings/index"; +import { OpenAIEmbedding, SimilarityType, similarity } from "llamaindex"; import { beforeAll, describe, expect, test } from "vitest"; import { mockEmbeddingModel } from "./utility/mockOpenAI.js";