Skip to content

Commit

Permalink
fix: pymupdf must be optional because it is AGPLv3 licensed (#11896)
Browse files Browse the repository at this point in the history
  • Loading branch information
nicoloboschi authored Mar 13, 2024
1 parent 270778e commit 46f1e13
Show file tree
Hide file tree
Showing 20 changed files with 36 additions and 53 deletions.
8 changes: 3 additions & 5 deletions docs/examples/agent/openai_retrieval_benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-agent-openai\n",
"%pip install llama-index-llms-openai"
]
Expand Down Expand Up @@ -74,8 +74,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 141KB/s in 1m 48s \n",
"\n",
"2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-11-08 21:55:42 (123 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down Expand Up @@ -198,8 +197,7 @@
"\n",
"data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.02s \n",
"\n",
"2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n",
"\n"
"2023-11-08 22:20:12 (2.87 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n"
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"source": [
"%pip install llama-index-llms-gradient\n",
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-finetuning"
]
},
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/finetuning/openai_fine_tuning_functions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"%pip install llama-index-finetuning\n",
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-finetuning-callbacks\n",
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-program-openai"
]
},
Expand Down Expand Up @@ -407,8 +407,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 229KB/s in 45s \n",
"\n",
"2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-10-04 23:47:25 (298 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/low_level/evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-llms-openai"
]
},
Expand All @@ -62,8 +62,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 1.56MB/s in 9.3s \n",
"\n",
"2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-09-19 00:05:25 (1.40 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/low_level/fusion_retriever.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-llms-openai"
]
},
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/low_level/oss_ingestion_retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-vector-stores-postgres\n",
"%pip install llama-index-embeddings-huggingface\n",
"%pip install llama-index-llms-llama-cpp"
Expand Down
7 changes: 2 additions & 5 deletions docs/examples/low_level/response_synthesis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-vector-stores-pinecone\n",
"%pip install llama-index-llms-openai"
]
Expand Down Expand Up @@ -386,7 +386,6 @@
"*****Response******:\n",
"\n",
"RLHF used both model-based and human-based evaluation to select the best-performing models among several ablations. Model-based evaluation was used to measure the robustness of the reward model by collecting a test set of prompts for both helpfulness and safety, and asking three annotators to judge the quality of the answers based on a 7-point Likert scale. Human evaluation was used to validate major model versions. Additionally, a more general reward was trained to ensure the measure wouldn't diverge from the human preferences. Results showed that the reward models were well calibrated with the human preference annotations.\n",
"\n",
"\n"
]
}
Expand Down Expand Up @@ -485,9 +484,7 @@
"---------------------\n",
"Given the context information and not prior knowledge, answer the query.\n",
"Query: Can you tell me about results from RLHF using both model-based and human-based evaluation?\n",
"Answer: \n",
"\n",
"\n"
"Answer: \n"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/low_level/retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-vector-stores-pinecone\n",
"%pip install llama-index-embeddings-openai"
]
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/low_level/router.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-program-openai\n",
"%pip install llama-index-llms-openai"
]
Expand Down Expand Up @@ -692,8 +692,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 9.5s \n",
"\n",
"2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-09-17 23:37:22 (1.37 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
10 changes: 3 additions & 7 deletions docs/examples/low_level/vector_store.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-embeddings-openai"
]
},
Expand Down Expand Up @@ -690,9 +690,7 @@
"In this section, we describe our approach to safety fine-tuning, including safety categories, annotation\n",
"guidelines, and the techniques we use to mitigate safety risks. We employ a process similar to the general\n",
"fine-tuning methods as described in Section 3, with some notable differences related to safety concerns.\n",
"----------------\n",
"\n",
"\n"
"----------------\n"
]
}
],
Expand Down Expand Up @@ -774,9 +772,7 @@
"Better Long-Tail Safety Robustness without Hurting Helpfulness\n",
"Safety is inherently a long-tail problem,\n",
"where the challenge comes from a small number of very specific cases.\n",
"----------------\n",
"\n",
"\n"
"----------------\n"
]
}
],
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/param_optimizer/param_optimizer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-embeddings-openai\n",
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-experimental-param-tuner"
]
},
Expand Down Expand Up @@ -66,8 +66,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 533KB/s in 36s \n",
"\n",
"2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-11-04 00:17:10 (376 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/prompts/emotion_prompt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"outputs": [],
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down Expand Up @@ -192,8 +192,7 @@
"\n",
"data/llama2_eval_qr 100%[===================>] 59.23K --.-KB/s in 0.04s \n",
"\n",
"2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n",
"\n"
"2023-11-04 00:34:10 (1.48 MB/s) - ‘data/llama2_eval_qr_dataset.json’ saved [60656/60656]\n"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/prompts/prompt_optimization.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
"outputs": [],
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/prompts/prompts_rag.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"outputs": [],
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down Expand Up @@ -130,8 +130,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 1.50MB/s in 10s \n",
"\n",
"2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-10-28 23:19:49 (1.31 MB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"outputs": [],
"source": [
"%pip install llama-index-embeddings-openai\n",
"%pip install llama-index-readers-file\n",
"%pip install llama-index-readers-file pymupdf\n",
"%pip install llama-index-llms-openai"
]
},
Expand Down
8 changes: 3 additions & 5 deletions docs/examples/retrievers/auto_merging_retriever.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"outputs": [],
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down Expand Up @@ -353,8 +353,7 @@
"text": [
"> Merging 4 nodes into parent node.\n",
"> Parent node id: caf5f81c-842f-46a4-b679-6be584bd6aff.\n",
"> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n",
"\n"
"> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n"
]
}
],
Expand Down Expand Up @@ -684,8 +683,7 @@
"text": [
"> Merging 4 nodes into parent node.\n",
"> Parent node id: 3671b20d-ea5e-4afc-983e-02be6ee8302d.\n",
"> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n",
"\n"
"> Parent node text: We conduct RLHF by first collecting human preference data for safety similar to Section 3.2.2: an...\n"
]
}
],
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/retrievers/composable_retrievers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"%pip install llama-index-retrievers-bm25\n",
"%pip install llama-index-storage-docstore-redis\n",
"%pip install llama-index-storage-docstore-dynamodb\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down
5 changes: 2 additions & 3 deletions docs/examples/retrievers/ensemble_retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"source": [
"%pip install llama-index-llms-openai\n",
"%pip install llama-index-postprocessor-cohere-rerank\n",
"%pip install llama-index-readers-file"
"%pip install llama-index-readers-file pymupdf"
]
},
{
Expand Down Expand Up @@ -159,8 +159,7 @@
"\n",
"data/llama2.pdf 100%[===================>] 13.03M 521KB/s in 42s \n",
"\n",
"2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n",
"\n"
"2023-09-28 12:57:20 (320 KB/s) - ‘data/llama2.pdf’ saved [13661300/13661300]\n"
]
}
],
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# PyMuPDF Loader

```bash
pip install llama-index-readers-file
pip install llama-index-readers-file pymupdf
```

This loader extracts text from a local PDF file using the `PyMuPDF` Python library. If `metadata` is passed as `True` when calling the `load` function, the extracted documents will include basic metadata such as page numbers, file path, and the total number of pages in the PDF.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,13 @@ license = "MIT"
maintainers = ["FarisHijazi", "Haowjy", "ephe-meral", "hursh-desai", "iamarunbrahma", "jon-chuang", "mmaatouk", "ravi03071991", "sangwongenip", "thejessezhang"]
name = "llama-index-readers-file"
readme = "README.md"
version = "0.1.10"
version = "0.1.11"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
llama-index-core = "^0.10.1"
pymupdf = "^1.23.21"
# pymupdf is AGPLv3-licensed, so it's optional
pymupdf = {optional = true, version = "^1.23.21"}
bs4 = "^0.0.2"
beautifulsoup4 = "^4.12.3"
pypdf = "^4.0.1"
Expand Down

0 comments on commit 46f1e13

Please sign in to comment.