In [2]:
! pip install sentence-transformers==2.2.2
! pip install transformers==4.35.2
! pip install optimum==1.15.0
! pip install accelerate==0.25.0
! pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

Collecting sentence-transformers==2.2.2
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[K     |████████████████████████████████| 85 kB 8.2 MB/s  eta 0:00:01
[?25hCollecting huggingface-hub>=0.4.0
  Downloading huggingface_hub-0.20.3-py3-none-any.whl (330 kB)
[K     |████████████████████████████████| 330 kB 66.5 MB/s eta 0:00:01
Collecting scikit-learn
  Downloading scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.1 MB)
[K     |████████████████████████████████| 11.1 MB 116.2 MB/s eta 0:00:01
[?25hCollecting scipy
  Downloading scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)
[K     |████████████████████████████████| 34.5 MB 100.9 MB/s eta 0:00:01
[?25hCollecting sentencepiece
  Downloading sentencepiece-0.1.99-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 92.0 MB/s eta 0:00:01
[?25hCollecting torch>=1.6.0
  Downloading torch-2.1.2-cp38-cp3

In [1]:
import torch
from auto_gptq import exllama_set_max_input_length
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Release any cached, unused CUDA memory held by PyTorch before loading the model.
torch.cuda.empty_cache()

# Hugging Face Hub repository and branch of the GPTQ-quantized checkpoint.
# To load a different quantization variant, change `revision` to another
# branch name of the same repository.
model_name_or_path = "TheBloke/neural-chat-7B-v3-2-GPTQ"
revision = "gptq-4bit-32g-actorder_True"

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, device_map="auto", revision=revision
)

# Raise the ExLlama input-length limit to 5120 for the long JSON prompts used
# below. One call is enough; the original cell repeated it with the same arguments.
model = exllama_set_max_input_length(model, max_input_length=5120)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Text-generation pipeline with sampling enabled at a low temperature and
# at most 200 newly generated tokens per call.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.1,
    top_k=40,
    top_p=0.95,
    repetition_penalty=1.15,
)


# System instruction placed in front of every prompt.
system_prompt = (
    "### System: You are a good JSON Interpreter who always answers "
    "the question based on the context only."
)

# Per-query template; `{query_str}` is the placeholder for the user question.
prompt_template = """
### User:
{query_str}

### Assistant:
"""


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Sample Question 2: Count the number of medically non-invasive assessments.

In [3]:
# Ask the model a counting question over an inline schedule-of-assessments JSON.
# The entire prompt is a single string literal passed to `pipe`: the system
# instruction, the user question, the JSON payload ("SOA_table_rows", each row
# holding an "Assessments" label and a "Selected_X" list), and the trailing
# "### Assistant:" cue. The text is sent exactly as written, so the payload
# must not be reformatted.
# NOTE(review): the output captured with this cell is a CUDA OutOfMemoryError
# raised during this call; the prompt is very long.
output = pipe("""### System: You are a good JSON Interpreter who always answers \
the question based on the context only.

### User: In the Given JSON,How many unique Selected_X are present for all the SOA_table_rows?
JSON : {
	"SOA_table_rows": [
		{
			"Assessments": "Informed consent/assent",
			"Selected_X": [
				"Screening  (-30 to -1)"
			]
		},
		{
			"Assessments": "Inclusion/exclusion",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)"
			]
		},
		{
			"Assessments": "Uroflowmetry  Assessment",
			"Selected_X": [
				"Screening  (-30 to -1)"
			]
		},
		{
			"Assessments": "Demographics,  medical/ADPKD history [{Superscript 'e' : ADPKD genetic history to be collected if available, although it is not specifically required.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)"
			]
		},
		{
			"Assessments": "Tanner staging [{Superscript 'f' : A subject who reaches Stage 5 (both in pubic hair and genitalia) does not need to continue with Tanner Staging.}] ",
			"Selected_X": [
				"Baseline (D 1)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "MRI/ultrasound [{Superscript 'g' : MRI/ultrasound imaging: The baseline imaging used must be the same imaging used throughout the trial (Phase A and Phase B). It is recommended that subjects have their imaging done on the same day as the clinic visit; however imaging can be performed up to 30 days prior to the scheduled visit. The subject should be on IMP for 30 consecutive days prior to imaging (except screening). MRI/ultrasound is not required at the EoTx visit.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Renal pelvic  measurement [{Superscript 'h' : Renal pelvic assessments will be done using ultrasound. The Month 1 assessment can be performed at any scheduled visit between Month 1 and Month 3 but cannot be conducted until 30 days after the subject has started IMP, see Section 3.7.4.5.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"M1 (Wk  4) (7 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Vital signs [{Superscript 'i' : Vital signs at each visit include seated heart rate and blood pressure (systolic and diastolic). Temperature will be collected only at the Phase A baseline visit.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "12-lead electrocardiogram",
			"Selected_X": [
				"Screening  (-30 to -1)"
			]
		},
		{
			"Assessments": "Body height and  weight/growth  percentiles [{Superscript 'j' : Body weight should be measured post-void. Growth percentile is not required at the baseline, Wk 1 and M1 visits. Body weight is not required at the Wk 1 and M1 visits. .}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Serum  chemistry/hematology/ urinalysis [{Superscript 'k' : Central laboratory serum laboratory tests will be performed for all visits (only monthly safety labs may be collected at a local laboratory). Fasting is required at the screening visit. Fasting is recommended for all other visits, but is not required. Upon request, if clinically indicated and approved by the medical monitor, subjects may have the following evaluations in addition to the protocol-specified chemistry panel: serum calcium, phosphorus, parathyroid hormone, vitamin D, and bicarbonate levels.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Liver function  tests/creatinine [{Superscript 'k' : Central laboratory serum laboratory tests will be performed for all visits (only monthly safety labs may be collected at a local laboratory). Fasting is required at the screening visit. Fasting is recommended for all other visits, but is not required. Upon request, if clinically indicated and approved by the medical monitor, subjects may have the following evaluations in addition to the protocol-specified chemistry panel: serum calcium, phosphorus, parathyroid hormone, vitamin D, and bicarbonate levels.}, {Superscript 'l' : A full liver function panel (AST, ALT, alkaline phosphatase [ALP], bilirubin, total [BT]) will be done at screening. At subsequent visits, LFTs include AST and ALT only, unless otherwise clinically indicated. Laboratory samples for LFTs, serum sodium, and serum creatinine will be collected at baseline only if the baseline visit occurs > 7 days after the Screening visit. Monthly visits include LFTs, serum creatinine and pregnancy test. See Section 5.4 for handling abnormal LFTs.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d",
				"Follow-Up [{Superscript 'c' : Any subject who discontinues IMP during Phase A will enter the Follow-up period and then be followed every 6 months through Phase A. The 14 day follow-up visit is a telephone contct only, a clinic visit is not required. Subjects who do not complete Phase A on treatment are not eligible to enter Phase B.}, {Superscript 'd' : An assessment of vital status will be conducted on subjects who terminate IMP prior to the Phase A Month 12 visit and do not agree to additional safety follow-up but do not withdraw consent for continued telephone contact. See Section 3.7.4.8.}] || 7 D Post  Last Dose  (+2 D)"
			]
		},
		{
			"Assessments": "Serum sodium [{Superscript 'k' : Central laboratory serum laboratory tests will be performed for all visits (only monthly safety labs may be collected at a local laboratory). Fasting is required at the screening visit. Fasting is recommended for all other visits, but is not required. Upon request, if clinically indicated and approved by the medical monitor, subjects may have the following evaluations in addition to the protocol-specified chemistry panel: serum calcium, phosphorus, parathyroid hormone, vitamin D, and bicarbonate levels.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]"
			]
		},
		{
			"Assessments": "Urine osmolality [{Superscript 'n' : Spot urine sample collected predose on the morning of Day 1, prior to the morning dose on Week 1, and prior to the morning dose on Month 1. See Section 3.7.5.2 for sample collection details. Protocol 156-12-298 Confidential - Proprietary Information 41 Amendment 1, 08 Aug 2016}] ",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)"
			]
		},
		{
			"Assessments": "Urine specific gravity",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)"
			]
		},
		{
			"Assessments": "Urine or serum pregnancy  test [{Superscript 'o' : A urine or serum pregnancy test for females of childbearing potential (all females ≥ 12 years of age and females < 12 years of age who have started menstruating) will be performed during the study. On suspicion of pregnancy, an unscheduled urine or serum pregnancy testing will be performed. Positive urine pregnancy tests must be confirmed with a serum pregnancy test. Investigator (or appropriate site staff) is advised to counsel participants on the risk of pregnancy while participating in a clinical trial as well as ensuring the child understands how pregnancies occur and can be avoided. This should be documented in source records.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Alcohol & drug screen [{Superscript 'p' : Alcohol/drug screen is performed per clinical judgment and institutional guidelines.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)"
			]
		},
		{
			"Assessments": "Physical examination [{Superscript 'q' : A full physical examination is required at screening for the purposes of inclusion into the trial. At all other visits, as needed, a “directed” physical examination may be performed to focus on PKD-related signs and symptoms.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Randomization",
			"Selected_X": [
				"Baseline (D 1)"
			]
		},
		{
			"Assessments": "IMP administration [{Superscript 'r' : The first dose of IMP will be administered to the subject in the clinic. If the visit occurs in the afternoon, the first dose will be the PM dose of IMP. The last dose of Phase A will be taken the evening prior to the Month 12 visit.}] ",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)"
			]
		},
		{
			"Assessments": "IMPdispensation [{Superscript 's' : If during the titration contact the determination is made to adjust the dose regimen, dose adjustments will be made using dispensed supply if possible, and dose instructions will be provided by the investigator. The subject’s current dose will be recorded at each visit.}] ",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]",
				"Titration  Contact  (phone  call or  visit)"
			]
		},
		{
			"Assessments": "IMPreconciliation [{Superscript 't' : Drug reconciliation will be conducted at every visit. If additional drug supply is required for titration purposes, the site will arrange a subject visit for dispensation.}] ",
			"Selected_X": [
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Palatability and  acceptability",
			"Selected_X": [
				"Baseline (D 1)"
			]
		},
		{
			"Assessments": "QoL questionnaires [{Superscript 'u' : Generic pediatric QoL questionnaires, as available will be assessed in subjects 12 years and older at baseline (pre-dose), Week 1, Months 1, 3, 6, and 12/EoTx prior to any blood draws. The Month 12 assessment should be completed prior to administration of Phase B IMP.}] ",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"Monthly Safety  Visits:  M2, 3, 4,  5, 7, 8, 9,  10, 11  (±4D) [{Superscript 'a' : Monthly safety visits: If it is more convenient for the subject, laboratory tests may be collected at a local laboratory; a clinic visit is not required. The PI is required to review monthly safety labs and record them in the electronic case report form (eCRF). Protocol 156-12-298 Confidential - Proprietary Information 40 Amendment 1, 08 Aug 2016}]",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "ADPKD outcomes",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "Daytime and nighttime  void collection [{Superscript 'v' : The number of daytime and nightime voids for all subjects 12 years and older will be collected during Phase A. Refer also to Section 3.7.6.1. Daytime and nighttime voids are not required for the EoTx visit and subsequent follow-up visits.}] ",
			"Selected_X": [
				"Baseline (D 1)",
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "PK & PD sample  collection [{Superscript 'w' : Dense PK and PD sampling will occur in a subset of subjects (see Section 3.7.5.1) after at least 1 month of dosing. These subjects will be hospitalized the evening prior to the scheduled PK sampling (typically a Friday evening) and discharged after the 24 hour PK sample collection (typically Sunday morning). Sparse PK sampling will occur in all subjects at the Week 1, Month 1, Month 6, and Month 12 visits.}] ",
			"Selected_X": [
				"Wk 1  (-1/+3 D)",
				"M1 (Wk  4) (7 D)",
				"M6 (±14 D)",
				"M12 [{Superscript 'b' : The Month 12 visit will serve as the Baseline visit for Phase B. The subject’s body weight at this visit will be used to determine the starting dose in Phase B.}] (D365)  (+14 D)/ End of  Treatment  (EoTx)c [{Superscript '' : Unknown}, {Superscript '' : Unknown}] d"
			]
		},
		{
			"Assessments": "24-hour fluid balance",
			"Selected_X": [
				"Wk 1  (-1/+3 D)"
			]
		},
		{
			"Assessments": "Concomitant medications",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Follow-Up [{Superscript 'c' : Any subject who discontinues IMP during Phase A will enter the Follow-up period and then be followed every 6 months through Phase A. The 14 day follow-up visit is a telephone contct only, a clinic visit is not required. Subjects who do not complete Phase A on treatment are not eligible to enter Phase B.}, {Superscript 'd' : An assessment of vital status will be conducted on subjects who terminate IMP prior to the Phase A Month 12 visit and do not agree to additional safety follow-up but do not withdraw consent for continued telephone contact. See Section 3.7.4.8.}] || 7 D Post  Last Dose  (+2 D)",
				"Follow-Up [{Superscript 'c' : Any subject who discontinues IMP during Phase A will enter the Follow-up period and then be followed every 6 months through Phase A. The 14 day follow-up visit is a telephone contct only, a clinic visit is not required. Subjects who do not complete Phase A on treatment are not eligible to enter Phase B.}, {Superscript 'd' : An assessment of vital status will be conducted on subjects who terminate IMP prior to the Phase A Month 12 visit and do not agree to additional safety follow-up but do not withdraw consent for continued telephone contact. See Section 3.7.4.8.}] || 14 D Post  Last Dose  (+2 D)"
			]
		},
		{
			"Assessments": "Adverse events",
			"Selected_X": [
				"Screening  (-30 to -1)",
				"Follow-Up [{Superscript 'c' : Any subject who discontinues IMP during Phase A will enter the Follow-up period and then be followed every 6 months through Phase A. The 14 day follow-up visit is a telephone contct only, a clinic visit is not required. Subjects who do not complete Phase A on treatment are not eligible to enter Phase B.}, {Superscript 'd' : An assessment of vital status will be conducted on subjects who terminate IMP prior to the Phase A Month 12 visit and do not agree to additional safety follow-up but do not withdraw consent for continued telephone contact. See Section 3.7.4.8.}] || 7 D Post  Last Dose  (+2 D)",
				"Follow-Up [{Superscript 'c' : Any subject who discontinues IMP during Phase A will enter the Follow-up period and then be followed every 6 months through Phase A. The 14 day follow-up visit is a telephone contct only, a clinic visit is not required. Subjects who do not complete Phase A on treatment are not eligible to enter Phase B.}, {Superscript 'd' : An assessment of vital status will be conducted on subjects who terminate IMP prior to the Phase A Month 12 visit and do not agree to additional safety follow-up but do not withdraw consent for continued telephone contact. See Section 3.7.4.8.}] || 14 D Post  Last Dose  (+2 D)"
			]
		}
	]
}
### Assistant:
""")

OutOfMemoryError: CUDA out of memory. Tried to allocate 6.47 GiB. GPU 0 has a total capacty of 22.19 GiB of which 6.29 GiB is free. Including non-PyTorch memory, this process has 15.89 GiB memory in use. Of the allocated memory 12.57 GiB is allocated by PyTorch, and 3.02 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [5]:
from llama_index import SimpleDirectoryReader
from llama_index.node_parser import SimpleNodeParser
from llama_index.schema import MetadataMode


# Build a directory reader restricted to the single protocol PDF listed here.
protocol_pdf_files = [
    "/home/ubuntu/open-llm/pdfs/X06-201-00001_Protocol_Amendment_4.pdf",
]
reader = SimpleDirectoryReader(input_files=protocol_pdf_files)