In [41]:
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import json

try:
    # boto_session = boto3.Session(profile_name='ocicb-sandbox-alpha-dev-admin')
    # sess = sagemaker.Session(boto_session=boto_session)
    # role = sagemaker.get_execution_role()
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='NIAID-SageMakeFullAccess')['Role']['Arn']
    print(role)
except ValueError:
	iam = boto3.client('iam')
	role = iam.get_role(RoleName='NIAID-SageMakeFullAccess')['Role']['Arn']

def deploy_bart_large_cnn():
    
    # bart-large-cnn
    # Hub Model configuration. https://huggingface.co/models
    hub = {
    	'HF_MODEL_ID':'facebook/bart-large-cnn',
    	'HF_TASK':'summarization'
    }
    
    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
    	transformers_version='4.37.0',
    	pytorch_version='2.1.0',
    	py_version='py310',
    	env=hub,
    	role=role, 
    )

    # deploy model to SageMaker Inference
    predictor = huggingface_model.deploy(
    	initial_instance_count=1, # number of instances
    	instance_type='ml.m5.xlarge' # ec2 instance type
    )
    return predictor

def deploy_t5():
    # Hub Model configuration. https://huggingface.co/models
    hub = {
    	'HF_MODEL_ID':'google-t5/t5-base',
    	'SM_NUM_GPUS': json.dumps(1)
    }
    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
    	transformers_version='4.37.0',
    	pytorch_version='2.1.0',
    	py_version='py310',
    	env=hub,
    	role=role, 
    )
    
    # deploy model to SageMaker Inference
    predictor = huggingface_model.deploy(
    	initial_instance_count=1,
    	instance_type="ml.g5.2xlarge",
    	container_startup_health_check_timeout=300,
      )
    return predictor


arn:aws:iam::020571837677:role/NIAID-SageMakeFullAccess


In [42]:
predictor = deploy_t5()

-----------!

In [30]:
print(predictor)

HuggingFacePredictor: {'endpoint_name': 'huggingface-pytorch-inference-2024-07-29-18-29-03-005', 'sagemaker_session': <sagemaker.session.Session object at 0x2e570b650>, 'serializer': <sagemaker.base_serializers.JSONSerializer object at 0x2975e4a10>, 'deserializer': <sagemaker.base_deserializers.JSONDeserializer object at 0x2975e4bd0>}


In [45]:
from sagemaker.huggingface.model import HuggingFacePredictor, HuggingFaceModel

endpoint_name = "huggingface-pytorch-inference-2024-07-29-18-29-03-005"
predictor = HuggingFacePredictor(endpoint_name)
model = HuggingFaceModel(endpoint_name, py_version='py310', transformers_version='4.37.0', pytorch_version='2.1.0',)
print(model)


<sagemaker.huggingface.model.HuggingFaceModel object at 0x29c8a0f50>


In [46]:
summary = predictor.predict({
	"inputs": "The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.",
})


In [47]:
print(summary[0]['generated_text'])

the tallest structure in Paris., it is the tallest structure in Paris


In [52]:
import pymupdf
import fitz
from transformers import BartForConditionalGeneration, BartTokenizer
import textwrap

from transformers import T5ForConditionalGeneration, T5Tokenizer
tokenizer = T5Tokenizer.from_pretrained('t5-small')

def summarize_text(text):
    # Preprocess text for T5
    input_text = "summarize: USA is great country. USA has diverse population. USA has 50 states."
    # Generate summary
    summary_ids = predictor.predict({"inputs": input_text})
    print(summary_ids)
    
    
def summarize_pdf(file):
    print(file)
    doc = pymupdf.fitz(file)  # Open the PDF file
    text = ""
    for page_num in range(len(doc)):
        page = doc.load_page(page_num)  
        text += page.get_text()  
    print(len(text))
    return summarize_text(text)

summarize_pdf('./data/aiayn.pdf')

./data/aiayn.pdf
39593
[{'generated_text': 'USA is great country. USA has diverse population.'}]


In [35]:
import gradio as gr
# Create the Gradio interface
interface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.File(file_count="single", type="filepath", label="Upload PDF File"),
    outputs="text",
    title="PDF File Summarizer",
    description="Upload a single PDF file to summarize."
)

interface.launch(inline=True)

Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




/private/var/folders/tk/s70x7yfs2891bdbmc0zg65qx_b4w3m/T/gradio/53ad2a6f1a2ec53d745dbc4350fa1247d6f79972/NIAID_DAR-1718658969.1616802.pdf
2661
tensor([[21603,    10,     8,  5065,   222,  1809,    16,  1919,     5,     6,
            34,    19,     8,  5065,   222,  1809,    16,  1919,     1]])


Traceback (most recent call last):
  File "/Users/shresthap/Documents/Projects/rag/llama-langchain/.venv/lib/python3.11/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shresthap/Documents/Projects/rag/llama-langchain/.venv/lib/python3.11/site-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shresthap/Documents/Projects/rag/llama-langchain/.venv/lib/python3.11/site-packages/gradio/blocks.py", line 1923, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shresthap/Documents/Projects/rag/llama-langchain/.venv/lib/python3.11/site-packages/gradio/blocks.py", line 1508, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^

In [53]:
predictor.delete_endpoint()

In [55]:
import itertools

itertools.accumulate(itertools.cycle(map(ord, "Close")), initial=0)

<itertools.accumulate at 0x2aedfb600>

In [62]:
import pprint
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_loaders import AsyncChromiumLoader

from langchain_text_splitters import RecursiveCharacterTextSplitter

schema = {
    "properties": {
        "news_article_title": {"type": "string"},
        "news_article_summary": {"type": "string"},
    },
    "required": ["news_article_title", "news_article_summary"],
}
def scrape_with_playwright(urls, schema):
    loader = AsyncChromiumLoader(urls)
    docs = loader.load()
    bs_transformer = BeautifulSoupTransformer()
    docs_transformed = bs_transformer.transform_documents(
        docs, tags_to_extract=["span"]
    )
    print("Extracting content with LLM")

    # Grab the first 1000 tokens of the site
    splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=1000, chunk_overlap=0
    )
    splits = splitter.split_documents(docs_transformed)

    # Process the first split
    extracted_content = extract(schema=schema, content=splits[0].page_content)
    pprint.pprint(extracted_content)
    return extracted_content


urls = ["https://www.wsj.com"]
extracted_content = scrape_with_playwright(urls, schema=schema)

USER_AGENT environment variable not set, consider setting it to identify your requests.


RuntimeError: asyncio.run() cannot be called from a running event loop