In [None]:
import os
from IPython.display import HTML
from dotenv import load_dotenv, find_dotenv

# find_dotenv() locates the .env file and load_dotenv() loads it; the return
# value is deliberately discarded into `_`.
_ = load_dotenv(find_dotenv())  # read local .env file
# Token sent as the Bearer credential by the HTTP helper below.
# os.environ[...] raises KeyError if HF_API_KEY is not set.
hf_api_key = os.environ["HF_API_KEY"]

## Text Summarization

### API based function

In [2]:
# Helper function
import requests, json

# Summarization endpoint
def get_completion(
    inputs,
    parameters=None,
    endpoint_url="https://api-inference.huggingface.co/models/facebook/bart-large-cnn",
    timeout=60,
):
    """POST ``inputs`` to an inference endpoint and return the decoded JSON reply.

    Relies on the module-level ``hf_api_key`` for the Bearer token.

    Args:
        inputs: Value sent under the ``"inputs"`` key of the JSON body.
        parameters: Optional value sent under the ``"parameters"`` key; the key
            is omitted from the body when this is ``None``.
        endpoint_url: URL that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook kernel on a stalled endpoint. Pass ``None`` to restore
            the unbounded wait.

    Returns:
        The response body decoded as UTF-8 and parsed as JSON. The HTTP status
        code is not checked, so an error payload sent by the server is
        returned to the caller as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    headers = {
        "Authorization": f"Bearer {hf_api_key}",
        "Content-Type": "application/json",
    }
    payload = {"inputs": inputs}
    if parameters is not None:
        payload["parameters"] = parameters
    response = requests.post(
        endpoint_url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return json.loads(response.content.decode("utf-8"))

In [3]:
# Sample passage about the Eiffel Tower used as the summarization input below.
# The continuation lines sit inside the triple-quoted literal, so their leading
# indentation and line breaks are part of the string.
text = """The tower is 324 metres (1,063 ft) tall, about the same height
        as an 81-storey building, and the tallest structure in Paris. 
        Its base is square, measuring 125 metres (410 ft) on each side. 
        During its construction, the Eiffel Tower surpassed the Washington 
        Monument to become the tallest man-made structure in the world,
        a title it held for 41 years until the Chrysler Building
        in New York City was finished in 1930. It was the first structure 
        to reach a height of 300 metres. Due to the addition of a broadcasting 
        aerial at the top of the tower in 1957, it is now taller than the 
        Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the 
        Eiffel Tower is the second tallest free-standing structure in France 
        after the Millau Viaduct."""

In [4]:
# Send the sample passage to the hosted endpoint; the cell displays the parsed JSON reply.
get_completion(text)

[{'summary_text': 'The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building. Its base is square, measuring 125 metres (410 ft) on each side. It is the second tallest free-standing structure in France after the Millau Viaduct.'}]

### Local model 

In [5]:
from transformers import pipeline

# NOTE: this rebinds `get_completion`. From here on the name refers to the
# local summarization pipeline, not the HTTP helper defined earlier.
get_completion = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


def summarize(input):
    """Summarize ``input`` using the module-level ``get_completion`` callable.

    ``get_completion`` is looked up when this function is called, and its
    result is expected to be indexable with a ``"summary_text"`` entry in the
    first element. That entry is returned.
    """
    return get_completion(input)[0]["summary_text"]

Device set to use cpu


In [6]:
# Summarize the same sample passage, this time through the local pipeline.
summarize(text)

' The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building . It is the tallest structure in Paris and the second tallest free-standing structure in France after the Millau Viaduct . It was the first structure in the world to reach a height of 300 metres .'

### Basic App

In [None]:
import gradio as gr
import os


def summarize(input_text):
    """Return the summary text for ``input_text``.

    Delegates to the module-level ``get_completion`` callable and extracts the
    ``"summary_text"`` field from the first element of its result.
    """
    results = get_completion(input_text)
    first_result = results[0]
    return first_result["summary_text"]


# Create the Gradio interface: one text input passed to `summarize`, one text output
demo = gr.Interface(fn=summarize, inputs="text", outputs="text")

# Launch the Gradio interface. With share=True the cell output lists a public
# *.gradio.live URL in addition to the local one, so the app is reachable from
# outside this machine while the link is valid.
demo.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://8c69e321dffe66027a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Your max_length is set to 142, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)


### Tuning a few details of the app
- Set the number of lines shown in the input and output text boxes to signal how much text is expected
- Add a title and a description

In [None]:
# Stop any Gradio server started by an earlier cell before launching a new one.
gr.close_all()
demo = gr.Interface(
    fn=summarize,
    # The input box is taller than the result box: long text in, short summary out.
    inputs=[gr.Textbox(label="Text to summarize", lines=6)],
    outputs=[gr.Textbox(label="Result", lines=3)],
    title="Text summarization with distilbart-cnn",
    # The model id shown to users must match the checkpoint loaded by the
    # pipeline, which is "sshleifer/...", not "shleifer/...".
    description="Summarize any text using the `sshleifer/distilbart-cnn-12-6` model under the hood!",
)
demo.launch(share=True)

Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://0f7da7bff176ddfcd3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Your max_length is set to 142, but your input_length is only 133. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=66)


## NER app

In [9]:
from transformers import pipeline

# NOTE: this rebinds `get_completion` again, now to an NER pipeline. Functions
# that look up `get_completion` at call time (such as `summarize`) will use
# this pipeline if they are invoked after this cell has run.
get_completion = pipeline("ner", model="dslim/bert-base-NER")


def ner(input):
    """Run ``get_completion`` on ``input`` and bundle the result with the text.

    Returns a dict holding the original string under ``"text"`` and the
    unmodified ``get_completion`` output under ``"entities"``.
    """
    entities = get_completion(input)
    result = {"text": input, "entities": entities}
    return result

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [10]:
# Inspect the raw pipeline output for a sample sentence. Note that `text` is
# reassigned here and no longer holds the earlier summarization passage.
text = "My name is Andrew, I'm building DeepLearningAI and I live in California"
get_completion(text)

[{'entity': 'B-PER',
  'score': np.float32(0.9990625),
  'index': 4,
  'word': 'Andrew',
  'start': 11,
  'end': 17},
 {'entity': 'B-ORG',
  'score': np.float32(0.9927857),
  'index': 10,
  'word': 'Deep',
  'start': 32,
  'end': 36},
 {'entity': 'I-ORG',
  'score': np.float32(0.99677867),
  'index': 11,
  'word': '##L',
  'start': 36,
  'end': 37},
 {'entity': 'I-ORG',
  'score': np.float32(0.9954496),
  'index': 12,
  'word': '##ear',
  'start': 37,
  'end': 40},
 {'entity': 'I-ORG',
  'score': np.float32(0.9959293),
  'index': 13,
  'word': '##ning',
  'start': 40,
  'end': 44},
 {'entity': 'I-ORG',
  'score': np.float32(0.8917463),
  'index': 14,
  'word': '##A',
  'start': 44,
  'end': 45},
 {'entity': 'I-ORG',
  'score': np.float32(0.5036117),
  'index': 15,
  'word': '##I',
  'start': 45,
  'end': 46},
 {'entity': 'B-LOC',
  'score': np.float32(0.99969244),
  'index': 20,
  'word': 'California',
  'start': 61,
  'end': 71}]

In [11]:
# Second sample sentence; again displays the raw per-token pipeline output.
text = "I am Julie. I work as an artist and recently displayed my paintings in MoMA"
get_completion(text)

[{'entity': 'B-PER',
  'score': np.float32(0.9761178),
  'index': 3,
  'word': 'Julie',
  'start': 5,
  'end': 10},
 {'entity': 'B-ORG',
  'score': np.float32(0.94296),
  'index': 16,
  'word': 'Mo',
  'start': 71,
  'end': 73},
 {'entity': 'I-ORG',
  'score': np.float32(0.5930551),
  'index': 17,
  'word': '##MA',
  'start': 73,
  'end': 75}]

In [12]:
def ner(input):
    """Return ``input`` together with the entities ``get_completion`` finds in it.

    The result is a dict with two keys: ``"text"`` (the string passed in) and
    ``"entities"`` (whatever ``get_completion`` returned for it).
    """
    return {"text": input, "entities": get_completion(input)}


# Stop any Gradio server started by an earlier cell before launching a new one.
gr.close_all()
demo = gr.Interface(
    fn=ner,
    inputs=[gr.Textbox(label="Text to find entities", lines=2)],
    # `ner` returns a {"text": ..., "entities": ...} dict, which is handed to this component.
    outputs=[gr.HighlightedText(label="Text with entities")],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
    # New in this cell: `examples` supplies ready-made sample inputs for the app.
    examples=[
        "My name is Andrew and I live in California",
        "My name is Poli and work at HuggingFace",
    ],
)
demo.launch(share=True)



Closing server running on port: 7860
Closing server running on port: 7860
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://8355b4121c426e9859.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [13]:
def merge_tokens(tokens):
    """Collapse consecutive tokens of one entity into a single entry.

    A token is folded into the previous merged entry when its ``"entity"`` tag
    starts with ``"I-"`` and the previous entry's tag ends with the same
    entity type (for example ``"I-ORG"`` after ``"B-ORG"`` or ``"I-ORG"``).
    Any other token starts a new entry.

    When folding:
      * ``"word"`` is extended. A leading ``"##"`` marker is stripped and the
        piece is appended directly. A token without the marker is a new word
        and is joined with a single space.
      * ``"end"`` is moved to the folded token's ``"end"``.
      * ``"score"`` becomes the arithmetic mean of all folded tokens' scores,
        so every token carries equal weight regardless of position.

    Args:
        tokens: Iterable of dicts with at least ``"entity"``, ``"word"``,
            ``"end"`` and ``"score"`` keys.

    Returns:
        A new list of new dicts. Neither ``tokens`` nor the dicts inside it
        are modified.
    """
    merged_tokens = []
    # Parallel to merged_tokens: how many source tokens each entry contains.
    # Kept outside the dicts so the returned entries gain no extra keys.
    piece_counts = []

    for token in tokens:
        entity = token["entity"]
        continues_previous = (
            bool(merged_tokens)
            and entity.startswith("I-")
            and merged_tokens[-1]["entity"].endswith(entity[2:])
        )

        if not continues_previous:
            # Shallow copy so later merging never writes into the caller's dict.
            merged_tokens.append(dict(token))
            piece_counts.append(1)
            continue

        last_token = merged_tokens[-1]
        word = token["word"]
        if word.startswith("##"):
            # Sub-word continuation: drop only the leading marker.
            last_token["word"] += word[2:]
        else:
            # A separate word belonging to the same entity.
            last_token["word"] += " " + word
        last_token["end"] = token["end"]

        # Incremental mean over all pieces merged so far.
        count = piece_counts[-1]
        last_token["score"] = (last_token["score"] * count + token["score"]) / (
            count + 1
        )
        piece_counts[-1] = count + 1

    return merged_tokens


def ner(input):
    """Find entities in ``input`` and merge split tokens before returning them.

    The output of ``get_completion`` is passed through ``merge_tokens``. The
    merged list is returned under ``"entities"`` alongside the original
    string under ``"text"``.
    """
    entities = merge_tokens(get_completion(input))
    return {"text": input, "entities": entities}


# Stop any Gradio server started by an earlier cell before launching a new one.
gr.close_all()
demo = gr.Interface(
    # `ner` here is the variant that merges tokens before returning them.
    fn=ner,
    inputs=[gr.Textbox(label="Text to find entities", lines=2)],
    outputs=[gr.HighlightedText(label="Text with entities")],
    title="NER with dslim/bert-base-NER",
    description="Find entities using the `dslim/bert-base-NER` model under the hood!",
    allow_flagging="never",
    # Ready-made sample inputs for the app.
    examples=[
        "My name is Andrew, I'm building DeeplearningAI and I live in California",
        "My name is Poli, I live in Vienna and work at HuggingFace",
    ],
)

# share=True: the cell output lists a public *.gradio.live URL besides the local one.
demo.launch(share=True)

Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860




* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://9423a6ac9e78bd57f4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


