# NLP with a simple interface

## Import needed libraries

In [78]:
import os
import io
import openai
import base64
import requests
import json
from typing import List

from IPython.display import Image, display, HTML
from PIL import Image


from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv("secrets.env", raise_error_if_not_found=True))

openai.api_key = os.environ["OPENAI_API_KEY"]
HF_API_TOKEN = os.environ["HF_API_TOKEN"]

## Create the summarization call

In [79]:
# make a call to the openAI API
def get_completion(inputs:str):
	response = openai.completions.create(
		model="gpt-3.5-turbo-instruct",
		prompt=f"Summarize the following text:/n----/n{inputs}",
		max_tokens=1000
	)
	return response.choices[0].text

# a text to try it on
text = '''The tower is 324 metres (1,063 ft) tall, about the same height
        as an 81-storey building, and the tallest structure in Paris. 
        Its base is square, measuring 125 metres (410 ft) on each side. 
        During its construction, the Eiffel Tower surpassed the Washington 
        Monument to become the tallest man-made structure in the world,
        a title it held for 41 years until the Chrysler Building
        in New York City was finished in 1930. It was the first structure 
        to reach a height of 300 metres. Due to the addition of a broadcasting 
        aerial at the top of the tower in 1957, it is now taller than the 
        Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the 
        Eiffel Tower is the second tallest free-standing structure in France 
        after the Millau Viaduct.'''

prt(get_completion(text))

('\n'
 '\n'
 'The Eiffel Tower is a 324 metre tall structure in Paris, making it the tallest structure in the '
 'city. It has a square base measuring 125 metres on each side. At the time of its construction, '
 'it was the tallest man-made structure in the world and held this title for 41 years until the '
 'Chrysler Building was built in 1930. It was also the first structure to reach a height of 300 '
 'metres. Since the addition of a broadcasting aerial in 1957, it is now taller than the Chrysler '
 'Building by 5.2 metres. Excluding transmitters, it is the second tallest free-standing structure '
 'in France after the Millau Viaduct.')


## Getting started with gradio gr.Interface

In [80]:
import gradio as gr
def summarize(inputs:str):
	return get_completion(inputs=inputs)

# create the demo
demo = gr.Interface(
	fn=summarize,
	inputs=gr.Textbox(
		placeholder = "your text here", 
		label="Text to summarize",
		lines=6,
		),
	outputs=gr.Textbox(
		label="Summary",
		lines=3,
		),
	title="Text summarizer",
	description="This app summarize text using openAI `GPT-3.5-turbo-instruct`model"
)

demo.launch(share=False)

gr.close_all()


Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7861
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7861


## Building a Named Entity Recognition app

### Using openAI GPT function calls

In [81]:
# As we don't have access to the API call to bert-base-NER, let's try to recreate it using openAI function calls
def ner(input:str):
	'''
	return the names and type of entities in the given text as a list of dictionary
	'''

	# the schema for the funtion call
	ner_gpt_functions = [
		{
			'name': 'ner-gpt',
			'description': 'predict the named entity label for each token in a text sequence, indicating the type of entity it represents.',
			'parameters': {
				'type': 'object',
				'properties': {
					'B-MISC': {
						'type': 'string',
						'description': "Beginning of a miscellaneous entity right after another miscellaneous entity"
					},
					'I-MISC': {
						'type': 'string',
						'description': 'Name of the miscellaneous entity'
					},
					'B-PER': {
						'type': 'string',
						'description': "Beginning of a person's name right after another person's name"
					},
					'I-PER': {
						'type': 'string',
						'description': 'Name of the person'
					},
					'B-ORG': {
						'type': 'string',
						'description': 'Beginning of an organization right after another organization'
					},
					'I-ORG': {
						'type': 'string',
						'description':'Name of the organization'
					},
					'B-LOC': {
						'type': 'string',
						'description': 'Beginning of location right after another location'
					},
					'I-LOC': {
						'type': 'string',
						'description':'Name of the location'
					},
					
				}
			}
		}
	]

	# call the function with the openAI API
	response = openai.chat.completions.create(
    model="gpt-4",
    messages=[
		{
			"role": "user", 
			"content": input
			}],
    functions=ner_gpt_functions,
    function_call="auto",
	)

	# format the answer to match the expected structure
	json_response = json.loads(response.choices[0].message.function_call.arguments)
	mydict = dict(json_response)
	output: List(dict(str, str)) = []
	for key, value in mydict.items():
		output.append(
			{
				"entities": str(key),
				"text": str(value)
			}
		)
	
	# return the input text with the detected entities	
	return {
		"text":input,
		"entities": output
		}

In [82]:
# # close all already open ports
# gr.close_all()

# # create the app
# demo = gr.Interface(
# 	fn=ner,
# 	inputs=[gr.Textbox(label="Text to find entities", lines = 2)],
# 	outputs=[gr.HighlightedText(label="Text with highlighted entities")],
# 	title="Named Entities Recognition using GPT-4",
# 	description="Find entities in a text, using 'GPT-4' with function calls under the hood!",
# 	allow_flagging = "never",
# 	# below some examples for the user to try to see what the app is doing
# 	examples=["Hi, my name is Sylvain, I live in Germany and I work at Hines",
# 		   "Wikipedia is a foundation that develop a community managed encyclopedie"]
# )

# # launch the app!
# demo.launch()

Well it does not work very well. It seems that I need the start token and probably the end token for each word. It is probably doable with regEx, but I should try this late #TODO. Now, let's try with Bert-NER instead.

### Using BERT-BASE-NER via Hugging Face API

In [83]:
def ner(input:str):

	API_URL = "https://api-inference.huggingface.co/models/dslim/bert-base-NER"
	headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

	# make the API call
	response = requests.post(API_URL, headers=headers, json=input)
	output = response.json()
	
	# return the formated response
	return {
		"text":input,
		"entities":output,
	}

# define the app
app = gr.Interface(
	fn=ner,
	inputs=gr.Textbox(label="write your text here", lines=3),
	outputs=gr.HighlightedText(label="Text with highlighted entities", combine_adjacent=True),
	title="Named Entity Recognition, by Sylvain Hellin",
	description="Highlight the named entities (like persons, firms) from a given Text, using 'BERT-BASE-NER' under the hood",
	allow_flagging="never",
	examples=[
		"Wikipedia is a free online encyclopedia that is collaboratively written and maintained by a community of volunteers, known as Wikipedians, through open collaboration and the use of a wiki-based editing system called MediaWiki.",
		"Alan Mathison Turing was an English mathematician, computer scientist, logician, cryptanalyst, and theoretical biologist. He was a key figure in breaking the Nazi Enigma code during World War II, which significantly contributed to the Allied victory."
	]
)

app.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [84]:
gr.close_all()

Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7861
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7860
Closing server running on port: 7861
Closing server running on port: 7860
Closing server running on port: 7860
