In [1]:
# Imports the Google Cloud client library
from google.cloud import language_v1
# Instantiate the Natural Language API client once; later cells reuse this
# module-level `client` (extract_entities reads it as a global).
# NOTE(review): presumably relies on Application Default Credentials being
# configured in the environment — confirm.
client = language_v1.LanguageServiceClient()

In [2]:
# The useful API methods are analyze_entities, moderate_text and classify_text
def extract_sections(client, document, min_confidence=0.5):
    """Classify ``document`` and return its confident content categories.

    Calls ``client.classify_text`` and keeps only the categories whose
    confidence is strictly greater than ``min_confidence``.

    Args:
        client: Object exposing ``classify_text(request=...)``; in this
            notebook a ``language_v1.LanguageServiceClient``.
        document: Document to classify; forwarded unchanged in the request.
        min_confidence: Exclusive lower bound on a category's confidence.
            Defaults to 0.5, the cut-off that used to be hard-coded here.

    Returns:
        list[tuple]: ``(name, confidence)`` pairs sorted by confidence,
        highest first. Empty when no category clears the threshold.
    """
    response = client.classify_text(request={"document": document})
    scored = [
        (category.name, category.confidence)
        for category in response.categories
        if category.confidence > min_confidence
    ]
    # list.sort is stable, so equally confident categories keep API order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [3]:
def extract_categories(client, document, min_confidence=0.5):
    """Moderate ``document`` and return its confident moderation categories.

    Calls ``client.moderate_text`` and keeps only the moderation categories
    whose confidence is strictly greater than ``min_confidence``.

    Args:
        client: Object exposing ``moderate_text(request=...)``; in this
            notebook a ``language_v1.LanguageServiceClient``.
        document: Document to moderate; forwarded unchanged in the request.
        min_confidence: Exclusive lower bound on a category's confidence.
            Defaults to 0.5, the cut-off that used to be hard-coded here.

    Returns:
        list[tuple]: ``(name, confidence)`` pairs sorted by confidence,
        highest first. Empty when no category clears the threshold.
    """
    response = client.moderate_text(request={"document": document})
    scored = [
        (category.name, category.confidence)
        for category in response.moderation_categories
        if category.confidence > min_confidence
    ]
    # list.sort is stable, so equally confident categories keep API order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [4]:
def analyse_entities(client, document):
    """Group the entities found in ``document`` by entity type.

    Calls ``client.analyze_entities`` and buckets every returned entity under
    the lower-cased name of its ``type_`` (for example ``"person"``).

    Args:
        client: Object exposing ``analyze_entities(request=...)``; in this
            notebook a ``language_v1.LanguageServiceClient``.
        document: Document to analyse; forwarded unchanged in the request.

    Returns:
        dict: Maps each type label to a list of ``(name, salience)`` tuples
        sorted by salience, highest first. Keys appear in the order their
        type is first seen in the response. Repeated entity names are kept.
    """
    response = client.analyze_entities(request={"document": document})

    grouped = {}
    for entity in response.entities:
        entity_type = entity.type_
        # Use the enum member's name rather than slicing str(enum): the
        # "Type.X" rendering is not guaranteed (IntEnum.__str__ returns the
        # bare integer on Python 3.11+), which would collapse every key to "".
        label = getattr(entity_type, "name", None)
        if label is None:
            # Value without a ``name`` (e.g. a raw int): keep it visible
            # instead of silently producing an empty key.
            label = str(entity_type)
        grouped.setdefault(label.lower(), []).append((entity.name, entity.salience))

    for ranked in grouped.values():
        # Stable sort: entities with equal salience keep response order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)
    return grouped

In [5]:
def extract_entities(text):
    """Run the three analyses on ``text`` and bundle their results.

    Wraps ``text`` in a plain-text ``Document`` and passes it, together with
    the module-level ``client``, to ``analyse_entities``,
    ``extract_categories`` and ``extract_sections`` (in that order).

    Args:
        text: Raw text to analyse.

    Returns:
        dict: Keys ``'entities'``, ``'categories'`` and ``'sections'``
        holding the respective helper's return value.
    """
    plain_text = language_v1.types.Document.Type.PLAIN_TEXT
    doc = language_v1.types.Document(content=text, type_=plain_text)
    # Dict displays evaluate top to bottom, so the helpers still run in the
    # order entities -> categories -> sections.
    return {
        'entities': analyse_entities(client, doc),
        'categories': extract_categories(client, doc),
        'sections': extract_sections(client, doc),
    }

In [8]:
# Sample article (AFP). The cells visible here only analyse `new_text`; this
# one is kept as an alternative input.
# A trailing backslash joins the physical lines, so the indentation of each
# continuation line is part of the string.
# Fixed: the original had `\the Swiss police`, which Python reads as a TAB
# followed by "he Swiss police".
# NOTE(review): the `",` after "AFP" looks like copy/paste residue — confirm
# before removing it.
text = '''GENEVA – The remains of a climber discovered in the Swiss Alps in 2022 have been identified as \
            those of a British mountaineer who went missing 52 years ago, local police said on Thursday.\
            It is the latest in a series of discoveries of remains of long-missing climbers revealed as the Alps’ glaciers melt and recede because of global warming.\
            The climber was reported missing in July 1971, \
            but search teams at the time turned up nothing, said police in the canton of Valais, south-west Switzerland.Then on Aug 22, 2022,\
            two climbers found human remains on the Chessjengletscher glacier near Saas-Fee,\
            an Alpine village in the Saas Valley.It took a year to identify the person as experts worked their \
            way through the case files of missing climbers.Finally, with the help of Interpol Manchester and the police in Scotland,\
            a relative was found and a DNA sample allowed them to identify the British mountaineer\
            , the Swiss police said in a statement.\
            The climber was formally identified on Aug 30.Increasing numbers of human remains, some of them of climbers missing for decades,\
            have been discovered in recent years as glaciers in the Alps melt because of global warming.In late July, \
            the remains of a German climber who went missing in 1986 were discovered on another Swiss glacier. AFP                  ",
            '''

# Sample article (Reuters) that is actually analysed below.
# NOTE(review): several sentences are joined without a space after the full
# stop (e.g. `corridor.Earlier`); the recorded output contains entity names
# such as 'corridor.' and 'Monday.', presumably as a consequence — consider
# normalising the text before analysis.
new_text = """EU countries are still discussing the idea of a humanitarian ceasefire in the war between Israel and Hamas but there are different ways to get much-needed aid to Palestinians in Gaza, Swedish foreign minister Tobias Billstrom said on Monday."The discussions are ongoing, but the question really isn't about a ceasefire, but about how to bring aid forward and that can be done in very many different ways," he told reporters after a meeting of EU foreign ministers in Luxembourg. He said Sweden preferred a U.N. proposal for a humanitarian corridor.Earlier on Monday, EU foreign policy chief Josep Borrell voiced support for a "humanitarian pause" but some of the bloc's foreign ministers expressed reservations about the idea. REUTERS
"""
# extract_entities issues three API requests through the shared `client`
# (entity analysis, moderation, classification) and returns them in one dict.
properties = extract_entities(new_text)

In [9]:
properties

{'entities': {'other': [('question', 0.13663709163665771),
   ('idea', 0.08708890527486801),
   ('aid', 0.04305387660861015),
   ('ways', 0.03299892693758011),
   ('ways', 0.02130267396569252),
   ('aid', 0.020042717456817627),
   ('support', 0.00680397218093276),
   ('pause', 0.00680397218093276),
   ('idea', 0.005423919763416052),
   ('reservations', 0.004764935467392206),
   ('proposal', 0.002986403414979577)],
  'location': [('countries', 0.0907101258635521),
   ('EU', 0.05727005749940872),
   ('Gaza', 0.017679721117019653),
   ('Monday.', 0.013600576668977737),
   ('Swedish', 0.011953997425734997),
   ('Luxembourg', 0.0036788482684642076),
   ('corridor.', 0.002986403414979577)],
  'event': [('ceasefire', 0.08708890527486801),
   ('war', 0.07688276469707489),
   ('ceasefire', 0.02319924160838127),
   ('discussions', 0.019669221714138985),
   ('meeting', 0.018732242286205292)],
  'person': [('Tobias Billstrom', 0.059143681079149246),
   ('Palestinians', 0.017679721117019653),
   ('

In [11]:
# Lower-cased tag strings for the Reuters sample article.
# NOTE(review): presumably produced by an earlier tagging approach and kept
# for comparison with `properties` — confirm.
prev_tags = [
    'eu countries',
    'swedish foreign minister tobias billstrom',
    'foreign policy chief josep borrell',
    'foreign ministers',
    'eu',
    'humanitarian corridor',
    'humanitarian ceasefire',
    'humanitarian pause',
    'many different ways',
    'monday',
    '"the discussions',
    'israel',
    'different ways',
    'much-needed aid',
    'idea',
    'reuters',
    'aid',
    'meeting',
    'war',
    'hamas',
]
