In [1]:
# Imports the Google Cloud client library
from google.cloud import language_v1
# Instantiates a client
# This module-level object is the `client` that extract_entities() reads as a
# global and hands to each analysis helper.
# NOTE(review): presumably relies on Google Cloud credentials being configured
# in the environment — confirm before running on a fresh kernel.
client = language_v1.LanguageServiceClient()

In [2]:
# The useful client methods are analyze_entities, moderate_text and classify_text
def extract_sections(client, document, min_confidence=0.5):
    """Classify a document and return its confident content categories.

    Args:
        client: Object exposing ``classify_text(request=...)``; the response
            must have a ``categories`` iterable whose items carry ``name``
            and ``confidence`` attributes.
        document: Document passed through unchanged in the request payload.
        min_confidence: Exclusive lower bound on ``confidence``. Defaults to
            0.5, the cut-off that used to be hard-coded.

    Returns:
        A list of ``(name, confidence)`` tuples, highest confidence first.
        Empty when no category exceeds the threshold.
    """
    response = client.classify_text(request={"document": document})
    scored = [
        (category.name, category.confidence)
        for category in response.categories
        if category.confidence > min_confidence
    ]
    # Stable descending sort: ties keep the order the API returned them in.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [3]:
def extract_categories(client, document, min_confidence=0.5):
    """Run text moderation and return the confident moderation categories.

    Args:
        client: Object exposing ``moderate_text(request=...)``; the response
            must have a ``moderation_categories`` iterable whose items carry
            ``name`` and ``confidence`` attributes.
        document: Document passed through unchanged in the request payload.
        min_confidence: Exclusive lower bound on ``confidence``. Defaults to
            0.5, the cut-off that used to be hard-coded.

    Returns:
        A list of ``(name, confidence)`` tuples, highest confidence first.
        Empty when no category exceeds the threshold.
    """
    response = client.moderate_text(request={"document": document})
    scored = [
        (category.name, category.confidence)
        for category in response.moderation_categories
        if category.confidence > min_confidence
    ]
    # Stable descending sort: ties keep the order the API returned them in.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

In [4]:
def analyse_entities(client, document):
    """Group the entities found in a document by entity type.

    Args:
        client: Object exposing ``analyze_entities(request=...)``; the
            response must have an ``entities`` iterable whose items carry
            ``name``, ``salience`` and ``type_`` attributes.
        document: Document passed through unchanged in the request payload.

    Returns:
        A dict mapping the lower-cased entity type name (e.g. ``"person"``)
        to a list of ``(name, salience)`` tuples sorted by salience, highest
        first. Keys appear in the order each type is first encountered.
    """
    response = client.analyze_entities(request={"document": document})

    grouped = {}
    for entity in response.entities:
        entity_type = entity.type_
        # Read the enum member's name directly. Slicing str(entity_type)[5:]
        # assumed a "Type.PERSON" rendering, but str() of an IntEnum-based
        # member may be just the integer (Python 3.11+), which turned every
        # label into "" and merged all entity types into one bucket.
        label = getattr(entity_type, "name", None)
        if label is None:
            # Non-enum fallback: keep whatever follows the last dot.
            label = str(entity_type).rpartition(".")[2]
        grouped.setdefault(label.lower(), []).append((entity.name, entity.salience))

    for mentions in grouped.values():
        # Stable descending sort: equal saliences keep API order.
        mentions.sort(key=lambda pair: pair[1], reverse=True)
    return grouped

In [5]:
def extract_entities(text):
    """Analyse plain text and bundle the three analysis results in one dict.

    Wraps ``text`` in a PLAIN_TEXT ``Document`` and sends it, via the
    module-level ``client``, through ``analyse_entities``,
    ``extract_categories`` and ``extract_sections``.

    Args:
        text: The raw text to analyse.

    Returns:
        A dict with the keys ``'entities'``, ``'categories'`` and
        ``'sections'`` holding the respective helper's return value.
    """
    document_cls = language_v1.types.Document
    document = document_cls(content=text, type_=document_cls.Type.PLAIN_TEXT)
    # Dict values are evaluated top to bottom, so the helpers run in the
    # order entities -> categories -> sections.
    return {
        'entities': analyse_entities(client, document),
        'categories': extract_categories(client, document),
        'sections': extract_sections(client, document),
    }

In [6]:
# Sample AFP news article used as input for the extraction pipeline.
# Built from adjacent string literals instead of a backslash-continued
# triple-quoted string so that (a) source indentation is not embedded in the
# text, (b) no stray backslash is read as an escape sequence (the former
# "\the" was parsed as a tab followed by "he"), and (c) the pasted `",`
# residue after "AFP" is gone. The article wording itself is unchanged.
text = (
    "GENEVA – The remains of a climber discovered in the Swiss Alps in 2022 have been identified as "
    "those of a British mountaineer who went missing 52 years ago, local police said on Thursday. "
    "It is the latest in a series of discoveries of remains of long-missing climbers revealed as "
    "the Alps’ glaciers melt and recede because of global warming. "
    "The climber was reported missing in July 1971, "
    "but search teams at the time turned up nothing, said police in the canton of Valais, "
    "south-west Switzerland.Then on Aug 22, 2022, "
    "two climbers found human remains on the Chessjengletscher glacier near Saas-Fee, "
    "an Alpine village in the Saas Valley.It took a year to identify the person as experts worked their "
    "way through the case files of missing climbers.Finally, with the help of Interpol Manchester "
    "and the police in Scotland, "
    "a relative was found and a DNA sample allowed them to identify the British mountaineer, "
    "the Swiss police said in a statement. "
    "The climber was formally identified on Aug 30.Increasing numbers of human remains, "
    "some of them of climbers missing for decades, "
    "have been discovered in recent years as glaciers in the Alps melt because of global warming.In late July, "
    "the remains of a German climber who went missing in 1986 were discovered on another Swiss glacier. AFP"
)
properties = extract_entities(text)

In [7]:
# Bare expression as the cell's last line so the notebook renders the dict
# returned by extract_entities(text) (keys: 'entities', 'categories', 'sections').
properties

{'entities': {'person': [('climber', 0.30005401372909546),
   ('mountaineer', 0.16543155908584595),
   ('police', 0.025899428874254227),
   ('police', 0.021555233746767044),
   ('climber', 0.015148167498409748),
   ('climbers', 0.013487414456903934),
   ('experts', 0.013271426782011986),
   ('climbers', 0.013096349313855171),
   ('climbers', 0.006331899203360081),
   ('person', 0.0052139549516141415),
   ('some', 0.003916362766176462),
   ('Chessjengletscher', 0.003360557835549116),
   ('police', 0.0021465886384248734),
   ('relative', 0.0017238332657143474)],
  'other': [('remains', 0.06364940106868744),
   ('human remains', 0.041991885751485825),
   ('latest', 0.03244724124670029),
   ('glaciers', 0.012956592254340649),
   ('remains', 0.011855159886181355),
   ('numbers', 0.011418490670621395),
   ('glaciers', 0.005248390603810549),
   ('help', 0.0052139549516141415),
   ('way', 0.0052139549516141415),
   ('case files', 0.0052139549516141415),
   ('remains', 0.004459355492144823),
  