In [31]:
#import needed libraries
from azure.ai.textanalytics import TextAnalyticsClient, ExtractiveSummaryAction
from azure.core.credentials import AzureKeyCredential

import os
from dotenv import load_dotenv

In [2]:
# Load variables from a .env file (python-dotenv) into the process environment
# so the os.getenv() lookups for the Language endpoint and key can find them.
load_dotenv()

True

In [3]:
# Azure AI Language connection settings, read from the environment.
# Either value is None when its variable is not set.
LANGUAGE_ENDPOINT = os.environ.get("LANGUAGE_ENDPOINT")
LANGUAGE_KEY = os.environ.get("LANGUAGE_KEY")

In [4]:
# Wrap the resource key in a credential and build the client that every
# later cell uses to call the Language service.
credential = AzureKeyCredential(LANGUAGE_KEY)
client = TextAnalyticsClient(endpoint=LANGUAGE_ENDPOINT, credential=credential)

In [None]:
# Collect the review file names. os.listdir() returns entries in arbitrary
# order and includes anything in the folder (e.g. .ipynb_checkpoints), so keep
# only .txt files and sort them for a reproducible document order.
txt_files = sorted(
    name for name in os.listdir('Data/reviews') if name.endswith('.txt')
)

In [7]:
# Show the review file names that were found
txt_files

['review1.txt', 'review2.txt', 'review3.txt', 'review4.txt', 'review5.txt']

In [None]:
# Map each review's file name to the full text of that file
review_data = {}
for file_name in txt_files:
    with open(os.path.join('Data/reviews', file_name), encoding='utf-8') as file:
        review_text = file.read()
    review_data[file_name] = review_text

In [10]:
# Show the raw mapping of file name -> review text
review_data

{'review1.txt': 'Good Hotel and staff\nThe Royal Hotel, London, UK\n3/2/2018\nClean rooms, good service, great location near Buckingham Palace and Westminster Abbey, and so on. We thoroughly enjoyed our stay. The courtyard is very peaceful and we went to a restaurant which is part of the same group and is Indian ( West coast so plenty of fish) with a Michelin Star. We had the taster menu which was fabulous. The rooms were very well appointed with a kitchen, lounge, bedroom and enormous bathroom. Thoroughly recommended.',
 'review2.txt': "Tired hotel with poor service\nThe Royal Hotel, London, United Kingdom\n5/6/2018\nThis is a old hotel (has been around since 1950's) and the room furnishings are average - becoming a bit old now and require changing. The internet didn't work and had to come to one of their office rooms to check in for my flight home. The website says it's close to the British Museum, but it's too far to walk.",
 'review3.txt': 'Good location and helpful staff, but on a

In [11]:
# Print each review under its file name, separated by blank lines for readability
for name, text in review_data.items():
    print(f'{name} : \n {text} \n\n\n\n')

review1.txt : 
 Good Hotel and staff
The Royal Hotel, London, UK
3/2/2018
Clean rooms, good service, great location near Buckingham Palace and Westminster Abbey, and so on. We thoroughly enjoyed our stay. The courtyard is very peaceful and we went to a restaurant which is part of the same group and is Indian ( West coast so plenty of fish) with a Michelin Star. We had the taster menu which was fabulous. The rooms were very well appointed with a kitchen, lounge, bedroom and enormous bathroom. Thoroughly recommended. 




review2.txt : 
 Tired hotel with poor service
The Royal Hotel, London, United Kingdom
5/6/2018
This is a old hotel (has been around since 1950's) and the room furnishings are average - becoming a bit old now and require changing. The internet didn't work and had to come to one of their office rooms to check in for my flight home. The website says it's close to the British Museum, but it's too far to walk. 




review3.txt : 
 Good location and helpful staff, but on a bu

### Detect Language

In [12]:
# Re-display the loaded reviews for reference before running language detection
review_data

{'review1.txt': 'Good Hotel and staff\nThe Royal Hotel, London, UK\n3/2/2018\nClean rooms, good service, great location near Buckingham Palace and Westminster Abbey, and so on. We thoroughly enjoyed our stay. The courtyard is very peaceful and we went to a restaurant which is part of the same group and is Indian ( West coast so plenty of fish) with a Michelin Star. We had the taster menu which was fabulous. The rooms were very well appointed with a kitchen, lounge, bedroom and enormous bathroom. Thoroughly recommended.',
 'review2.txt': "Tired hotel with poor service\nThe Royal Hotel, London, United Kingdom\n5/6/2018\nThis is a old hotel (has been around since 1950's) and the room furnishings are average - becoming a bit old now and require changing. The internet didn't work and had to come to one of their office rooms to check in for my flight home. The website says it's close to the British Museum, but it's too far to walk.",
 'review3.txt': 'Good location and helpful staff, but on a

In [18]:
# Detect the language of a single review; the call takes a list of documents
# even when there is only one.
detected_language = client.detect_language([review_data['review5.txt']])

In [19]:
# Show the language-detection result for the single review
detected_language



In [20]:
# Detect the language of every review in one batched request (plain strings, no ids)
detected_languages = client.detect_language(list(review_data.values()))

In [21]:
# Show the language-detection results for all reviews
detected_languages



In [37]:
# Build the request documents, using each file name as the document id so
# results can be traced back to the review they came from.
documents = [
    {"id": name, "text": text}
    for name, text in review_data.items()
]

In [24]:
# Repeat language detection, this time sending the id-tagged documents
detected_languages_new_ids = client.detect_language(documents)

In [25]:
# Show the detection results for the id-tagged documents
detected_languages_new_ids



### Get Sentiment

In [38]:
# Run sentiment analysis over all id-tagged reviews
sentimentResult = client.analyze_sentiment(documents)

In [40]:
# Show the sentiment analysis results
sentimentResult



### Get Key Phrases

In [41]:
# Extract key phrases from all id-tagged reviews
phrases = client.extract_key_phrases(documents)

In [42]:
# Show the key-phrase extraction results
phrases



### Extract Entities

In [43]:
# Recognize categorized entities (locations, dates, people, ...) in all reviews
entity_result = client.recognize_entities(documents)

In [44]:
# Show the raw entity-recognition results (one RecognizeEntitiesResult per document id)
entity_result

[RecognizeEntitiesResult(id=review1.txt, entities=[CategorizedEntity(text=Hotel, category=Location, subcategory=None, length=5, offset=5, confidence_score=0.51), CategorizedEntity(text=staff, category=PersonType, subcategory=None, length=5, offset=15, confidence_score=0.93), CategorizedEntity(text=Royal Hotel, London, UK, category=Address, subcategory=None, length=23, offset=25, confidence_score=0.82), CategorizedEntity(text=3/2/2018, category=DateTime, subcategory=Date, length=8, offset=49, confidence_score=1.0), CategorizedEntity(text=Buckingham Palace, category=Location, subcategory=Structural, length=17, offset=105, confidence_score=0.99), CategorizedEntity(text=Westminster Abbey, category=Location, subcategory=None, length=17, offset=127, confidence_score=0.99), CategorizedEntity(text=courtyard, category=Location, subcategory=Structural, length=9, offset=193, confidence_score=0.72), CategorizedEntity(text=restaurant, category=Location, subcategory=Structural, length=10, offset=237

In [45]:
# List the entities found in each review. Documents the service could not
# process come back as error results without an `entities` attribute, so they
# are skipped here instead of raising AttributeError.
[doc.entities for doc in entity_result if not doc.is_error]

[[CategorizedEntity(text=Hotel, category=Location, subcategory=None, length=5, offset=5, confidence_score=0.51),
  CategorizedEntity(text=staff, category=PersonType, subcategory=None, length=5, offset=15, confidence_score=0.93),
  CategorizedEntity(text=Royal Hotel, London, UK, category=Address, subcategory=None, length=23, offset=25, confidence_score=0.82),
  CategorizedEntity(text=3/2/2018, category=DateTime, subcategory=Date, length=8, offset=49, confidence_score=1.0),
  CategorizedEntity(text=Buckingham Palace, category=Location, subcategory=Structural, length=17, offset=105, confidence_score=0.99),
  CategorizedEntity(text=Westminster Abbey, category=Location, subcategory=None, length=17, offset=127, confidence_score=0.99),
  CategorizedEntity(text=courtyard, category=Location, subcategory=Structural, length=9, offset=193, confidence_score=0.72),
  CategorizedEntity(text=restaurant, category=Location, subcategory=Structural, length=10, offset=237, confidence_score=0.73),
  Categor

### Getting Linked Entities

In [46]:
# Recognize entities that can be linked to a knowledge-base entry (e.g. Wikipedia)
Linked_entities = client.recognize_linked_entities(documents)

In [47]:
# Show the raw linked-entity results (one RecognizeLinkedEntitiesResult per document id)
Linked_entities

[RecognizeLinkedEntitiesResult(id=review1.txt, entities=[LinkedEntity(name=GOOD Music, matches=[LinkedEntityMatch(confidence_score=0.01, text=Good, length=4, offset=0)], language=en, data_source_entity_id=GOOD Music, url=https://en.wikipedia.org/wiki/GOOD_Music, data_source=Wikipedia, bing_entity_search_api_id=6f7d0967-026b-1507-79a9-c4e66b497716), LinkedEntity(name=Hotel, matches=[LinkedEntityMatch(confidence_score=0.01, text=Hotel, length=5, offset=5)], language=en, data_source_entity_id=Hotel, url=https://en.wikipedia.org/wiki/Hotel, data_source=Wikipedia, bing_entity_search_api_id=04997f43-c747-bdd9-e5fe-3fa762f7de51), LinkedEntity(name=The Royal Hotel, matches=[LinkedEntityMatch(confidence_score=0.67, text=The Royal Hotel, length=15, offset=21)], language=en, data_source_entity_id=The Royal Hotel, url=https://en.wikipedia.org/wiki/The_Royal_Hotel, data_source=Wikipedia, bing_entity_search_api_id=cab45def-b5e6-6ac4-ce02-697c74a3e578), LinkedEntity(name=London, matches=[LinkedEntity

In [48]:
# List the linked entities found in each review. Documents the service could
# not process come back as error results without an `entities` attribute, so
# they are skipped here instead of raising AttributeError.
[doc.entities for doc in Linked_entities if not doc.is_error]

[[LinkedEntity(name=GOOD Music, matches=[LinkedEntityMatch(confidence_score=0.01, text=Good, length=4, offset=0)], language=en, data_source_entity_id=GOOD Music, url=https://en.wikipedia.org/wiki/GOOD_Music, data_source=Wikipedia, bing_entity_search_api_id=6f7d0967-026b-1507-79a9-c4e66b497716),
  LinkedEntity(name=Hotel, matches=[LinkedEntityMatch(confidence_score=0.01, text=Hotel, length=5, offset=5)], language=en, data_source_entity_id=Hotel, url=https://en.wikipedia.org/wiki/Hotel, data_source=Wikipedia, bing_entity_search_api_id=04997f43-c747-bdd9-e5fe-3fa762f7de51),
  LinkedEntity(name=The Royal Hotel, matches=[LinkedEntityMatch(confidence_score=0.67, text=The Royal Hotel, length=15, offset=21)], language=en, data_source_entity_id=The Royal Hotel, url=https://en.wikipedia.org/wiki/The_Royal_Hotel, data_source=Wikipedia, bing_entity_search_api_id=cab45def-b5e6-6ac4-ce02-697c74a3e578),
  LinkedEntity(name=London, matches=[LinkedEntityMatch(confidence_score=0.92, text=London, UK, len

### Get Text Summary

In [32]:
# Sample input for extractive summarization: a single document whose text is
# assembled from adjacent string literals, one sentence per literal.
document = [
    (
        "The extractive summarization feature uses natural language processing techniques to locate key sentences in an unstructured text document. "
        "These sentences collectively convey the main idea of the document. "
        "This feature is provided as an API for developers. "
        "They can use it to build intelligent solutions based on the relevant information extracted to support various use cases. "
        "Extractive summarization supports several languages. "
        "It is based on pretrained multilingual transformer models, part of our quest for holistic representations. "
        "It draws its strength from transfer learning across monolingual and harness the shared nature of languages to produce models of improved quality and efficiency. "
    ),
]

In [33]:
# Start a long-running analysis that runs one action: an extractive summary
# limited to a single sentence per document.
summary_action = ExtractiveSummaryAction(max_sentence_count=1)
poller = client.begin_analyze_actions(document, actions=[summary_action])

In [34]:
# Wait for the analysis started by begin_analyze_actions and collect its results
document_results = poller.result()

In [36]:
# Each item yielded by document_results holds the action results for one input
# document; only one action (extractive summary) was submitted, so index 0 is
# that summary result.
for result in document_results:
    extract_summary_result = result[0]  # the single (summary) action result for this document
    if extract_summary_result.is_error:
        # Error results keep their code and message on the nested `.error`
        # object; reading `.code` / `.message` directly raises AttributeError.
        print("...Is an error with code '{}' and message '{}'".format(
            extract_summary_result.error.code, extract_summary_result.error.message
        ))
    else:
        # Join the selected sentences back into one summary string
        print("Summary extracted: \n{}".format(
            " ".join(sentence.text for sentence in extract_summary_result.sentences)
        ))

Summary extracted: 
The extractive summarization feature uses natural language processing techniques to locate key sentences in an unstructured text document.
