# Analyze text with Azure AI Language

The Azure AI Language service enables you to create intelligent apps and services that extract semantic information from text.

- Detect language from text
- Analyze text sentiment
- Extract key phrases, entities, and linked entities

# Importing libraries

In [1]:
from dotenv import load_dotenv
import os

In [2]:
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

In [31]:
from functools import lru_cache
from IPython.display import display
import pandas as pd

# Loading configurations

In [4]:
# Load settings through python-dotenv, then read the Azure AI Language
# endpoint and key from the process environment (each is None when unset).
load_dotenv()
ai_endpoint = os.environ.get('AI_LANGUAGE_SERVICE_ENDPOINT')
ai_key = os.environ.get('AI_LANGUAGE_SERVICE_KEY')

True

# Creating client using endpoint and key

In [6]:
# Wrap the service key in a credential object and build the single client
# instance that every analysis call in this notebook goes through.
credential = AzureKeyCredential(ai_key)
ai_client = TextAnalyticsClient(
    credential=credential,
    endpoint=ai_endpoint,
)

# Default analysis function

In [16]:
def default_analysis():
  """Print a full text analysis for every file in the 'reviews' folder.

  Each entry returned by `os.listdir('reviews')` is read as UTF-8 text and
  sent to `ai_client` once per analysis type (language, sentiment, key
  phrases, entities, linked entities). Results are printed as they arrive;
  nothing is returned and nothing is cached.
  """
  reviews_folder = 'reviews'
  for file_name in os.listdir(reviews_folder):
    # Read the file contents
    print(f'\n-------------\n{file_name}')
    # Context manager closes the handle right after reading instead of
    # leaving it open until garbage collection.
    with open(os.path.join(reviews_folder, file_name),
              encoding='utf8') as review_file:
      text = review_file.read()
    print(f'\n{text}')

    # Get language
    detected_language = ai_client.detect_language(documents=[text])[0]
    print(f'\nLanguage: {detected_language.primary_language.name}')

    # Get sentiment
    sentiment_analysis = ai_client.analyze_sentiment(documents=[text])[0]
    print(f"\nSentiment: {sentiment_analysis.sentiment}")

    # Get key phrases
    phrases = ai_client.extract_key_phrases(documents=[text])[0].key_phrases
    if phrases:
      print("\nKey Phrases:")
      for phrase in phrases:
        print(f'\t{phrase}')

    # Get entities
    entities = ai_client.recognize_entities(documents=[text])[0].entities
    if entities:
      print("\nEntities")
      for entity in entities:
        print(f'\t{entity.text} ({entity.category})')

    # Get linked entities
    linked_entities = ai_client.recognize_linked_entities(
        documents=[text])[0].entities
    if linked_entities:
      print("\nLinks")
      for linked_entity in linked_entities:
        print(f'\t{linked_entity.name} ({linked_entity.url})')

In [19]:
# default_analysis()

# Functions for API calls

In [11]:
@lru_cache(maxsize=None)
def detect_language(text):
  """Return the name of the primary language detected in `text`.

  The result is memoized per distinct `text`, so asking again for the same
  string does not call the service a second time.
  """
  results = ai_client.detect_language(documents=[text])
  first_document = results[0]
  return first_document.primary_language.name


@lru_cache(maxsize=None)
def analyze_sentiment(text):
  """Return the overall sentiment label the service assigns to `text`.

  The result is memoized per distinct `text`.
  """
  results = ai_client.analyze_sentiment(documents=[text])
  first_document = results[0]
  return first_document.sentiment


@lru_cache(maxsize=None)
def extract_key_phrases(text):
  """Return the key phrases the service extracts from `text`.

  The result is memoized per distinct `text`. Because the cache hands back
  the same object on every hit, callers must treat the returned collection
  as read-only (copy it before modifying).
  """
  results = ai_client.extract_key_phrases(documents=[text])
  first_document = results[0]
  return first_document.key_phrases


@lru_cache(maxsize=None)
def recognize_entities(text):
  """Return a list of `(entity_text, category)` tuples recognized in `text`.

  The result is memoized per distinct `text`; the same list object is
  returned on every cache hit, so callers should not mutate it.
  """
  results = ai_client.recognize_entities(documents=[text])
  pairs = []
  for item in results[0].entities:
    pairs.append((item.text, item.category))
  return pairs


@lru_cache(maxsize=None)
def recognize_linked_entities(text):
  """Return a list of `(name, url)` tuples for linked entities found in `text`.

  The result is memoized per distinct `text`; the same list object is
  returned on every cache hit, so callers should not mutate it.
  """
  results = ai_client.recognize_linked_entities(documents=[text])
  pairs = []
  for item in results[0].entities:
    pairs.append((item.name, item.url))
  return pairs

# Separate printing logic

In [28]:
def print_analysis(file_name, text):
  """Print a plain-text analysis report for one document.

  Prints a separator with `file_name`, the raw `text`, then the detected
  language, the sentiment, and (only when non-empty) the key phrases,
  entities and linked entities, each obtained from the cached helper
  functions in that order.
  """
  print(f'\n-------------\n{file_name}')
  print(f'\n{text}')

  print(f'\nLanguage: {detect_language(text)}')

  print(f"\nSentiment: {analyze_sentiment(text)}")

  key_phrases = extract_key_phrases(text)
  if key_phrases:
    print("\nKey Phrases:")
    for key_phrase in key_phrases:
      print(f'\t{key_phrase}')

  entity_pairs = recognize_entities(text)
  if entity_pairs:
    print("\nEntities:")
    for pair in entity_pairs:
      name, kind = pair
      print(f'\t{name} ({kind})')

  link_pairs = recognize_linked_entities(text)
  if link_pairs:
    print("\nLinked Entities:")
    for pair in link_pairs:
      name, link = pair
      print(f'\t{name} ({link})')

# Main logic [Improved]

In [29]:
def main():
  """Read every file in the 'reviews' folder as UTF-8 and print its analysis."""
  folder = 'reviews'
  for name in os.listdir(folder):
    with open(os.path.join(folder, name), encoding='utf8') as handle:
      contents = handle.read()
      print_analysis(name, contents)

In [30]:
# Run the per-file analysis over the 'reviews' folder and print each report.
main()


-------------
review1.txt

Good Hotel and staff
The Royal Hotel, London, UK
3/2/2018
Clean rooms, good service, great location near Buckingham Palace and Westminster Abbey, and so on. We thoroughly enjoyed our stay. The courtyard is very peaceful and we went to a restaurant which is part of the same group and is Indian ( West coast so plenty of fish) with a Michelin Star. We had the taster menu which was fabulous. The rooms were very well appointed with a kitchen, lounge, bedroom and enormous bathroom. Thoroughly recommended.

Language: English

Sentiment: positive

Key Phrases:
	The Royal Hotel
	Good Hotel
	good service
	great location
	Buckingham Palace
	Westminster Abbey
	same group
	West coast
	Michelin Star
	taster menu
	enormous bathroom
	Clean rooms
	staff
	London
	UK
	stay
	courtyard
	restaurant
	part
	plenty
	fish
	kitchen
	lounge
	bedroom

Entities:
	staff (PersonType)
	Royal Hotel (Location)
	London (Location)
	London (Location)
	UK (Location)
	3/2/2018 (DateTime)
	rooms (L

# Print analysis dataframe

In [50]:
def print_analysis_df(text, file_name=''):
  """Display the analysis of `text` as a pandas DataFrame.

  The frame has one column per result type: 'Language', 'Sentiment',
  'Key Phrases', 'Entities' and 'Linked Entities'. Columns shorter than the
  longest one are padded with empty strings so that all have equal length.

  Args:
    text: The document to analyze.
    file_name: Optional label displayed above the frame; skipped when empty.
  """
  data = {
      'Language': [detect_language(text)],
      'Sentiment': [analyze_sentiment(text)],
      # Copy the result: extract_key_phrases is lru_cache'd, so padding the
      # returned list in place would leave '' entries in the cache and
      # corrupt every later lookup for the same text.
      'Key Phrases': list(extract_key_phrases(text)),
      'Entities': [
          f"{entity} ({category})"
          for entity, category in recognize_entities(text)],
      'Linked Entities': [
          f"{linked_entity} ({url})"
          for linked_entity, url in recognize_linked_entities(text)],
  }

  # Pad into fresh lists (no in-place +=) so no source list is ever mutated.
  max_length = max(len(values) for values in data.values())
  padded = {
      key: values + [''] * (max_length - len(values))
      for key, values in data.items()}

  df = pd.DataFrame(padded)

  if file_name:
    display(file_name)
  display(df)

In [52]:
# Show the DataFrame view for every file in the 'reviews' folder, labelling
# each frame with its file name.
reviews_folder = 'reviews'
for file_name in os.listdir(reviews_folder):
  file_path = os.path.join(reviews_folder, file_name)
  # Each file is read as UTF-8 text; the handle is closed when the block exits.
  with open(file_path, encoding='utf8') as file:
    text = file.read()
    print_analysis_df(text, file_name)

'review1.txt'

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,positive,The Royal Hotel,staff (PersonType),GOOD Music (https://en.wikipedia.org/wiki/GOOD...
1,,,Good Hotel,Royal Hotel (Location),Hotel (https://en.wikipedia.org/wiki/Hotel)
2,,,good service,London (Location),The Royal Hotel (https://en.wikipedia.org/wiki...
3,,,great location,London (Location),London (https://en.wikipedia.org/wiki/London)
4,,,Buckingham Palace,UK (Location),Buckingham Palace (https://en.wikipedia.org/wi...
5,,,Westminster Abbey,3/2/2018 (DateTime),Westminster Abbey (https://en.wikipedia.org/wi...
6,,,same group,rooms (Location),India (https://en.wikipedia.org/wiki/India)
7,,,West coast,Buckingham Palace (Location),West Coast Main Line (https://en.wikipedia.org...
8,,,Michelin Star,Westminster Abbey (Location),Michelin Guide (https://en.wikipedia.org/wiki/...
9,,,taster menu,stay (Event),


'review2.txt'

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,negative,The Royal Hotel,hotel (Location),The Royal Hotel (https://en.wikipedia.org/wiki...
1,,,Tired hotel,Hotel (Location),London (https://en.wikipedia.org/wiki/London)
2,,,old hotel,London (Location),British Museum (https://en.wikipedia.org/wiki/...
3,,,poor service,London (Location),
4,,,United Kingdom,United Kingdom (Location),
5,,,room furnishings,5/6/2018 (DateTime),
6,,,office rooms,hotel (Location),
7,,,flight home,since 1950 (DateTime),
8,,,British Museum,room (Location),
9,,,London,now (DateTime),


'review3.txt'

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,mixed,Golden Gate bridge,staff (PersonType),Lombardy (https://en.wikipedia.org/wiki/Lombardy)
1,,,The Lombard Hotel,road (Location),Hotel (https://en.wikipedia.org/wiki/Hotel)
2,,,The Marina district,Lombard Hotel (Location),San Francisco (https://en.wikipedia.org/wiki/S...
3,,,San Francisco Museum,San Francisco (Location),Chestnut Street (Philadelphia) (https://en.wik...
4,,,Lombard Street,San Francisco (Location),"Marina District, San Francisco (https://en.wik..."
5,,,busy road,San Francisco (Location),"Museum of Fine Arts, Boston (https://en.wikipe..."
6,,,Chestnut Street,USA (Location),Golden Gate Bridge (https://en.wikipedia.org/w...
7,,,trendy area,8/16/2018 (DateTime),Room (https://en.wikipedia.org/wiki/Room)
8,,,interesting houses,August (DateTime),Lombard Street (San Francisco) (https://en.wik...
9,,,Fine Arts,Chestnut Street (Location),


'review4.txt'

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,mixed,two queen size beds,rooms (Location),"Lombard, Illinois (https://en.wikipedia.org/wi..."
1,,,busy SIX lane street,Lombard Hotel (Location),Hotel (https://en.wikipedia.org/wiki/Hotel)
2,,,Golden Gate Bridge,San Francisco (Location),San Francisco (https://en.wikipedia.org/wiki/S...
3,,,The Lombard Hotel,San Francisco (Location),Lombard Street (San Francisco) (https://en.wik...
4,,,Lombard street,San Francisco (Location),Golden Gate Bridge (https://en.wikipedia.org/w...
5,,,San Francisco,USA (Location),Traffic (https://en.wikipedia.org/wiki/Traffic)
6,,,early morning,9/5/2018 (DateTime),Noise rock (https://en.wikipedia.org/wiki/Nois...
7,,,cotton balls,Hotel (Location),Room (https://en.wikipedia.org/wiki/Room)
8,,,Marina district,Lombard street (Address),"Marina District, San Francisco (https://en.wik..."
9,,,good places,SIX (Quantity),Presidio of San Francisco (https://en.wikipedi...


'review5.txt'

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,French,positive,hôtel agréable,hôtel (Location),United Nations (https://en.wikipedia.org/wiki/...
1,,,L'Hotel Buckingham,Hotel Buckingham (Location),L'Hôtel (https://en.wikipedia.org/wiki/L'Hôtel)
2,,,Londres,Londres (Location),Buckingham (https://en.wikipedia.org/wiki/Buck...
3,,,UK,Londres (Location),London (https://en.wikipedia.org/wiki/London)
4,,,personnel,Londres (Location),United Kingdom (https://en.wikipedia.org/wiki/...
5,,,chambres,UK (Location),
6,,,,hôtel (Location),
7,,,,personnel (PersonType),
8,,,,amical (Skill),
9,,,,chambres (Location),


# Analyzing real world reviews

The reviews below were copied from various product pages on the Flipkart website.

In [70]:
# Analyze an inline multi-line review; no file_name is passed, so only the
# DataFrame is displayed.
print_analysis_df("""
Value for money
R/W Speed almost as mentioned, gets around 450MB/s
Size extreme compact and very light
Only problem is the heating, its getting warmer during operation
""")

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,positive,R/W Speed,R (Skill),Value (economics) (https://en.wikipedia.org/wi...
1,,,Value,W Speed (Skill),Speed (https://en.wikipedia.org/wiki/Speed)
2,,,money,450MB/s (Quantity),Size (https://en.wikipedia.org/wiki/Size)
3,,,450MB,heating (Skill),
4,,,Size,operation (Event),
5,,,problem,,
6,,,heating,,
7,,,operation,,


In [71]:

# Analyze an inline review; no file_name is passed, so only the DataFrame is
# displayed.
print_analysis_df("""
Had the same issues as the other review stating this is rubbish. Files got automatically deleted and wifi and Bluetooth disconnected frequently. Trackpad and keyboard was dysfunctional at times. There was a lag after I press a key and then it was automatically pressed multiple times.
""")

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,negative,same issues,Bluetooth (Product),Computer file (https://en.wikipedia.org/wiki/C...
1,,,other review,Trackpad (Product),Bluetooth (https://en.wikipedia.org/wiki/Bluet...
2,,,rubbish,keyboard (Product),Touchpad (https://en.wikipedia.org/wiki/Touchpad)
3,,,Files,key (Product),
4,,,Trackpad,,
5,,,keyboard,,
6,,,times,,
7,,,lag,,


In [73]:
# Analyze an inline review; no file_name is passed, so only the DataFrame is
# displayed.
print_analysis_df("""
The product is good but as a company asus is worst. i have purchased this laptop and got a faulty product, now neither Flipkart is accepting replacement request nor the asus on premise service is responding. don't go for asus laptop, their service is really pour, i am already struggling for 1 month and still stuck
""")

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,negative,replacement request,laptop (Product),Flipkart (https://en.wikipedia.org/wiki/Flipkart)
1,,,company asus,now (DateTime),
2,,,faulty product,Flipkart (Organization),
3,,,premise service,asus (Organization),
4,,,laptop,asus laptop (Product),
5,,,Flipkart,1 month (DateTime),
6,,,1 month,,


In [74]:
# Analyze an inline review; no file_name is passed, so only the DataFrame is
# displayed.
print_analysis_df("""
This flagship phone is awesome, Camera quality liked it very much. I was earlier samsung family, now onwards OPPO family
""")

Unnamed: 0,Language,Sentiment,Key Phrases,Entities,Linked Entities
0,English,positive,flagship phone,flagship phone (Product),Camera phone (https://en.wikipedia.org/wiki/Ca...
1,,,Camera quality,Camera (Product),Oppo (https://en.wikipedia.org/wiki/Oppo)
2,,,samsung family,samsung (Organization),
3,,,OPPO family,now (DateTime),
4,,,,OPPO (Organization),
