In [1]:
from dotenv import load_dotenv
import os
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient
import pandas as pd
import numpy as np

In [2]:
load_dotenv()
endpoint = os.getenv("AZURE_WORKSPACE_ENDPOINT")
key = os.getenv("AZURE_WORKSPACE_KEY")
endpoint,key

('https://eastus.api.cognitive.microsoft.com/',
 '8dfeb4ae18f843f9a14bf0f27d1e7daa')

In [3]:
text_analytics_client = TextAnalyticsClient(endpoint=endpoint,credential=AzureKeyCredential(key=key))

In [4]:
data = pd.read_csv('Data/Musical_instruments_reviews.csv')

In [5]:
reviews= [i[0] for i in data.head(10).values]
print(reviews)

["Not much to write about here, but it does exactly what it's supposed to. filters out the pop sounds. now my recordings are much more crisp. it is one of the lowest prices pop filters on amazon so might as well buy it, they honestly work the same despite their pricing,", "The product does exactly as it should and is quite affordable.I did not realized it was double screened until it arrived, so it was even better than I had expected.As an added bonus, one of the screens carries a small hint of the smell of an old grape candy I used to buy, so for reminiscent's sake, I cannot stop putting the pop filter next to my nose and smelling it after recording. :DIf you needed a pop filter, this will work just as well as the expensive ones, and it may even come with a pleasing aroma like mine did!Buy this product! :]", 'The primary job of this device is to block the breath that would otherwise produce a popping sound, while allowing your voice to pass through with no noticeable reduction of volu

# Sentiment Analysis

In [6]:
sentiment_analysis = text_analytics_client.analyze_sentiment(documents=reviews,language="en")

In [7]:
for sentiment in sentiment_analysis:
    print(sentiment.sentiment,sentiment.confidence_scores)

neutral {'positive': 0.22, 'neutral': 0.76, 'negative': 0.02}
positive {'positive': 0.85, 'neutral': 0.1, 'negative': 0.05}
positive {'positive': 0.96, 'neutral': 0.04, 'negative': 0.0}
positive {'positive': 1.0, 'neutral': 0.0, 'negative': 0.0}
positive {'positive': 0.99, 'neutral': 0.0, 'negative': 0.0}
positive {'positive': 0.99, 'neutral': 0.0, 'negative': 0.0}
positive {'positive': 0.79, 'neutral': 0.09, 'negative': 0.12}
mixed {'positive': 0.49, 'neutral': 0.01, 'negative': 0.5}
positive {'positive': 0.89, 'neutral': 0.01, 'negative': 0.1}
mixed {'positive': 0.54, 'neutral': 0.04, 'negative': 0.42}


# Text Summarization

In [8]:
poller = text_analytics_client.begin_extract_summary(documents=reviews)
extract_summary_result = poller.result()

In [9]:
summaries = []
for result in extract_summary_result:          
    if result.kind == "ExtractiveSummarization":
        summaries.append("".join([sentence.text for sentence in result.sentences]))
    elif result.is_error is True:
        print(f'Error with {result.error.code}')            
print(summaries)

["Not much to write about here, but it does exactly what it's supposed to.filters out the pop sounds.it is one of the lowest prices pop filters on amazon so might as well buy it, they honestly work the same despite their pricing,", "I did not realized it was double screened until it arrived, so it was even better than I had expected.As an added bonus, one of the screens carries a small hint of the smell of an old grape candy I used to buy, so for reminiscent's sake, I cannot stop putting the pop filter next to my nose and smelling it after recording.:DIf you needed a pop filter, this will work just as well as the expensive ones, and it may even come with a pleasing aroma like mine did!Buy this product!", 'The double cloth filter blocks the pops and lets the voice through with no coloration.The metal clamp mount attaches to the mike stand secure enough to keep it attached.The goose neck needs a little coaxing to stay where you put it.', 'Nice windscreen protects my MXL mic and prevents 

In [10]:
len(summaries)

10

# Entity Recognition

In [11]:
batch_1_entity_recognition_result = text_analytics_client.recognize_entities(documents=reviews[0:5],language="en")
batch_2_entity_recognition_result = text_analytics_client.recognize_entities(documents=reviews[5:],language="en")

In [12]:
for result in batch_1_entity_recognition_result:
    for entity in result.entities:        
        print(f"Entity: {entity.text}, Category: {entity.category}, Confidence Score: {entity.confidence_score}")

for result in batch_2_entity_recognition_result:
    for entity in result.entities:        
        print(f"Entity: {entity.text}, Category: {entity.category}, Confidence Score: {entity.confidence_score}")

Entity: write, Category: Skill, Confidence Score: 0.59
Entity: now, Category: DateTime, Confidence Score: 0.8
Entity: one, Category: Quantity, Confidence Score: 0.8
Entity: amazon, Category: Organization, Confidence Score: 0.98
Entity: expected.As, Category: Organization, Confidence Score: 0.9
Entity: one, Category: Quantity, Confidence Score: 0.8
Entity: grape candy, Category: Product, Confidence Score: 0.94
Entity: cent, Category: Quantity, Confidence Score: 0.8
Entity: pop filter, Category: Product, Confidence Score: 0.9
Entity: pop filter, Category: Product, Confidence Score: 0.97
Entity: may, Category: DateTime, Confidence Score: 0.8
Entity: double cloth filter, Category: Product, Confidence Score: 0.87
Entity: metal clamp mount, Category: Product, Confidence Score: 0.95
Entity: mike stand, Category: Product, Confidence Score: 0.96
Entity: windscreen, Category: Product, Confidence Score: 0.94
Entity: MXL mic, Category: Product, Confidence Score: 0.95
Entity: MXL, Category: Product

# Key Phrase Extraction

In [13]:
key_phrases = text_analytics_client.extract_key_phrases(documents=reviews,language="en")

In [14]:
for result in key_phrases:
    for display_result in result.items():
        if display_result[0]=='key_phrases':
            print("Key Phrases: ",display_result[1])

Key Phrases:  ['lowest prices pop filters', 'pop sounds', 'recordings', 'pricing']
Key Phrases:  ['old grape candy', 'small hint', 'pop filter', 'pleasing aroma', 'product', 'bonus', 'screens', 'smell', 'sake', 'nose']
Key Phrases:  ['double cloth filter', 'primary job', 'popping sound', 'noticeable reduction', 'high frequencies', 'metal clamp', 'mike stand', 'goose neck', 'device', 'breath', 'voice', 'volume', 'pops', 'coloration', 'little']
Key Phrases:  ['Nice windscreen', 'MXL mic', 'careful positioning', 'pops', 'thing', 'gooseneck', 'clamp']
Key Phrases:  ['pop filter', 'studio filter', 'vocals', 'pops']
Key Phrases:  ['heavy cord', 'gold connectors', 'Bass', 'instructions', 'harm']
Key Phrases:  ['good reason', 'lifetime warranty', 'Simple fact', 'monster cables', 'years', 'price', 'cost']
Key Phrases:  ['high end Planet Waves cable', 'Fender Strat jack', 'new Monster cable', 'Fender Amp', 'guitar cable', 'pedal chain', 'pedal board', 'input jacks', 'output', 'money', 'eighties'

# Linked Entity Recognition

In [15]:
linked_entities_batch_1 = text_analytics_client.recognize_linked_entities(documents=reviews[0:5],language="en")
linked_entities_batch_2 = text_analytics_client.recognize_linked_entities(documents=reviews[5:],language="en")

In [16]:
for result in linked_entities_batch_1:
    if result.get('entities') != []:
        for display_result in result.get('entities'):
            print(f"Entity: {display_result.name} URL: {display_result.url}")
for result in linked_entities_batch_2:
    if result.get('entities') != []:
        for display_result in result.get('entities'):
            print(f"Entity: {display_result.name} URL: {display_result.url}")            

Entity: Data Interchange Format URL: https://en.wikipedia.org/wiki/Data_Interchange_Format
Entity: Stone Temple Pilots URL: https://en.wikipedia.org/wiki/Stone_Temple_Pilots
Entity: Goose URL: https://en.wikipedia.org/wiki/Goose
Entity: MusicXML URL: https://en.wikipedia.org/wiki/MusicXML
Entity: Love (Beatles album) URL: https://en.wikipedia.org/wiki/Love_(Beatles_album)
Entity: Gold URL: https://en.wikipedia.org/wiki/Gold
Entity: Bass guitar URL: https://en.wikipedia.org/wiki/Bass_guitar
Entity: Fender amplifier URL: https://en.wikipedia.org/wiki/Fender_amplifier
Entity: Monster Cable URL: https://en.wikipedia.org/wiki/Monster_Cable
Entity: Planet Waves URL: https://en.wikipedia.org/wiki/Planet_Waves
Entity: Once (film) URL: https://en.wikipedia.org/wiki/Once_(film)
Entity: Fender Stratocaster URL: https://en.wikipedia.org/wiki/Fender_Stratocaster
Entity: Wind wave URL: https://en.wikipedia.org/wiki/Wind_wave
Entity: Epiphone Sheraton URL: https://en.wikipedia.org/wiki/Epiphone_Shera

In [17]:
pii_entities_batch_1 =text_analytics_client.recognize_pii_entities(documents=reviews[0:5],language="en")
pii_entities_batch_2 =text_analytics_client.recognize_pii_entities(documents=reviews[5:],language="en")

In [18]:
for result in pii_entities_batch_1:
    if result.entities !=[]:
        for display_result in result.entities:
            print(f"Entity: {display_result.text} Category: {display_result.category} Confidence: {display_result.confidence_score}")
for result in pii_entities_batch_2:
    if result.entities !=[]:
        for display_result in result.entities:
            print(f"Entity: {display_result.text} Category: {display_result.category} Confidence: {display_result.confidence_score}")            

Entity: now Category: DateTime Confidence: 0.8
Entity: expected.As Category: Organization Confidence: 0.9
Entity: may Category: DateTime Confidence: 0.8
Entity: last night Category: DateTime Confidence: 0.8
Entity: now Category: DateTime Confidence: 0.8
Entity: the 1980's Category: DateTime Confidence: 0.8
Entity: the eighties Category: DateTime Confidence: 0.8
Entity: now Category: DateTime Confidence: 0.8
Entity: the day Category: DateTime Confidence: 0.8
Entity: Planet Waves Category: Organization Confidence: 0.85
Entity: Monster Category: Organization Confidence: 0.63
Entity: Monster Cables Category: Organization Confidence: 0.35
