In [None]:

!pip install SpeechRecognition
!pip install pydub
!pip install spacy
!python3 -m spacy download en_core_web_sm


import pandas as pd

import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import speech_recognition as sr
from pydub import AudioSegment

import spacy

# --- Speech-to-text ---------------------------------------------------------
# A single Recognizer handles both capturing the audio and transcribing it.
recognizer = sr.Recognizer()

# record() is called without a duration or offset, so the capture is not
# limited to a sub-range of the file.
transcribe_audio_file = sr.AudioFile("sample_customer_call.wav")
with transcribe_audio_file as source:
    transcribe_audio = recognizer.record(source)

# Transcription is delegated to recognize_google with its default settings.
transcribed_text = recognizer.recognize_google(transcribe_audio)
print("Transcribed text: ", transcribed_text)

# --- Basic audio properties -------------------------------------------------
# The same recording is opened again through pydub to read its metadata.
audio_segment = AudioSegment.from_file("sample_customer_call.wav")
number_channels, frame_rate = audio_segment.channels, audio_segment.frame_rate

print("Number of channels: ", number_channels)
print("Frame rate: ", frame_rate)


# VADER analyzer used by find_sentiment to score each transcript.
sid = SentimentIntensityAnalyzer()

# Call transcriptions; the code below reads the `text` and `sentiment_label`
# columns of this frame.
df = pd.read_csv("customer_call_transcriptions.csv")


def find_sentiment(text):
    """Classify *text* as 'positive', 'negative' or 'neutral'.

    The label comes from the ``compound`` entry of the scores returned by
    the module-level ``sid.polarity_scores``: 0.05 or more is 'positive',
    -0.05 or less is 'negative', and anything in between is 'neutral'.
    """
    compound = sid.polarity_scores(text)['compound']

    if compound >= 0.05:
        return 'positive'
    if compound <= -0.05:
        return 'negative'
    return 'neutral'

# Score every transcript. Only the `text` column is needed, so apply the
# classifier to that Series directly; each value is handed straight to
# find_sentiment and no per-row Series has to be built.
df['sentiment_predicted'] = df['text'].apply(find_sentiment)

# A true positive is a row labelled 'positive' whose prediction agrees with
# the label. Summing the boolean mask counts the matching rows without
# materialising a filtered copy of the frame.
is_true_positive = (
    (df['sentiment_predicted'] == df['sentiment_label'])
    & (df['sentiment_label'] == 'positive')
)
true_positive = int(is_true_positive.sum())

print("True positives: ", true_positive)


# Load the "en_core_web_sm" spaCy pipeline once; extract_entities below runs
# every transcript through this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")


def extract_entities(text):
    """Return the text of every named entity found in *text*.

    Runs the module-level ``nlp`` pipeline on *text* and collects
    ``ent.text`` for each item of ``doc.ents``, in order, into a list.
    """
    return [entity.text for entity in nlp(text).ents]


# One list of entity strings per transcript.
df['named_entities'] = df['text'].apply(extract_entities)

# Flatten the per-row lists so entities can be counted across all transcripts.
all_entities = []
for row_entities in df['named_entities']:
    all_entities.extend(row_entities)

entities_df = pd.DataFrame(all_entities, columns=['entity'])

# value_counts orders by descending frequency, so the first row holds the
# most common entity. The axis/column names are set explicitly to
# 'entity' and 'count'.
entities_counts = (
    entities_df['entity']
    .value_counts()
    .rename_axis('entity')
    .reset_index(name='count')
)

most_freq_ent = entities_counts["entity"].iat[0]
print("Most frequent entity: ", most_freq_ent)


# The `nlp` pipeline loaded earlier in this file is reused here; loading the
# same "en_core_web_sm" model a second time only costs time and memory.
# NOTE(review): spaCy documents that its small ("sm") packages ship without
# static word vectors, which makes Doc.similarity less reliable -- consider
# a md/lg model if the ranking below looks poor. TODO confirm.

# Parse every transcript once and keep the resulting Doc objects so they can
# be compared against the query.
df['processed_text'] = df['text'].apply(lambda text: nlp(text))

input_query = "wrong package delivery"
processed_query = nlp(input_query)

# Score each transcript against the query, then order the frame so the
# highest-scoring transcript is in the first row.
df['similarity'] = df['processed_text'].apply(lambda doc: processed_query.similarity(doc))
df = df.sort_values(by='similarity', ascending=False)

most_similar_text = df["text"].iloc[0]
print("Most similar text: ", most_similar_text)