In [26]:
!pip install transformers pydub speechrecognition torch



In [27]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import speech_recognition as sr
from pydub import AudioSegment
from pydub.playback import play
import time
import os

# Checkpoint used for sentiment scoring. The pipeline built from it reports
# labels such as '1 star' to '5 stars', which generate_feedback() matches on.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
# Tokenizer and classification model are loaded from the same checkpoint name
# so they stay in sync.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Module-level pipeline object; analyze_audio_sentiment() calls it once per
# transcribed chunk.
sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

def transcribe_audio(audio_chunk, temp_file="temp_audio.wav"):
    """Transcribe one audio chunk to text via Google speech recognition.

    The chunk is exported to ``temp_file`` as WAV, read back through
    ``speech_recognition`` and passed to ``recognize_google``. The temporary
    file is deleted before the function returns, on success and on failure.

    Args:
        audio_chunk: Object exposing ``export(path, format="wav")``; the
            notebook passes slices of a pydub ``AudioSegment``.
        temp_file: Path of the scratch WAV file. The file at this path is
            removed once transcription has finished.

    Returns:
        The recognized text, or ``None`` when the speech could not be
        understood (``sr.UnknownValueError``) or the recognition request
        failed (``sr.RequestError``, which is also printed).

    Raises:
        Any exception raised by ``audio_chunk.export`` or by opening the
        WAV file propagates unchanged; only the two ``sr`` errors above are
        handled here.
    """
    recognizer = sr.Recognizer()

    try:
        # Export inside the try block so that a failed or partial export is
        # still cleaned up by the finally clause below.
        audio_chunk.export(temp_file, format="wav")

        with sr.AudioFile(temp_file) as source:
            audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
            return text
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio; callers treat
        # None as "skip this chunk".
        return None
    except sr.RequestError as e:
        print(f"Error with Google API: {e}")
        return None
    finally:
        if os.path.exists(temp_file):
            os.remove(temp_file)

In [28]:
def _format_seconds(milliseconds):
    """Render a millisecond offset as seconds without trailing zeros (10000 -> '10')."""
    return f"{milliseconds / 1000:.3f}".rstrip("0").rstrip(".")


def analyze_audio_sentiment(audio_file, chunk_duration=10):
    """Transcribe a WAV file chunk by chunk and score each chunk's sentiment.

    For every chunk the function transcribes the audio, runs the transcript
    through ``sentiment_analyzer``, derives a coaching hint with
    ``generate_feedback``, then sleeps for one interval and plays the chunk.

    Args:
        audio_file: Path of the WAV file to analyse.
        chunk_duration: Chunk length in seconds (int or float, must be
            positive). Clips shorter than this are processed as one chunk.

    Returns:
        A ``(sentiment_log, feedback_log)`` tuple. ``sentiment_log`` holds
        the raw ``sentiment_analyzer`` result of every chunk that could be
        transcribed; ``feedback_log`` holds the non-empty feedback strings.

    Raises:
        ValueError: If ``chunk_duration`` is not positive.
    """
    audio = AudioSegment.from_wav(audio_file)
    total_duration = audio.duration_seconds
    # pydub measures len() and slice indices in milliseconds; doing all
    # arithmetic in integer milliseconds keeps range() happy even when the
    # clip is shorter than chunk_duration or either value is fractional.
    total_ms = len(audio)

    interval_ms = int(round(chunk_duration * 1000))
    if interval_ms <= 0:
        raise ValueError(f"chunk_duration must be positive, got {chunk_duration!r}")

    # Same pacing value as before: the chunk length, capped at the clip length.
    pause_seconds = min(interval_ms, total_ms) / 1000

    sentiment_log = []
    feedback_log = []

    print(f"Total audio duration: {total_duration} seconds")

    # An empty clip yields an empty range, so the loop is simply skipped.
    for start_ms in range(0, total_ms, interval_ms):
        # Clamp so the reported end of the last chunk never exceeds the clip.
        end_ms = min(start_ms + interval_ms, total_ms)
        print(f"Processing segment {_format_seconds(start_ms)}-{_format_seconds(end_ms)} seconds...")

        audio_segment = audio[start_ms:end_ms]
        transcript = transcribe_audio(audio_segment)

        if transcript:
            # Only the first 512 characters are classified -- presumably to
            # stay below the model's input limit (TODO confirm).
            sentiment = sentiment_analyzer(transcript[:512])
            sentiment_log.append(sentiment)
            print(f"Sentiment analysis result: {sentiment}")

            feedback = generate_feedback(sentiment)
            if feedback:
                feedback_log.append(feedback)
                print(f"Suggested feedback: {feedback}")
        else:
            print("Audio quality insufficient for transcription, skipping this part.")

        # NOTE(review): sleeping a full interval and then playing the chunk
        # roughly doubles the wall-clock time per chunk -- confirm this
        # pacing is intended.
        time.sleep(pause_seconds)
        play(audio_segment)

    return sentiment_log, feedback_log

In [29]:
def generate_feedback(sentiment):
    """Translate a sentiment result into a coaching hint for the agent.

    Args:
        sentiment: Sequence whose first element is a mapping with a
            ``'label'`` key, e.g. ``[{'label': '3 stars', 'score': 0.25}]``.

    Returns:
        The advice string associated with the label, or ``None`` when the
        label is not one of '1 star' through '5 stars'.
    """
    rating = sentiment[0]['label']

    # Each entry pairs the labels that share a message with that message;
    # entries are checked in order from the lowest to the highest rating.
    advice_by_rating = (
        (('1 star', '2 stars'),
         "Agent should acknowledge the issue and reassure the customer."),
        (('3 stars',),
         "Agent is doing okay, but should focus on improving the engagement."),
        (('4 stars',),
         "Agent is handling it well, maintain the positive tone."),
        (('5 stars',),
         "Excellent work! Keep up the great service."),
    )

    for ratings, advice in advice_by_rating:
        if rating in ratings:
            return advice

    # Unknown label: no feedback to give.
    return None

In [30]:
from google.colab import files

# Ask the user for a file through the Colab upload dialog; the result maps
# each stored file name to its content.
uploaded = files.upload()
if not uploaded:
    # A cancelled dialog returns an empty mapping; fail with an actionable
    # message instead of an IndexError on the lookup below.
    raise RuntimeError("No file was uploaded; re-run this cell and choose a WAV file.")
# Only the first uploaded file is analysed.
audio_file_name = next(iter(uploaded))
print(f"Uploaded file: {audio_file_name}")
sentiment_history, feedback_log = analyze_audio_sentiment(audio_file_name)

Saving customer-support.wav to customer-support (8).wav
Uploaded file: customer-support (8).wav
Total audio duration: 61.4 seconds
Processing segment 0-10 seconds...
Audio quality insufficient for transcription, skipping this part.
Processing segment 10-20 seconds...
Sentiment analysis result: [{'label': '3 stars', 'score': 0.2504223585128784}]
Suggested feedback: Agent is doing okay, but should focus on improving the engagement.
Processing segment 20-30 seconds...
Audio quality insufficient for transcription, skipping this part.
Processing segment 30-40 seconds...
Sentiment analysis result: [{'label': '1 star', 'score': 0.40241777896881104}]
Suggested feedback: Agent should acknowledge the issue and reassure the customer.
Processing segment 40-50 seconds...
Sentiment analysis result: [{'label': '5 stars', 'score': 0.3185940086841583}]
Suggested feedback: Excellent work! Keep up the great service.
Processing segment 50-60 seconds...
Sentiment analysis result: [{'label': '3 stars', 'sco