In [3]:
import os

# Downloading audio from YouTube
!yt-dlp -x --audio-format wav -o "call_audio.%(ext)s" https://www.youtube.com/watch?v=4ostqJD3Psc

# Converting audio to mono 16kHz
import ffmpeg
# Paths for the resampling step. `output_file` is reused by later cells.
input_file, output_file = "call_audio.wav", "processed_call.wav"

# Build the ffmpeg pipeline one stage at a time:
#   read input -> write output with 1 channel (ac=1) at 16000 Hz (ar=16000)
# and run it, replacing any output file left over from a previous run.
source_stream = ffmpeg.input(input_file)
resampled_stream = source_stream.output(output_file, ac=1, ar=16000)
resampled_stream.run(overwrite_output=True)

print("Audio ready:", output_file)


[youtube] Extracting URL: https://www.youtube.com/watch?v=4ostqJD3Psc
[youtube] 4ostqJD3Psc: Downloading webpage
[youtube] 4ostqJD3Psc: Downloading tv simply player API JSON
[youtube] 4ostqJD3Psc: Downloading tv client config
[youtube] 4ostqJD3Psc: Downloading player 6742b2b9-main
[youtube] 4ostqJD3Psc: Downloading tv player API JSON
[info] 4ostqJD3Psc: Downloading 1 format(s): 251
[download] Sleeping 1.00 seconds as required by the site...
[download] Destination: call_audio.webm
[K[download] 100% of    1.99MiB in [1;37m00:00:00[0m at [0;32m12.98MiB/s[0m
[ExtractAudio] Destination: call_audio.wav
Deleting original file call_audio.webm (pass -k to keep)
Audio ready: processed_call.wav


In [4]:
import whisper
from datetime import timedelta

# Load the Whisper checkpoint named "tiny" (note: this is the "tiny"
# checkpoint, not the one called "small").
model = whisper.load_model("tiny")

# Transcribe the processed audio. verbose=True is passed through to
# whisper; the cell output shows each timestamped segment being printed.
result = model.transcribe(output_file, verbose=True)

# Keep the per-segment records for the later cells, and show only the
# first 300 characters of the full transcript as a sanity check.
segments = result["segments"]
preview = result["text"][:300]

print("Transcription complete. Example text:\n", preview)


100%|█████████████████████████████████████| 72.1M/72.1M [00:01<00:00, 48.3MiB/s]


Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:11.400]  Thank you for calling me son. My name is Lauren. Can I have your name?
[00:11.400 --> 00:16.000]  Yes, my name is John Smith. Thank you, John. How can I help you?
[00:16.000 --> 00:20.500]  I was just calling about to see how much it would cost to update the map in my car.
[00:20.500 --> 00:24.000]  I'd be happy to help you with that today. Did you receive a mail or from us?
[00:24.000 --> 00:26.500]  I did. Do you need the customer number?
[00:26.500 --> 00:30.500]  Yes, please. Okay. It's 15243.
[00:30.500 --> 00:33.500]  Thank you and the year making model of your vehicle.
[00:33.500 --> 00:37.500]  Yeah, I have a 2009 Nissan Altaman.
[00:37.500 --> 00:38.500]  So nice car.
[00:38.500 --> 00:40.500]  Yeah, thank you. We really enjoy it.
[00:40.500 --> 00:46.500]  Okay. I think I found your profile here. Can I have you verify your address

In [5]:
# Naive speaker labelling: even-indexed segments are tagged "Sales Rep",
# odd-indexed segments "Customer". Each segment dict gains a "speaker" key.
# NOTE(review): this assumes speakers alternate exactly once per segment.
# It is not diarization; a segment that contains both voices, or two
# consecutive segments by one speaker, will be mislabelled.
SPEAKER_BY_PARITY = ("Sales Rep", "Customer")
for idx, segment in enumerate(segments):
    segment["speaker"] = SPEAKER_BY_PARITY[idx % 2]

# Preview the first five labelled segments
for segment in segments[:5]:
    print("[{}] {}".format(segment["speaker"], segment["text"]))


[Sales Rep]  Thank you for calling me son. My name is Lauren. Can I have your name?
[Customer]  Yes, my name is John Smith. Thank you, John. How can I help you?
[Sales Rep]  I was just calling about to see how much it would cost to update the map in my car.
[Customer]  I'd be happy to help you with that today. Did you receive a mail or from us?
[Sales Rep]  I did. Do you need the customer number?


In [6]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import re

analyzer = SentimentIntensityAnalyzer()

# A segment counts as a question when it contains "?" or opens with one of
# these words. The trailing \b requires the word to end there, so "However",
# "Whatever" or "Whenever" are not counted, while "what's" / "who's" still are.
QUESTION_START = re.compile(r"(?:what|why|how|when|where|who|whom|whose)\b")

# Cut-offs applied to the mean compound score when naming the call sentiment.
POSITIVE_CUTOFF = 0.05
NEGATIVE_CUTOFF = -0.05

talk_time = {"Sales Rep": 0, "Customer": 0}
num_questions = 0
longest_monologue = 0
sentiments = []

for seg in segments:
    start, end = seg["start"], seg["end"]
    duration = end - start
    talk_time[seg["speaker"]] += duration

    # Longest monologue is taken as the longest single segment.
    # NOTE(review): consecutive segments by the same speaker are not merged.
    if duration > longest_monologue:
        longest_monologue = duration

    # Count questions.
    # NOTE(review): this counts questions from both speakers, although the
    # insight below talks about the rep only -- confirm that is intended.
    text = seg["text"]
    if "?" in text or QUESTION_START.match(text.strip().lower()):
        num_questions += 1

    # Sentiment: keep VADER's compound score for each segment.
    sentiments.append(analyzer.polarity_scores(text)["compound"])

# Talk-time ratio (percent of total labelled talk time per speaker).
# Guarded so an empty transcript or zero-length segments cannot divide by zero.
total_time = talk_time["Sales Rep"] + talk_time["Customer"]
if total_time > 0:
    rep_ratio = round((talk_time["Sales Rep"] / total_time) * 100, 2)
    cust_ratio = round((talk_time["Customer"] / total_time) * 100, 2)
else:
    rep_ratio = cust_ratio = 0.0

# Overall sentiment: mean compound score, 0.0 when there are no segments.
avg_sent = sum(sentiments) / len(sentiments) if sentiments else 0.0
if avg_sent > POSITIVE_CUTOFF:
    sentiment = "Positive"
elif avg_sent < NEGATIVE_CUTOFF:
    sentiment = "Negative"
else:
    sentiment = "Neutral"

# Insight: first matching rule wins.
if not sentiments:
    # Avoid giving coaching advice when nothing was transcribed.
    insight = "No speech segments found; nothing to analyze."
elif rep_ratio > 70:
    insight = "Rep should give customer more chance to speak."
elif num_questions < 3:
    insight = "Rep should ask more questions to engage customer."
else:
    insight = "Good balance, but can improve engagement."

print("---- RESULTS ----")
print("Talk Time Ratio: Sales Rep =", rep_ratio, "% | Customer =", cust_ratio, "%")
print("Questions Asked:", num_questions)
print("Longest Monologue (s):", round(longest_monologue,2))
print("Call Sentiment:", sentiment)
print("Insight:", insight)


---- RESULTS ----
Talk Time Ratio: Sales Rep = 55.66 % | Customer = 44.34 %
Questions Asked: 7
Longest Monologue (s): 11.4
Call Sentiment: Positive
Insight: Good balance, but can improve engagement.
