In [7]:
import subprocess

def extract_audio(video_file, output_audio_file):
    command = f"ffmpeg -i {video_file} -q:a 0 -map a {output_audio_file} -y"
    subprocess.run(command, shell=True, check=True)

# Example usage
video_file = "HR.mp4"
audio_file = "response_audio.wav"
extract_audio(video_file, audio_file)


In [10]:
import noisereduce as nr
import librosa
import soundfile as sf

# Load audio
audio, sr = librosa.load("response_audio.wav", sr=16000)

# Apply noise reduction
reduced_noise = nr.reduce_noise(y=audio, sr=sr)

# Save processed audio
sf.write("processed_audio.wav", reduced_noise, sr)


In [13]:
import whisper
import wave

def real_time_transcription(audio_file):
    model = whisper.load_model("base")
    audio = whisper.load_audio(audio_file)
    transcription = model.transcribe(audio)
    print("Transcription:", transcription["text"])
    return transcription["text"]

# Usage
# Save live audio to 'audio.wav' in real time
transcription = real_time_transcription("processed_audio.wav")


100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 61.2MiB/s]
  checkpoint = torch.load(fp, map_location=device)




Transcription:  Have you received? Yes, my name is Rizim. What you could make? My name is Rizim Rekres from it. So, Rizim, today you are going to come take a walk into the hall or an extra right desk. So, 10 month ago, yes, firstly, and then to give you a 30-minute service, to introduce this writer, my name is Rizim Rekres from it. I am from the work of studies in Gaita, Sanctuary. I completed my PhD in the Seabop, Computer Science and Engineering, in Gaita, Sanctuary, and in the college, I have a good hall of the village. I set up my PhD with sound coin, 960, CTA. Coming to my family, there are former Muslim family, Ebrood Budi, and my technical skills are, and Budi, C. E. Paita, data switches, and basics of the school. And my interpersonal skills are, I am able to communicate with them in people's and good listener. And my hobby is spending time with family and friends, cooking, listening music, and crafting at three times. My stents are, I am a Holger, and B. Budi, and I am a self-m

In [16]:
import language_tool_python

def grammar_analysis(text):
    tool = language_tool_python.LanguageTool('en-IN')
    matches = tool.check(text)
    errors = [match.ruleId for match in matches]
    suggestions = [match.message for match in matches]
    return errors, suggestions

# Example Usage
text = "This is an test sentence with error."
errors, suggestions = grammar_analysis(text)
print("Errors:", errors)
print("Suggestions:", suggestions)


Downloading LanguageTool 6.4: 100%|██████████| 246M/246M [00:12<00:00, 20.2MB/s]
INFO:language_tool_python.download_lt:Unzipping /tmp/tmpdcu3942e.zip to /root/.cache/language_tool_python.
INFO:language_tool_python.download_lt:Downloaded https://www.languagetool.org/download/LanguageTool-6.4.zip to /root/.cache/language_tool_python.


Errors: ['EN_A_VS_AN']
Suggestions: ['Use “a” instead of ‘an’ if the following word doesn’t start with a vowel sound, e.g. ‘a sentence’, ‘a university’.']


In [19]:
from nltk.corpus import cmudict

def check_pronunciation(word):
    pronunciation = cmudict.dict()
    return pronunciation.get(word.lower(), "Word not found in dictionary")

# Example Usage
word = "hello"
result = check_pronunciation(word)
print(f"Pronunciation for '{word}': {result}")


Pronunciation for 'hello': [['HH', 'AH0', 'L', 'OW1'], ['HH', 'EH0', 'L', 'OW1']]


In [20]:
def speaking_rate(transcribed_text, duration_seconds):
    words = len(transcribed_text.split())
    rate = words / (duration_seconds / 60)  # Words per minute
    return rate

# Example Usage
transcription = "This is a sample sentence for evaluation."
rate = speaking_rate(transcription, 45)
print(f"Speaking Rate: {rate} WPM")


Speaking Rate: 9.333333333333334 WPM


In [24]:
from pydub import AudioSegment, silence

def pause_analysis(audio_file):
    audio = AudioSegment.from_wav(audio_file)
    pauses = silence.detect_silence(audio, min_silence_len=500, silence_thresh=-40)
    return len(pauses), pauses

# Example Usage
num_pauses, pause_details = pause_analysis("processed_audio.wav")
print(f"Number of Pauses: {num_pauses}")
print("Pause Durations:", pause_details)


Number of Pauses: 3
Pause Durations: [[0, 172230], [172581, 195434], [195446, 278036]]


In [25]:
def filler_word_usage(text):
    fillers = ["um", "uh", "like", "you know", "sort of"]
    filler_count = {filler: text.lower().count(filler) for filler in fillers}
    return filler_count

# Example Usage
text = "Um, I think this is, like, a sort of test."
fillers = filler_word_usage(text)
print("Filler Word Usage:", fillers)


Filler Word Usage: {'um': 1, 'uh': 0, 'like': 1, 'you know': 0, 'sort of': 1}


In [27]:
from pydub import AudioSegment
import numpy as np

def calculate_snr(audio_file):
    audio = AudioSegment.from_file(audio_file)
    samples = np.array(audio.get_array_of_samples())
    signal = np.mean(samples**2)
    noise = np.var(samples)
    snr = 10 * np.log10(signal / noise)
    return snr

# Example Usage
snr = calculate_snr("processed_audio.wav")
print(f"Signal-to-Noise Ratio: {snr} dB")


Signal-to-Noise Ratio: -9.984965081863532 dB


In [29]:
def generate_feedback(transcription, audio_file, duration):
    # Grammar
    errors, suggestions = grammar_analysis(transcription)

    # Speaking Rate
    rate = speaking_rate(transcription, duration)

    # Pause Patterns
    num_pauses, pauses = pause_analysis(audio_file)

    # Filler Word Usage
    fillers = filler_word_usage(transcription)

    # Voice Clarity
    snr = calculate_snr(audio_file)

    feedback = {
        "Grammar Errors": errors,
        "Grammar Suggestions": suggestions,
        "Speaking Rate (WPM)": rate,
        "Number of Pauses": num_pauses,
        "Pause Details (ms)": pauses,
        "Filler Word Usage": fillers,
        "Voice Clarity (SNR in dB)": snr,
    }
    return feedback

# Example Usage
transcription = "This is um a sample uh response with errors."
feedback = generate_feedback(transcription, "processed_audio.wav", 45)
for key, value in feedback.items():
    print(f"{key}: {value}")


Grammar Errors: []
Grammar Suggestions: []
Speaking Rate (WPM): 12.0
Number of Pauses: 3
Pause Details (ms): [[0, 172230], [172581, 195434], [195446, 278036]]
Filler Word Usage: {'um': 1, 'uh': 1, 'like': 0, 'you know': 0, 'sort of': 0}
Voice Clarity (SNR in dB): -9.984965081863532
