In [5]:
# Make Google Drive visible to this Colab runtime under /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
from google.colab import drive
import os
import whisper
import librosa
import numpy as np
from textblob import TextBlob
import pandas as pd

# Mount Google Drive so the recordings folder is reachable under /content/drive.
# The first cell already mounts this same path; once that has run, this call
# only reports "Drive already mounted" and changes nothing.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Define the path to your folder in Google Drive
folder_path = '/content/drive/MyDrive/Call Recordings'  # Update this path accordingly

# Get the list of audio files in the folder.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# make the listing (and any later slice of it, such as "the first 500")
# reproducible from one run to the next.
# NOTE(review): every regular file is kept, whatever its extension — confirm the
# folder holds only audio before starting a long batch run.
audio_files = [
    path
    for path in (os.path.join(folder_path, name) for name in sorted(os.listdir(folder_path)))
    if os.path.isfile(path)
]

# Load the Whisper "base" model once; transcribe_audio() uses this module-level instance.
model = whisper.load_model("base")

# Transcribe the audio file
def transcribe_audio(audio_file_path):
    """Return the transcript text for one audio file.

    Runs the module-level ``model`` on ``audio_file_path`` and returns the
    ``"text"`` entry of the result it produces.
    """
    return model.transcribe(audio_file_path)["text"]

# Extract audio features
def extract_audio_features(audio_file_path):
    """Summarise the waveform of one audio file.

    Loads the file with librosa and returns a dict containing its duration,
    the mean / max / min / standard deviation of the sample values, and the
    part of the duration that is not covered by the intervals
    ``librosa.effects.split`` reports at ``top_db=20``.
    """
    samples, sample_rate = librosa.load(audio_file_path)
    length = librosa.get_duration(y=samples, sr=sample_rate)

    # Amplitude statistics are taken over the raw (signed) sample values.
    stats = {
        "duration": length,
        "amplitude_mean": np.mean(samples),
        "amplitude_max": np.max(samples),
        "amplitude_min": np.min(samples),
        "amplitude_std": np.std(samples),
    }

    # Count the samples inside the intervals returned by the split; whatever
    # remains of the clip is reported as silence.
    voiced_samples = 0
    for start, end in librosa.effects.split(samples, top_db=20):
        voiced_samples += end - start
    stats["total_silence_duration"] = length - voiced_samples / sample_rate

    return stats

# Analyze the transcription
def analyze_transcription(text):
    """Score a transcript's sentiment with TextBlob.

    Returns a dict holding the ``polarity`` and ``subjectivity`` attributes of
    ``TextBlob(text).sentiment``.
    """
    scores = TextBlob(text).sentiment
    return dict(
        sentiment_polarity=scores.polarity,
        sentiment_subjectivity=scores.subjectivity,
    )

# Compile features into a DataFrame
def compile_features(audio_files):
    """Build one feature row per audio file and return the rows as a DataFrame.

    Each row holds the file's base name, its transcription, the values from
    ``extract_audio_features`` and the values from ``analyze_transcription``,
    inserted in that order.
    """

    def build_row(path):
        # The transcript is produced once and reused for the sentiment scores.
        name = os.path.basename(path)
        transcript = transcribe_audio(path)
        return {
            'file_name': name,
            'transcription': transcript,
            **extract_audio_features(path),
            **analyze_transcription(transcript),
        }

    return pd.DataFrame([build_row(path) for path in audio_files])

# Run the whole pipeline on at most the first 500 listed files
recordings_batch = audio_files[:500]
df = compile_features(recordings_batch)

# Write the resulting table to an Excel workbook on Drive, without the index column
excel_file = '/content/drive/MyDrive/Call_Recordings_Analysis_500.xlsx'
df.to_excel(excel_file, index=False)

print("Analysis complete and saved to:", excel_file)


In [10]:
# `data` is a local variable inside compile_features(), so it does not exist at
# notebook scope; the compiled results live in `df`. Preview the first rows.
df.head()