In [27]:
import json
import speech_recognition as sr
from pyannote.audio import Inference
from pydub import AudioSegment
from transformers import pipeline

# Load the diarization model
diarization_model = Inference("pyannote/speaker-diarization")

# Load the multilingual transcription model (using Hugging Face transformers)
transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-large-xlsr-53")

def transcribe_audio_with_diarization(audio_path):
    # Load the audio file
    audio = AudioSegment.from_file(audio_path)
    
    # Convert audio to wav format (if not already in wav format)
    wav_audio_path = "temp_audio.wav"
    audio.export(wav_audio_path, format="wav")
    
    # Step 1: Perform speaker diarization (speaker separation)
    diarization = diarization_model(wav_audio_path)
    
    # Step 2: Transcribe the audio using the ASR model
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_audio_path) as source:
        audio_data = recognizer.record(source)
    
    # Use the transcription model to get the transcriptions
    transcription_result = transcription_pipeline(wav_audio_path)
    
    # Step 3: Structure the output in JSON format with diarization info
    speakers = []
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        speakers.append({"start_time": turn.start, "end_time": turn.end, "speaker": speaker})

    # Combine the transcription with speaker diarization data
    output = {
        "transcription": transcription_result["text"],
        "speaker_diarization": speakers
    }

    return json.dumps(output, indent=4)

# Example Usage:
audio_path = "speech.wav"  # Path to the audio file
output = transcribe_audio_with_diarization(audio_path)
print(output)




Could not download 'pyannote/speaker-diarization' model.
It might be because the model is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Model.from_pretrained('pyannote/speaker-diarization',
   ...                       use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the model is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.


AttributeError: 'NoneType' object has no attribute 'device'

In [25]:
!pip install SpeechRecognition pyannote.audio transformers pydub



Collecting SpeechRecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.2-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.2


In [31]:
pip install numpy scipy torch torchvision torchaudio librosa
pip install pyannote.audio openai-whisper


SyntaxError: invalid syntax (2537142567.py, line 1)

In [33]:
# On Linux:
sudo apt install ffmpeg

# On Mac:
brew install ffmpeg


SyntaxError: invalid syntax (3690927489.py, line 2)

In [35]:
pip install numpy scipy torch torchvision torchaudio librosa

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl.metadata (5.6 kB)
Downloading librosa-0.11.0-py3-none-any.whl (260 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m260.7/260.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading audioread-3.0.1-py3-none-any.whl (23 kB)
Downloading pooch-1.8.2-py3-none-any.whl (64 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.6/64.6 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl (156 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.4/156.4 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00

In [37]:
pip install pyannote.audio openai-whisper


Note: you may need to restart the kernel to use updated packages.


In [43]:
import os
import tempfile
import whisper
from pyannote.audio import Pipeline
import json

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")  # You can use "small", "medium", etc., based on your needs

# Load Pyannote model for speaker diarization
try:
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
    print("Diarization pipeline loaded successfully!")
except Exception as e:
    print(f"Error loading diarization pipeline: {e}")

def transcribe_with_diarization(audio_path):
    """
    Transcribes audio with speaker diarization.
    :param audio_path: Path to the audio file (e.g., 'speech.mp3')
    :return: JSON structure with speaker info and transcriptions
    """
    # Process the audio file through the diarization pipeline
    try:
        diarization = diarization_pipeline({'uri': 'audio', 'audio': audio_path})
    except Exception as e:
        print(f"Error during diarization: {e}")
        return {"error": "Diarization failed"}
    
    segments = []

    # Iterate over the diarization output to extract speaker segments
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            start, end = turn.start, turn.end
            temp_audio_path = temp_audio.name

            # Extract segment using ffmpeg
            os.system(f"ffmpeg -i \"{audio_path}\" -ss {start} -to {end} -ar 16000 -ac 1 -y \"{temp_audio_path}\" -loglevel quiet")

            # Transcribe the audio segment with Whisper
            result = whisper_model.transcribe(temp_audio_path)
            os.remove(temp_audio_path)

            # Add the transcription to the results
            segments.append({
                "speaker": speaker,
                "start_time": str(start),
                "end_time": str(end),
                "text": result["text"].strip()
            })

    return {"transcription": segments}


# Usage example: transcribe the 'speech.mp3' file
audio_file_path = "speech-94649.mp3"  # Path to your audio file

# Get transcription results
result = transcribe_with_diarization(audio_file_path)

# Display the result as a structured JSON
print(json.dumps(result, indent=2))




Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.
Diarization pipeline loaded successfully!
Error during diarization: 'NoneType' object is not callable
{
  "error": "Diarization failed"
}


In [3]:
import os
import tempfile
import whisper
from pyannote.audio import Pipeline
import json

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium" based on your needs

# Load Pyannote model for speaker diarization
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="YOUR_HF_TOKEN")

def transcribe_with_diarization(audio_path):
    """
    Transcribes audio with speaker diarization.
    :param audio_path: Path to the audio file (e.g., 'speech.mp3')
    :return: JSON structure with speaker info and transcriptions
    """
    try:
        # Process the audio file through the diarization pipeline
        diarization = diarization_pipeline({'uri': 'audio', 'audio': audio_path})
        segments = []

        # Iterate over the diarization output to extract speaker segments
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                start, end = turn.start, turn.end
                temp_audio_path = temp_audio.name

                # Extract segment using ffmpeg
                os.system(f"ffmpeg -i \"{audio_path}\" -ss {start} -to {end} -ar 16000 -ac 1 -y \"{temp_audio_path}\" -loglevel quiet")

                # Transcribe the audio segment with Whisper
                result = whisper_model.transcribe(temp_audio_path)
                os.remove(temp_audio_path)

                # Add the transcription to the results
                segments.append({
                    "speaker": speaker,
                    "start_time": str(start),
                    "end_time": str(end),
                    "text": result["text"].strip()
                })

        return {"transcription": segments}
    
    except Exception as e:
        return {"error": str(e)}

# Usage example: transcribe the 'speech.mp3' file
audio_file_path = "speech-94649.mp3"  # Path to your audio file

# Get transcription results
result = transcribe_with_diarization(audio_file_path)

# Display the result as a structured JSON
print(json.dumps(result, indent=2))


HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/pyannote/speaker-diarization/resolve/main/config.yaml (Request ID: Root=1-67fe605a-1c3b79c00b72fc2601e131e9;eabc3eea-2b3d-4f67-9bde-39989f19770a)

Invalid credentials in Authorization header

In [5]:
import os
import tempfile
import whisper
from pyannote.audio import Pipeline
import json

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium" based on your needs

# Load Pyannote model for speaker diarization using your Hugging Face token
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="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")

def transcribe_with_diarization(audio_path):
    """
    Transcribes audio with speaker diarization.
    :param audio_path: Path to the audio file (e.g., 'speech.mp3')
    :return: JSON structure with speaker info and transcriptions
    """
    try:
        # Process the audio file through the diarization pipeline
        diarization = diarization_pipeline({'uri': 'audio', 'audio': audio_path})
    except Exception as e:
        return {"error": f"Diarization failed: {str(e)}"}
    
    segments = []

    # Iterate over the diarization output to extract speaker segments
    for turn, _, speaker in diarization.itertracks(yield_label=True):
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
            start, end = turn.start, turn.end
            temp_audio_path = temp_audio.name

            # Extract segment using ffmpeg
            os.system(f"ffmpeg -i \"{audio_path}\" -ss {start} -to {end} -ar 16000 -ac 1 -y \"{temp_audio_path}\" -loglevel quiet")

            # Transcribe the audio segment with Whisper
            result = whisper_model.transcribe(temp_audio_path)
            os.remove(temp_audio_path)

            # Add the transcription to the results
            segments.append({
                "speaker": speaker,
                "start_time": str(start),
                "end_time": str(end),
                "text": result["text"].strip()
            })

    return {"transcription": segments}


# Usage example: transcribe the 'speech.mp3' file
audio_file_path = "speech-94649.mp3"  # Path to your audio file

# Get transcription results
result = transcribe_with_diarization(audio_file_path)

# Display the result as a structured JSON
print(json.dumps(result, indent=2))



Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.
{
  "error": "Diarization failed: 'NoneType' object is not callable"
}


In [7]:
from pyannote.audio import Pipeline

# Use your Hugging Face authentication token
auth_token = "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"

# Load Pyannote model with token
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=auth_token)



Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.


In [9]:
import os
import tempfile
import whisper
from pyannote.audio import Pipeline
import json

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium" based on your needs

# Load Pyannote model for speaker diarization with your Hugging Face authentication token
auth_token = "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"  # Your Hugging Face token
diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=auth_token)

def transcribe_with_diarization(audio_path):
    """
    Transcribes audio with speaker diarization.
    :param audio_path: Path to the audio file (e.g., 'speech.mp3')
    :return: JSON structure with speaker info and transcriptions
    """
    try:
        # Process the audio file through the diarization pipeline
        diarization = diarization_pipeline({'uri': 'audio', 'audio': audio_path})
        segments = []

        # Iterate over the diarization output to extract speaker segments
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                start, end = turn.start, turn.end
                temp_audio_path = temp_audio.name

                # Extract segment using ffmpeg
                os.system(f"ffmpeg -i \"{audio_path}\" -ss {start} -to {end} -ar 16000 -ac 1 -y \"{temp_audio_path}\" -loglevel quiet")

                # Transcribe the audio segment with Whisper
                result = whisper_model.transcribe(temp_audio_path)
                os.remove(temp_audio_path)

                # Add the transcription to the results
                segments.append({
                    "speaker": speaker,
                    "start_time": str(start),
                    "end_time": str(end),
                    "text": result["text"].strip()
                })

        return {"transcription": segments}
    except Exception as e:
        return {"error": str(e)}

# Usage example: transcribe the 'speech.mp3' file
audio_file_path = "speech-94649.mp3"  # Path to your audio file

# Get transcription results
result = transcribe_with_diarization(audio_file_path)

# Display the result as a structured JSON
print(json.dumps(result, indent=2))



Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.
{
  "error": "'NoneType' object is not callable"
}


In [11]:
import os
import tempfile
import whisper
from pyannote.audio import Pipeline
import json

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")  # You can choose other models like "small", "medium" based on your needs

# Set your Hugging Face token here
auth_token = "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"  # Your Hugging Face token

# Load Pyannote model for speaker diarization with authentication
try:
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=auth_token)
    print("Diarization pipeline loaded successfully!")
except Exception as e:
    print(f"Error loading diarization pipeline: {e}")

def transcribe_with_diarization(audio_path):
    """
    Transcribes audio with speaker diarization.
    :param audio_path: Path to the audio file (e.g., 'speech.mp3')
    :return: JSON structure with speaker info and transcriptions
    """
    try:
        # Process the audio file through the diarization pipeline
        diarization = diarization_pipeline({'uri': 'audio', 'audio': audio_path})
        segments = []

        # Iterate over the diarization output to extract speaker segments
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
                start, end = turn.start, turn.end
                temp_audio_path = temp_audio.name

                # Extract segment using ffmpeg
                os.system(f"ffmpeg -i \"{audio_path}\" -ss {start} -to {end} -ar 16000 -ac 1 -y \"{temp_audio_path}\" -loglevel quiet")

                # Transcribe the audio segment with Whisper
                result = whisper_model.transcribe(temp_audio_path)
                os.remove(temp_audio_path)

                # Add the transcription to the results
                segments.append({
                    "speaker": speaker,
                    "start_time": str(start),
                    "end_time": str(end),
                    "text": result["text"].strip()
                })

        return {"transcription": segments}
    except Exception as e:
        return {"error": str(e)}

# Usage example: transcribe the 'speech.mp3' file
audio_file_path = "speech-94649.mp3"  # Path to your audio file

# Get transcription results
result = transcribe_with_diarization(audio_file_path)

# Display the result as a structured JSON
print(json.dumps(result, indent=2))



Could not download 'pyannote/speaker-diarization' pipeline.
It might be because the pipeline is private or gated so make
sure to authenticate. Visit https://hf.co/settings/tokens to
create your access token and retry with:

   >>> Pipeline.from_pretrained('pyannote/speaker-diarization',
   ...                          use_auth_token=YOUR_AUTH_TOKEN)

If this still does not work, it might be because the pipeline is gated:
visit https://hf.co/pyannote/speaker-diarization to accept the user conditions.
Diarization pipeline loaded successfully!
{
  "error": "'NoneType' object is not callable"
}


In [15]:
!pip install django
!pip install transformers

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)
Downloading transformers-4.51.3-py3-none-any.whl (10.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl (418 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m418.4/418.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling co

In [1]:
# blog_title_suggestor/utils.py
from transformers import pipeline

def generate_titles(blog_content):
    # Load a pre-trained model and tokenizer from Hugging Face
    title_generator = pipeline("text-generation", model="gpt2")

    # Generate three titles based on the blog content
    generated_titles = title_generator(blog_content, max_length=50, num_return_sequences=3)
    
    # Extract and clean up the generated titles
    titles = [title['generated_text'].strip() for title in generated_titles]
    return titles


In [3]:
# blog_title_suggestor/views.py
from django.http import JsonResponse
from .utils import generate_titles

def get_title_suggestions(request):
    # Get the blog content from POST request
    blog_content = request.POST.get('content')
    
    if not blog_content:
        return JsonResponse({'error': 'No blog content provided'}, status=400)
    
    # Generate titles using the NLP model
    titles = generate_titles(blog_content)
    
    # Return the titles as a JSON response
    return JsonResponse({'titles': titles})


ImportError: attempted relative import with no known parent package

In [5]:
import whisper
from pyannote.audio import Pipeline

def transcribe_with_diarization(audio_path):
    # Load models
    asr_model = whisper.load_model("medium")
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
    
    # Perform diarization
    diarization = diarization_pipeline(audio_path)
    
    # Perform transcription
    transcription = asr_model.transcribe(audio_path, word_timestamps=True)
    
    # Align diarization with transcription
    results = []
    for segment in transcription["segments"]:
        speaker = "Unknown"
        for turn, _, speaker_id in diarization.itertracks(yield_label=True):
            if segment["start"] >= turn.start and segment["end"] <= turn.end:
                speaker = speaker_id
                break
        
        results.append({
            "start": segment["start"],
            "end": segment["end"],
            "speaker": speaker,
            "text": segment["text"],
            "words": segment.get("words", [])
        })
    
    return {
        "audio_file": audio_path,
        "segments": results,
        "speaker_count": len(set([turn[2] for turn in diarization.itertracks(yield_label=True)]))
    }

In [19]:
import whisper
from pyannote.audio import Pipeline
import json
from pydub import AudioSegment

def convert_mp3_to_wav(mp3_path, wav_path):
    """Convert MP3 to WAV format for better compatibility"""
    audio = AudioSegment.from_mp3(mp3_path)
    audio.export(wav_path, format="wav")
    return wav_path

def transcribe_with_diarization(audio_path):
    """Transcribe audio with speaker identification"""
    # Load models
    asr_model = whisper.load_model("medium")
    diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
    
    # Perform diarization
    diarization = diarization_pipeline(audio_path)
    
    # Perform transcription
    transcription = asr_model.transcribe(audio_path, word_timestamps=True)
    
    # Align diarization with transcription
    results = []
    for segment in transcription["segments"]:
        speaker = "Unknown"
        for turn, _, speaker_id in diarization.itertracks(yield_label=True):
            if segment["start"] >= turn.start and segment["end"] <= turn.end:
                speaker = speaker_id
                break
        
        results.append({
            "start": round(segment["start"], 2),
            "end": round(segment["end"], 2),
            "speaker": speaker,
            "text": segment["text"],
            "confidence": round(segment.get("confidence", 0), 2)
        })
    
    return {
        "audio_file": audio_path,
        "duration": round(transcription["duration"], 2),
        "language": transcription["language"],
        "speaker_count": len(set([turn[2] for turn in diarization.itertracks(yield_label=True)])),
        "segments": results,
        "metadata": {
            "processing_time": round(transcription["processing_time"], 2),
            "model": "whisper-medium",
        }
    }

def main():
    input_mp3 = "speech-94649.mp3"
    output_wav = "speech.wav"
    output_json = "speech.json"
    
    # Convert MP3 to WAV
    print(f"Converting {input_mp3} to WAV format...")
    wav_path = convert_mp3_to_wav(input_mp3, output_wav)
    
    # Process the audio
    print("Processing audio with transcription and diarization...")
    result = transcribe_with_diarization(wav_path)
    
    # Save results
    with open(output_json, 'w') as f:
        json.dump(result, f, indent=2)
    
    print(f"Processing complete. Results saved to {output_json}")
    print(f"Found {result['speaker_count']} speakers in {result['duration']} seconds of audio.")

if __name__ == "__main__":
    main()

Converting speech-94649.mp3 to WAV format...




FileNotFoundError: [Errno 2] No such file or directory: 'ffprobe'

In [9]:
!pip install pydub ffmpeg-python

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting future (from ffmpeg-python)
  Downloading future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading future-1.0.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.3/491.3 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydub, future, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 future-1.0.0 pydub-0.25.1


In [15]:
brew install ffmpeg

SyntaxError: invalid syntax (2552295309.py, line 1)

In [17]:
!pip install torch whisper pyannote.audio pydub

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25ldone
[?25h  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=1ee8e1459fd89999a0afedf9915ba8493142e197dd3af73b7e8e33b6b86a424f
  Stored in directory: /Users/shubhamthakur/Library/Caches/pip/wheels/34/b8/4e/9c4c3351d670e06746a340fb4b7d854c76517eec225e5b32b1
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10
