In [1]:
!pip install moviepy openai-whisper torch numpy

Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.5/209.5 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K 

In [2]:
!pip install better-profanity

Collecting better-profanity
  Downloading better_profanity-0.7.0-py3-none-any.whl.metadata (7.1 kB)
Downloading better_profanity-0.7.0-py3-none-any.whl (46 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.1/46.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: better-profanity
Successfully installed better-profanity-0.7.0


In [3]:
!pip install moviepy openai-whisper better-profanity torch numpy



In [4]:
!pip install pydub

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [6]:
import moviepy.editor as mp
import whisper
import json
from pathlib import Path
import torch
from better_profanity import profanity
from typing import List, Dict
from pydub import AudioSegment
import tempfile
import os

class VideoProfanityMuter:
    """
    Mute profane words in a video's audio track.

    Pipeline: extract the audio with moviepy, transcribe it with Whisper using
    word-level timestamps, flag words with better_profanity, replace every
    flagged span with silence using pydub, and write a new video file.
    """

    # Padding (in ms) applied on both sides of each flagged word.
    # Overridden per instance by __init__.
    buffer_ms = 100

    def __init__(self, model_size="base", buffer_ms=100):
        """
        Initialize the system with Whisper model and profanity checker

        Args:
            model_size: Model name passed to ``whisper.load_model``.
            buffer_ms: Milliseconds of extra silence added before and after
                each flagged word.

        Raises:
            ValueError: If ``buffer_ms`` is negative.
        """
        # Validate before loading the model so a bad argument fails fast.
        if buffer_ms < 0:
            raise ValueError("buffer_ms must be non-negative")
        self.buffer_ms = buffer_ms
        self.model = whisper.load_model(model_size)
        profanity.load_censor_words()

    def process_video(self, input_video_path: str, output_video_path: str = None) -> str:
        """
        Main processing pipeline

        Args:
            input_video_path: Path of the video to process.
            output_video_path: Where to write the cleaned video. Defaults to
                the input path with ``_clean`` appended to the file stem.

        Returns:
            The path of the cleaned video, or ``input_video_path`` unchanged
            when no profanity was detected (no output file is written then).
        """
        if output_video_path is None:
            source = Path(input_video_path)
            # with_name keeps the directory and suffix; equivalent to
            # with_stem(stem + "_clean") but also works before Python 3.9.
            output_video_path = str(source.with_name(f"{source.stem}_clean{source.suffix}"))

        print("Step 1: Transcribing video...")
        transcription = self._transcribe_video(input_video_path)

        print("Step 2: Detecting profanity...")
        profanity_instances = self._detect_profanity(transcription)

        if not profanity_instances:
            print("No profanity detected in the video.")
            return input_video_path

        print("Step 3: Muting profane sections...")
        self._create_clean_video(input_video_path, output_video_path, profanity_instances)

        print(f"\nProcessing complete! Clean video saved to: {output_video_path}")
        return output_video_path

    def _transcribe_video(self, video_path: str) -> dict:
        """
        Extract audio and transcribe with word-level timestamps

        Side effect: the transcription is also saved as JSON next to the
        video (same path with a ``.json`` suffix).

        Raises:
            ValueError: If the video has no audio track.
        """
        try:
            # Create temporary directory for intermediate files
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")

                # Extract audio using moviepy; always release the clip, even
                # when the audio track is missing or extraction fails.
                video = mp.VideoFileClip(video_path)
                try:
                    if video.audio is None:
                        raise ValueError("Video has no audio track")
                    video.audio.write_audiofile(temp_audio_path, verbose=False, logger=None)
                finally:
                    video.close()

                # Transcribe
                result = self.model.transcribe(
                    temp_audio_path,
                    language="en",
                    word_timestamps=True,
                    verbose=False
                )

                # Save transcription to JSON
                transcription_path = str(Path(video_path).with_suffix('.json'))
                with open(transcription_path, 'w') as f:
                    json.dump(result, f, indent=2)

                return result

        except Exception as e:
            print(f"Error during transcription: {str(e)}")
            raise

    def _detect_profanity(self, transcription: dict) -> List[Dict]:
        """
        Detect profanity in transcription with exact timestamps

        Args:
            transcription: A transcription dict with a ``"segments"`` list, or
                a path to a JSON file containing one.

        Returns:
            One dict per flagged word with keys ``"word"``, ``"start_time"``
            and ``"end_time"`` (both in milliseconds), sorted by start time.
        """
        profanity_instances = []

        # A string is treated as the path of a saved transcription file.
        if isinstance(transcription, str):
            with open(transcription, 'r') as f:
                transcription = json.load(f)

        for segment in transcription["segments"]:
            # Segments without word-level timing cannot be muted precisely.
            if "words" not in segment:
                continue

            for word in segment["words"]:
                word_text = word["word"].strip().lower()
                if profanity.contains_profanity(word_text):
                    instance = {
                        "word": word["word"],
                        "start_time": int(word["start"] * 1000),  # Convert to milliseconds
                        "end_time": int(word["end"] * 1000)
                    }
                    profanity_instances.append(instance)

        return sorted(profanity_instances, key=lambda x: x["start_time"])

    @staticmethod
    def _mute_windows(profanity_instances: List[Dict], audio_length_ms: int, buffer_ms: int = 100) -> List[tuple]:
        """
        Turn flagged words into the list of time windows to silence.

        Each word's span is padded by ``buffer_ms`` on both sides, clamped to
        ``[0, audio_length_ms]``, dropped if nothing remains after clamping,
        and merged with the previous window when the two overlap or touch.

        Returns:
            Sorted, non-overlapping ``(start_ms, end_ms)`` tuples.
        """
        windows = []
        for instance in sorted(profanity_instances, key=lambda x: x["start_time"]):
            start = max(0, instance["start_time"] - buffer_ms)
            end = min(audio_length_ms, instance["end_time"] + buffer_ms)
            # Timestamps past the end of the audio (or inverted spans) would
            # otherwise yield a zero or negative silence duration.
            if end <= start:
                continue
            if windows and start <= windows[-1][1]:
                previous_start, previous_end = windows[-1]
                windows[-1] = (previous_start, max(previous_end, end))
            else:
                windows.append((start, end))
        return windows

    def _create_clean_video(self, input_path: str, output_path: str, profanity_instances: List[Dict]):
        """
        Create new video with muted profane sections using pydub

        Args:
            input_path: Path of the source video.
            output_path: Path the cleaned video is written to.
            profanity_instances: Dicts with ``"start_time"`` / ``"end_time"``
                in milliseconds, as produced by ``_detect_profanity``.

        Raises:
            ValueError: If the video has no audio track.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                video = mp.VideoFileClip(input_path)
                processed_audio = None
                final_video = None
                try:
                    if video.audio is None:
                        raise ValueError("Video has no audio track")

                    # Extract audio using moviepy
                    temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")
                    video.audio.write_audiofile(temp_audio_path, verbose=False, logger=None)

                    # Load audio with pydub
                    audio = AudioSegment.from_wav(temp_audio_path)

                    # Replace every window with silence of the same length so
                    # the audio stays aligned with the video frames.
                    for start_time, end_time in self._mute_windows(
                        profanity_instances, len(audio), self.buffer_ms
                    ):
                        mute_segment = AudioSegment.silent(
                            duration=end_time - start_time,
                            frame_rate=audio.frame_rate,  # match the source so nothing is resampled
                        )
                        audio = audio[:start_time] + mute_segment + audio[end_time:]

                    # Export processed audio
                    temp_processed_audio = os.path.join(temp_dir, "processed_audio.wav")
                    audio.export(temp_processed_audio, format="wav")

                    # Load processed audio back into video
                    processed_audio = mp.AudioFileClip(temp_processed_audio)
                    final_video = video.set_audio(processed_audio)

                    # Write final video
                    final_video.write_videofile(
                        output_path,
                        codec='libx264',
                        audio_codec='aac',
                        temp_audiofile=os.path.join(temp_dir, 'temp-final-audio.m4a'),
                        remove_temp=True,
                        verbose=False,
                        logger=None
                    )
                finally:
                    # Cleanup runs on success and failure, and before the
                    # temporary directory is removed.
                    video.close()
                    if final_video is not None:
                        final_video.close()
                    if processed_audio is not None:
                        processed_audio.close()

        except Exception as e:
            print(f"Error during video processing: {str(e)}")
            raise

def main():
    """
    Run the muting pipeline on the sample Colab video.

    Any exception raised while building the muter or processing the video is
    reported on stdout instead of being propagated.
    """
    source_clip = "/content/video_1.mp4"
    cleaned_clip = "/content/video_1_clean.mp4"

    try:
        VideoProfanityMuter(model_size="base").process_video(source_clip, cleaned_clip)
    except Exception as err:
        print(f"Error in main: {str(err)}")

# Entry point: run the demo pipeline when this code is executed directly
# (as a script or as a notebook cell), but not when it is imported.
if __name__ == "__main__":
    main()

  checkpoint = torch.load(fp, map_location=device)




Step 1: Transcribing video...


100%|██████████| 1054/1054 [00:29<00:00, 35.71frames/s]

Step 2: Detecting profanity...
No profanity detected in the video.



