In [37]:
import subprocess

def get_video_duration(video_file):
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", video_file],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT
    )
    return float(result.stdout)

def trim_video_ffmpeg(video_file, trimmed_video_file, trim_duration=3.5 * 60):
    video_duration = get_video_duration(video_file)
    
    end_time = video_duration - trim_duration
    
    command = [
        'ffmpeg',
        '-i', video_file,
        '-ss', '0',
        '-to', str(end_time),
        '-c', 'copy',
        trimmed_video_file
    ]
    subprocess.run(command)

video_file = "/Users/miranyildirim/Desktop/Metadata/video/UK401 ENG - Full episode.mov"
trimmed_video_file = "video_processed/trimmed_video.mov"

trim_video_ffmpeg(video_file, trimmed_video_file)


ffmpeg version 4.2.2 Copyright (c) 2000-2019 the FFmpeg developers
  built with clang version 12.0.0
  configuration: --prefix=/Users/ktietz/demo/mc3/conda-bld/ffmpeg_1628925491858/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=arm64-apple-darwin20.0.0-clang --disable-doc --enable-avresample --enable-gmp --enable-hardcoded-tables --enable-libfreetype --enable-libvpx --enable-pthreads --enable-libopus --enable-postproc --enable-pic --enable-pthreads --enable-shared --enable-static --enable-version3 --enable-zlib --enable-libmp3lame --disable-nonfree --enable-gpl --enable-gnutls --disable-openssl --enable-libopenh264 --enable-libx264
  libavutil      56. 31.100 / 56. 31.100
  libavcodec     58. 54.100 / 58. 54.100
  libavformat    58. 29.100 / 58. 29.100
  libavdevice    58.  8.100 / 58.  8.100
  libavfilter     7. 57.100 /  7. 57

In [38]:
from moviepy.editor import VideoFileClip

def extract_audio_from_video(video_file, audio_output):
    from moviepy.editor import VideoFileClip
    video = VideoFileClip(video_file)
    video.audio.write_audiofile(audio_output)

video_file = "video_processed/trimmed_video.mov"
audio_output = "video_processed/extracted_audio.wav"

extract_audio_from_video(video_file, audio_output)


MoviePy - Writing audio in video_processed/extracted_audio.wav


                                                                        

MoviePy - Done.




In [65]:
import librosa
import numpy as np

def detect_music_by_silence(audio_file, silence_threshold=0.02, min_duration=5.0, min_silence_duration=2.0):
    y, sr = librosa.load(audio_file, sr=None)
    
    energy = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)[0]
    times = librosa.frames_to_time(np.arange(len(energy)), sr=sr, hop_length=512)
    
    non_silent = energy > silence_threshold
    segments = []
    start_time = None
    
    for t, is_non_silent in zip(times, non_silent):
        if is_non_silent and start_time is None:
            start_time = t
        elif not is_non_silent and start_time is not None:
            segment_duration = t - start_time
            if segment_duration >= min_duration:
                segments.append((start_time, t))
            start_time = None
    
    if start_time is not None:
        segment_duration = times[-1] - start_time
        if segment_duration >= min_duration:
            segments.append((start_time, times[-1]))
    
    filtered_segments = []
    for i in range(len(segments) - 1):
        if segments[i+1][0] - segments[i][1] >= min_silence_duration:
            filtered_segments.append(segments[i])
    
    return filtered_segments

min_song_duration = 20.0
silence_threshold = 0.02
min_silence_duration = 2.0 

music_segments = detect_music_by_silence(audio_output, silence_threshold=silence_threshold, min_duration=min_song_duration, min_silence_duration=min_silence_duration)
print("Filtered music segments (likely songs):", music_segments)


Filtered music segments (likely songs): [(np.float64(21.89641723356009), np.float64(43.10784580498866)), (np.float64(963.7442176870749), np.float64(990.9928344671201)), (np.float64(1002.9627210884354), np.float64(1026.9373242630386))]


In [66]:
music_segments = np.array(music_segments)
print(music_segments.shape)

(3, 2)


In [67]:
def calculate_bpm(y, sr):
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    return tempo

def get_bpm_for_music_sections(audio_file, music_segments):
    y, sr = librosa.load(audio_file, sr=None)
    bpm_list = []

    for segment in music_segments:
        start_time = segment[0]
        end_time = segment[-1] if len(segment) > 1 else segment[0] + 1 
        start_sample = librosa.time_to_samples(start_time, sr=sr)
        end_sample = librosa.time_to_samples(end_time, sr=sr)

        y_segment = y[start_sample:end_sample]
        bpm = calculate_bpm(y_segment, sr)
        bpm_list.append(bpm)

    return bpm_list

bpm_values = get_bpm_for_music_sections(audio_output, music_segments)
print("BPM for each music segment:", bpm_values)

BPM for each music segment: [array([120.18531977]), array([120.18531977]), array([120.18531977])]


In [68]:
def calculate_weighted_average_bpm(bpm_values, segments):
    total_duration = 0
    weighted_bpm_sum = 0
    
    for bpm, segment in zip(bpm_values, segments):
        start_time = segment[0]
        end_time = segment[-1] if len(segment) > 1 else segment[0] + 1
        duration = end_time - start_time
        
        weighted_bpm_sum += bpm * duration
        total_duration += duration
    
    weighted_average_bpm = weighted_bpm_sum / total_duration if total_duration > 0 else 0
    return weighted_average_bpm

weighted_average_bpm = calculate_weighted_average_bpm(bpm_values, music_segments)
print(f"Weighted Average BPM: {weighted_average_bpm}")

Weighted Average BPM: [120.18531977]
