In [None]:
import numpy as np
from scipy.io import wavfile
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

In [None]:
# Path of the WAV track that the following cells load and play back.
MUSIC_PATH = './samples/Imagine_Dragons_Converted.wav'

In [None]:
import librosa

In [None]:
# Load the track with librosa, requesting a 44100 Hz sample rate.
# y receives the samples, sr the sample rate returned by the loader.
y , sr = librosa.load(MUSIC_PATH, sr=44100)

In [None]:
# Display the sample array returned by librosa.load.
y

In [None]:
# Display the sample rate returned by librosa.load.
sr

In [None]:
# Track duration in seconds: sample count divided by samples per second.
# NOTE(review): len(y) is the sample count only if y is one-dimensional (mono) — confirm.
seconds = len(y) / sr
seconds

In [None]:
import IPython.display as ipd
# Embed an audio player for the source file so it can be listened to in the notebook.
ipd.Audio(MUSIC_PATH)

### Определяем биты

In [None]:
# Track the beats and synthesise a click on every detected beat frame so the
# result can be checked by ear.
tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
y_beats = librosa.clicks(frames=beats, sr=sr, length=len(y))

# Equal-weight mix of the original track and the click track.
beat_song = 0.5 * y + 0.5 * y_beats

# Peak-normalise the mix into the int16 range and write it out as a WAV file.
beat_peak = np.abs(beat_song).max()
scaled = np.int16(beat_song / beat_peak * 32767)
wavfile.write('beat_signal.wav', sr, scaled)

In [None]:
def define_seconds_of_beat(y, sr, hop_length=512):
    """Return the time, in seconds, of every beat detected in ``y``.

    Parameters
    ----------
    y : np.ndarray
        Audio samples handed to ``librosa.beat.beat_track``.
    sr : int
        Sample rate of ``y`` in Hz.
    hop_length : int, optional
        Hop length in samples, used for both the beat tracker and the
        frame-to-time conversion. Defaults to 512.

    Returns
    -------
    np.ndarray
        Beat positions converted from frame indices to seconds.
    """
    _tempo, beat_frames = librosa.beat.beat_track(
        y=y, sr=sr, hop_length=hop_length
    )
    # The conversion has to use the same sr / hop_length as the tracker.
    # Without an explicit sr, frames_to_time falls back to its own default
    # sample rate and every timestamp is scaled by the wrong factor.
    return librosa.frames_to_time(beat_frames, sr=sr, hop_length=hop_length)

In [None]:
# Beat positions of the loaded track, as returned by define_seconds_of_beat.
beat_seconds = define_seconds_of_beat(y, sr)

### Определяем сегменты мелодии

In [None]:
# Keyword arguments shared by every frame-based call below, so they all use
# the same sample rate and hop length.
frame_kwargs = dict(sr=sr, hop_length=512)

tempo, beats = librosa.beat.beat_track(y=y, **frame_kwargs)
beat_times = librosa.frames_to_time(beats, **frame_kwargs)
cqt = np.abs(librosa.cqt(y, **frame_kwargs))

# Ask librosa to sub-divide the beat frames (n_segments=2) using the CQT
# magnitudes, then synthesise a click on every resulting boundary.
subseg = librosa.segment.subsegment(cqt, beats, n_segments=2)
y_melody = librosa.clicks(frames=subseg, sr=sr, length=len(y))

# Equal-weight mix, peak-normalised into the int16 range and written as WAV.
melody_song = 0.5 * y + 0.5 * y_melody
melody_peak = np.abs(melody_song).max()
scaled = np.int16(melody_song / melody_peak * 32767)
wavfile.write('melody_signal.wav', sr, scaled)

In [None]:
def define_seconds_of_melody(y, sr, n_segments=3, hop_length=512):
    """Return the times, in seconds, of the sub-beat segment boundaries of ``y``.

    The beat frames found by ``librosa.beat.beat_track`` are sub-divided by
    ``librosa.segment.subsegment`` using the magnitude of the constant-Q
    transform, and the resulting frame indices are converted to seconds.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    sr : int
        Sample rate of ``y`` in Hz.
    n_segments : int, optional
        Value forwarded to ``librosa.segment.subsegment``. Defaults to 3.
    hop_length : int, optional
        Hop length in samples, shared by beat tracking, the CQT and the
        frame-to-time conversion so all three agree. Defaults to 512.

    Returns
    -------
    np.ndarray
        Segment boundary positions in seconds.
    """
    _tempo, beat_frames = librosa.beat.beat_track(
        y=y, sr=sr, hop_length=hop_length
    )
    # Only the magnitude of the CQT is used for the sub-segmentation.
    cqt_magnitude = np.abs(librosa.cqt(y, sr=sr, hop_length=hop_length))
    boundary_frames = librosa.segment.subsegment(
        cqt_magnitude, beat_frames, n_segments=n_segments
    )
    return librosa.frames_to_time(boundary_frames, sr=sr, hop_length=hop_length)

### Определяем интенсивные моменты

In [None]:
FRAME_SIZE = 1024  # samples per analysis frame
HOP_LENGTH = 512   # samples between the starts of consecutive frames


def amplitude_envelope(y, frame_size, hop_length):
    """Return the per-frame maximum of ``y``.

    Frames start every ``hop_length`` samples and span up to ``frame_size``
    samples; the last frames are shorter when they run past the end of ``y``.

    Parameters
    ----------
    y : array-like
        One-dimensional signal.
    frame_size : int
        Maximum number of samples in a frame.
    hop_length : int
        Step between frame starts; must be non-zero (``range`` rejects 0).

    Returns
    -------
    np.ndarray
        One value per frame; empty when ``y`` is empty.

    Notes
    -----
    The signed maximum of each frame is taken, not the maximum of ``|y|``.
    NOTE(review): confirm the signed maximum is the intended envelope.
    """
    # np.max reduces each slice in C and propagates NaN consistently, whereas
    # the builtin max walks the array element by element and gives an
    # order-dependent result when a frame contains NaN.
    return np.array([
        np.max(y[start:start + frame_size])
        for start in range(0, len(y), hop_length)
    ])

In [None]:
# Amplitude envelope of the loaded track, using the frame size and hop length defined above.
ae = amplitude_envelope(y, FRAME_SIZE, HOP_LENGTH)

In [None]:
frames = range(0, ae.size)
# One timestamp per envelope frame. sr is passed explicitly because the audio
# was loaded at `sr`; without it frames_to_time assumes its own default sample
# rate and the axis is scaled by the wrong factor.
t = librosa.frames_to_time(frames, sr=sr, hop_length=HOP_LENGTH)

In [None]:
import collections
from itertools import islice

def sliding_window(iterable, n):
    """Yield successive overlapping windows of length ``n`` as tuples.

    Example: ``sliding_window('ABCDEFG', 4)`` yields ABCD, BCDE, CDEF, DEFG.
    Nothing is yielded when the input has fewer than ``n`` items.
    """
    source = iter(iterable)
    # Pre-load the first n-1 items; each further item then completes a window.
    head = islice(source, n - 1)
    buffer = collections.deque(head, maxlen=n)
    for item in source:
        # maxlen=n makes the deque drop its oldest entry automatically.
        buffer.append(item)
        yield tuple(buffer)

# Plot the amplitude envelope and overlay its moving average over n frames.
plt.figure(figsize=(15,5))
plt.plot(t, ae)
n = 100
window_means = [np.mean(chunk) for chunk in sliding_window(ae, n)]
# One leading zero and n-2 trailing zeros pad the averages back to the length
# of ae (for len(ae) >= n - 1), so they can be drawn against the same axis t.
mean_arr = np.array([0] + window_means + [0] * (n-2))
# mean_arr is kept shifted up by 1; the shift is removed again for the plot.
mean_arr = mean_arr + 1
plt.plot(t, mean_arr-1)

In [None]:
plt.figure(figsize=(15,5))
# Exponentiate the smoothed envelope (base 10000) to exaggerate loud passages.
volume = np.power(10000, mean_arr)

# Moving average over n frames, padded with one leading zero and n-2 trailing
# zeros, the same padding scheme as the envelope average.
mean_vol = [0]
n = 3
for elems in sliding_window(volume, n):
    mean_vol.append(np.mean(elems))
mean_vol.extend([0] * (n-2))
mean_vol = np.array(mean_vol)

# Scale so the loudest frame is 1.
normalized_vol = np.abs(mean_vol / np.max(mean_vol))
plt.plot(t, normalized_vol)

# find_peaks returns (indices, properties); only the indices are needed.
peaks = find_peaks(normalized_vol)[0]
# Read the peak times off t, the axis the curve is drawn against, so the
# markers stay aligned with the curve whatever sample rate and hop length t
# was built with. A bare frames_to_time(peaks) would silently use librosa's
# default sample rate and hop length instead.
peaks_t = t[peaks]
plt.plot(peaks_t, normalized_vol[peaks], "x")

In [None]:
# Show both values: only the last expression of a cell is displayed, so a bare
# np.argmax(...) on its own line would be computed and thrown away.
# (index of the loudest frame, number of frames)
np.argmax(normalized_vol), len(normalized_vol)

In [None]:
# Upsample the per-frame volume curve to one value per audio sample.
# np.interp accepts the whole array of query points at once, which avoids a
# Python-level loop with one scalar np.interp call per sample of y.
sample_times = np.linspace(t[0], t[-1], num=len(y))
y_new = np.interp(sample_times, t, normalized_vol)

In [None]:
# Apply the per-sample volume curve to the track.
y_new = np.array(y_new)
scaled_signal = y_new * y
plt.plot(scaled_signal)

# Peak-normalise into the int16 range and write the result as a WAV file.
signal_peak = np.max(np.abs(scaled_signal))
scaled = np.int16(scaled_signal / signal_peak * 32767)
wavfile.write('sample_with_vol_scaling.wav', sr, scaled)

In [None]:
# Embed a player for the volume-scaled WAV file written by the previous cell.
ipd.Audio('sample_with_vol_scaling.wav')

### MP3 to WAV

In [None]:
from pydub import AudioSegment
# Source MP3 and the WAV file it is converted to (same name, different extension).
INPUT_PATH = 'samples/Jamie_Christopherson_-_Rules_of_Nature_OST_Metal_Gear_Rising_Revengeance_73652795.mp3'
OUTPUT_PATH = 'samples/Jamie_Christopherson_-_Rules_of_Nature_OST_Metal_Gear_Rising_Revengeance_73652795.wav'
sound = AudioSegment.from_mp3(INPUT_PATH)
# export() returns the output file handle still open; close it so the handle is
# not leaked (and not echoed as the cell's output).
sound.export(OUTPUT_PATH, format="wav").close()

In [None]:
# Embed a player for the converted WAV file.
ipd.Audio(OUTPUT_PATH)