In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import utils as u

In [11]:
# Input recording; it is loaded further down at a 16 kHz sampling rate
audiofile = "thodi/Karuna Nidhi Illalo/Karuna Nidhi Illalo.multitrack-violin-alaapana.mp3"

# Metadata fields that u.process_filename derives from the file path
raga, piece, instrument, section = u.process_filename(audiofile)
for field_label, field_value in (
    ("Raga:", raga),
    ("Piece:", piece),
    ("Instrument:", instrument),
    ("Section:", section),
):
    print(field_label, field_value)

# Load the waveform at the requested rate; the helper hands back the signal
# together with a sampling rate, which is kept as `sr`
sr = 16000
y, sr = u.load_normalize(audiofile, sr=sr)

# Duration in seconds = number of samples / sampling rate
length_s = len(y) / sr
print("Audio length:", length_s, "s")

Raga: thodi
Piece: Karuna Nidhi Illalo
Instrument: violin
Section: alaapana
Audio length: 305.1639375 s


#### Tonic processing only for violin tracks

In [5]:
# Calculate tonic and save
# u.identify_tonic is given the loaded signal and its sampling rate; the
# result is printed and then written as text to a file named after the audio
# file, with ".mp3" swapped for ".tonic".
tonic = u.identify_tonic(y, sr)
print(f"Estimated tonic: {tonic}")
tonic_file = audiofile.replace(".mp3", ".tonic")
# The value is wrapped in a one-element list so savetxt writes a single line;
# fmt="%s" writes it using its plain string form.
np.savetxt(tonic_file, [tonic], fmt="%s")

Estimated tonic: 135.63082885742188


In [13]:
# Load tonic
# Reads a previously saved tonic value back from a text file with np.loadtxt
# and prints it.
# NOTE(review): this path is hard-coded to the "violin-main" track, while the
# audio file loaded in this notebook is the "violin-alaapana" track and the
# save cell writes to audiofile.replace(".mp3", ".tonic"). Presumably the
# tonic estimated on the main violin track is reused on purpose — confirm.
tonic_file = "thodi/Karuna Nidhi Illalo/Karuna Nidhi Illalo.multitrack-violin-main.tonic"
tonic = np.loadtxt(tonic_file)
print(f"Loaded tonic: {tonic}")

Loaded tonic: 135.63082885742188


In [14]:
# High-pass the signal using (tonic - 1) as the filter frequency argument
# (presumably the cutoff, one unit below the tonic), then store the filtered
# result as 32-bit floats.
highpass_cutoff = tonic - 1
y_filt = u.high_pass(y, sr, highpass_cutoff, order=10).astype(np.float32)

#### Raw pitch extraction

In [15]:
# Melodia settings shared by every instrument
hopsize_ms = 7.5
hopSize = u.ms_to_samples(hopsize_ms, sr)
binResolution = 1
filterIterations = 5

# Vocal tracks use the unfiltered signal with a fixed lower bound of 80;
# every other instrument uses the high-pass-filtered signal with the lower
# bound set to (tonic - 1). Only the selected branch touches y_filt / tonic.
if instrument == "vocal":
    melodia_signal, minFrequency = y, 80
else:
    melodia_signal, minFrequency = y_filt, tonic - 1

# Single call for both cases; the two outputs are kept as `f0` (raw pitch)
# and `conf` (presumably a per-frame confidence — confirm in u.melodia).
f0, conf = u.melodia(
    melodia_signal,
    sr,
    hopSize=hopSize,
    minFrequency=minFrequency,
    binResolution=binResolution,
    filterIterations=filterIterations,
)

In [16]:
# Empirical frame rate of the pitch contour: number of f0 values divided by
# the audio duration in seconds
n_frames = len(f0)
framerate = n_frames / length_s
print(f"Pitch contour frame rate: {framerate}")

Pitch contour frame rate: 133.3381668009183


In [17]:
# Save the pitch contour as a three-column text file: time, f0, conf.
# The file is named after the audio file, with ".mp3" swapped for ".pitch".
pitchfile = audiofile.replace(".mp3", ".pitch")

# Timestamp of frame i is i * hopSize / sr seconds, i.e. the contour lives on
# the analysis hop grid. Spreading the timestamps over [0, length_s] with
# np.linspace would instead give a spacing of length_s / (N - 1), which is
# not the hop duration and makes the saved time axis drift away from the
# actual frame positions.
# Assumes u.melodia emits one value per hop starting at t = 0 — the measured
# frame rate of the contour (about 1 / 7.5 ms) is consistent with this, but
# confirm against u.melodia.
times = np.arange(len(f0)) * hopSize / sr

# Stack the three 1-D series as columns (one row per frame) and write them out
np.savetxt(pitchfile, np.array([times, f0, conf]).T)