## Imports

In [None]:
import json
import glob
import subprocess
from collections import OrderedDict
import itertools
import numpy as np
import matplotlib.pyplot as plt
import librosa
from IPython.display import Audio as ipy_audio
from IPython.core.display import display

from quicktranscribe import tonic, pitch, wave, kde
from mogra import tonnetz
from mogra.datatypes import Swar, normalize_frequency, ratio_to_swar, SWAR_BOUNDARIES

- Clone and install https://github.com/MTG/pycompmusic
- Download data using `download-dunya.py`

In [None]:
# Root folder of the downloaded Dunya Hindustani data. Keep the trailing slash:
# file paths below are built by plain string concatenation (DATA_DIR + name).
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
DATA_DIR = "/Users/neerajaabhyankar/Repos/icm-shruti-analysis/data-dunya-hindustani/"

In [None]:
# Syntonic comma (frequency ratio 81/80) expressed as a fraction of an octave
# (the "0 to 1 scale"): its size in semitones divided by 12. The 220 Hz
# reference is arbitrary, since only the ratio affects the difference.
SYNTONIC_COMMA = (librosa.hz_to_midi(220*81/80) - librosa.hz_to_midi(220))/12

## Util Functions

In [None]:
def fetch_audio(ra):
    """Download the audio of every recording in `ra` as mp3 using yt-dlp.

    Parameters
    ----------
    ra : dict
        Nested mapping ``{raag: {artist: url}}``. Each entry is downloaded
        into ``concrete-demo/`` with the output template
        ``{raag}-{artist}.mp3``.

    The yt-dlp output is printed; anything written to stderr is printed with
    an ``Error:`` prefix. Nothing is returned.
    """
    for raag, vv in ra.items():
        for artist, url in vv.items():
            # Pass the arguments as a list (no shell): URLs contain characters
            # such as "?" and "&" that a shell would interpret, and raag or
            # artist names may contain spaces.
            command = [
                "/opt/homebrew/bin/yt-dlp", url,
                "-f", "ba",
                "-x", "--audio-format", "mp3",
                "--ffmpeg-location", "/opt/homebrew/bin/ffmpeg",
                "-P", "concrete-demo/",
                "-o", f"{raag}-{artist}.mp3",
            ]
            result = subprocess.run(command, capture_output=True)
            print(result.stdout.decode())
            if len(result.stderr) > 0:
                print("Error:", result.stderr.decode())

In [None]:
def annotate_tonic(track_path, start=8*60, end=9*60):
    """Interactively choose the tonic of a recording and save it to disk.

    An excerpt of ``track_path + ".mp3"`` is loaded, its pitch KDE is computed
    relative to a provisional 220 Hz tonic, and the KDE peaks are offered to
    the user as candidate tonics, strongest first. Each candidate is played as
    a sine tone; the first one confirmed with "y" is written to
    ``track_path + ".ctonic.txt"``.

    Parameters
    ----------
    track_path : str
        Path of the recording without the ".mp3" extension.
    start, end : number, optional
        Bounds of the excerpt passed to ``wave.read_audio_section``
        (defaults 8*60 and 9*60; presumably seconds).

    Returns
    -------
    None
        In all cases; "No tonic found" is printed when no candidate is
        accepted and nothing is written.
    """
    DEFAULT_TONIC = 220
    # NOTE: this changes numpy's print options for the whole session.
    np.set_printoptions(suppress=True)

    y_stereo, sr = wave.read_audio_section(track_path + ".mp3", start, end)
    y_sample = librosa.to_mono(y_stereo.T)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=DEFAULT_TONIC)
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample.
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="teal")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")

    display(ipy_audio(y_sample, rate=sr))
    input("hear the audio and press any key to continue")

    # Offer the most prominent peaks first.
    ranked_peaks = sorted(peaks, key=lambda x: kde_sample[x], reverse=True)
    for peak in ranked_peaks:
        # generate a sine wave of the peak frequency and play it
        fpeak = librosa.midi_to_hz(librosa.hz_to_midi(DEFAULT_TONIC) + 12 * peak / len(kde_sample))
        # Synthesize at the playback rate: librosa.tone otherwise uses its own
        # default sample rate, and the tone would sound at the wrong pitch
        # whenever `sr` differs from it.
        ypeak = librosa.tone(fpeak, sr=sr, duration=3)
        display(ipy_audio(ypeak, rate=sr))
        ft = input("Is this the tonic? (y/n): ")
        if ft == "y":
            break
    else:
        # No candidate accepted (or there were no peaks at all).
        print("No tonic found")
        return None

    # write tonic to file
    tonic.write_tonic(track_path + ".ctonic.txt", fpeak)

In [None]:
def read_sample_and_tonic(track_path, start=10*60, end=20*60):
    """Load a mono excerpt of a recording together with its stored tonic.

    Parameters
    ----------
    track_path : str
        Path of the recording without extension; ``.ctonic.txt`` and ``.mp3``
        are appended to locate the tonic file and the audio.
    start, end : number, optional
        Bounds of the excerpt passed to ``wave.read_audio_section``. The
        defaults (10*60 and 20*60) reproduce the original fixed 10-minute
        sample; presumably seconds.

    Returns
    -------
    tuple
        ``(y_sample, sr, ctonic)``: the mono audio, the sampling rate
        returned by the reader, and the tonic read from the tonic file.
    """
    ctonic = tonic.read_tonic(track_path + ".ctonic.txt")

    y_stereo, sr = wave.read_audio_section(track_path + ".mp3", start, end)
    # The reader's output is transposed before the channels are mixed down.
    y_sample = librosa.to_mono(y_stereo.T)

    return y_sample, sr, ctonic

In [None]:
def plot_annotations(track):
    """Plot the histogram of the stored pitch annotations of one recording.

    `track` is the recording's base name inside DATA_DIR; the tonic is read
    from ``<track>.ctonic.txt`` and the annotations from ``<track>.pitch.txt``.
    """
    track_base = DATA_DIR + track
    stored_tonic = tonic.read_tonic(track_base + ".ctonic.txt")
    annotations, annotation_rate = pitch.read_pitch(track_base + ".pitch.txt")

    # Only the annotations are plotted, so the validator gets no audio.
    validator = pitch.PitchValidator(audio_array=np.array([]), sampling_rate=0)
    validator.set_annotation(pitch_annotations=annotations, annotation_rate=annotation_rate)
    validator.set_tonic(stored_tonic)

    validator.plot_annotations_hist()

## Plot Annotations

In [None]:
# Annotation histogram for this recording (reads its .pitch.txt and .ctonic.txt from DATA_DIR).
plot_annotations("Omkar Dadarkar - Raag Todi")

In [None]:
# Annotation histogram for this recording (reads its .pitch.txt and .ctonic.txt from DATA_DIR).
plot_annotations("Omkar Dadarkar - Raag Multani")

In [None]:
# Annotation histogram for this recording (reads its .pitch.txt and .ctonic.txt from DATA_DIR).
plot_annotations("Kaustuv Kanti Ganguli - Raag Puriya Dhanashree")

In [None]:
# Annotation histogram for this recording (reads its .pitch.txt and .ctonic.txt from DATA_DIR).
plot_annotations("Kaustuv Kanti Ganguli - Raag Shree")

## Plot Detections

In [None]:
# Overlay the pitch KDE of every "*Bhoop*" recording found in DATA_DIR.
bhoop_mp3s = glob.glob(DATA_DIR + "*Bhoop*.mp3")
for track_mp3 in bhoop_mp3s:
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)
    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)

    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="green")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

In [None]:
# Recording name -> KDE placeholder (None until the cells below fill it in).
todis = dict.fromkeys([
    # "Ajoy Chakrabarty - Todi",  # corrupted
    "Kumar Gandharva - Raag Todi",
    "Omkar Dadarkar - Raag Todi",
])
multanis = dict.fromkeys([
    "Ajoy Chakrabarty - Multani",
    "Omkar Dadarkar - Raag Multani",
])

In [None]:
# Pitch KDE of each Todi recording, with its detected peaks marked.
for track_name in todis:
    matches = glob.glob(DATA_DIR + f"*{track_name}*mp3")
    if not matches:
        # A bare [0] would raise an IndexError that does not name the track.
        raise FileNotFoundError(f"no mp3 matching {track_name!r} in {DATA_DIR}")
    track_mp3 = matches[0]
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
    todis[track_name] = kde_sample
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample (12*i/N does not match linspace(0, 12, N)).
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="teal")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

plt.show()

In [None]:
# Pitch KDE of each Multani recording, with its detected peaks marked.
for track_name in multanis:
    matches = glob.glob(DATA_DIR + f"*{track_name}*mp3")
    if not matches:
        # A bare [0] would raise an IndexError that does not name the track.
        raise FileNotFoundError(f"no mp3 matching {track_name!r} in {DATA_DIR}")
    track_mp3 = matches[0]
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
    multanis[track_name] = kde_sample
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample (12*i/N does not match linspace(0, 12, N)).
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="olivedrab")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

plt.show()

In [None]:
# Recording name -> KDE placeholder (None until the cells below fill it in).
malkaunses = dict.fromkeys([
    "Satyasheel Deshpande - Raag Malkauns",
    # "Ajoy Chakrabarty - Malkauns",
])
bhoops = dict.fromkeys([
    "Omkar Dadarkar - Raag Bhoopali",
    "Sameehan Kashalkar - Raag Bhoopali",
])
meghs = dict.fromkeys([
    "Anol Chatterjee - Raag Megh",
])

In [None]:
# Pitch KDE of each Malkauns recording, with its detected peaks marked.
for track_name in malkaunses:
    matches = glob.glob(DATA_DIR + f"*{track_name}*mp3")
    if not matches:
        # A bare [0] would raise an IndexError that does not name the track.
        raise FileNotFoundError(f"no mp3 matching {track_name!r} in {DATA_DIR}")
    track_mp3 = matches[0]
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
    malkaunses[track_name] = kde_sample
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample (12*i/N does not match linspace(0, 12, N)).
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="darkviolet")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

plt.show()

In [None]:
# Pitch KDE of each Bhoopali recording, with its detected peaks marked.
for track_name in bhoops:
    matches = glob.glob(DATA_DIR + f"*{track_name}*mp3")
    if not matches:
        # A bare [0] would raise an IndexError that does not name the track.
        raise FileNotFoundError(f"no mp3 matching {track_name!r} in {DATA_DIR}")
    track_mp3 = matches[0]
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
    bhoops[track_name] = kde_sample
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample (12*i/N does not match linspace(0, 12, N)).
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="crimson")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

plt.show()

In [None]:
# Pitch KDE of each Megh recording, with its detected peaks marked.
for track_name in meghs:
    matches = glob.glob(DATA_DIR + f"*{track_name}*mp3")
    if not matches:
        # A bare [0] would raise an IndexError that does not name the track.
        raise FileNotFoundError(f"no mp3 matching {track_name!r} in {DATA_DIR}")
    track_mp3 = matches[0]
    track_path = track_mp3[:-4]  # drop the ".mp3" extension

    y_sample, sr, ctonic = read_sample_and_tonic(track_path)

    kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
    meghs[track_name] = kde_sample
    peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
    print(peaks)

    # Use one x-axis for the curve and the markers so every marker lands
    # exactly on its curve sample (12*i/N does not match linspace(0, 12, N)).
    note_axis = np.linspace(0, 12, len(kde_sample))
    plt.plot(note_axis, kde_sample, color="darkslateblue")
    plt.plot(note_axis[peaks], kde_sample[peaks], "o", markersize=3, color="orange")
    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")

plt.show()

Back-of-the-envelope

In [None]:
# Peak positions per raag on a 0-240 scale spanning one octave.
# NOTE(review): presumably transcribed by hand from the peak indices printed
# by the cells above, with the octave endpoints 0 and 240 added — confirm.
malkauns = [0, 62, 98, 160, 200, 240]
bhoop = [0, 40, 78, 139, 178, 240]
megh = [0, 41, 100, 140, 202, 240]

In [None]:
# Gaps between consecutive peaks of each raag, on the same 0-240 scale.
for peak_positions in (malkauns, bhoop, megh):
    print(np.diff(peak_positions))

In [None]:
# On the 0-240 scale (240 steps per octave, i.e. 20 per semitone), a syntonic
# comma (81/80) shows up as a delta of this many steps: its size in semitones
# times 20.
(librosa.hz_to_midi(220*81/80) - librosa.hz_to_midi(220)) * 20

## Pitch Class on Tonnetz

In [None]:
# Load the Saraswati recording and compute its pitch KDE.
saraswati_mp3s = glob.glob(DATA_DIR + "*Saraswati*.mp3")
if not saraswati_mp3s:
    # Without this check a stale `track_path` left over from an earlier cell
    # would be analysed silently (or a NameError raised on a fresh kernel).
    raise FileNotFoundError(f"no '*Saraswati*.mp3' found in {DATA_DIR}")
# The original loop kept only the last match; select it explicitly.
track_mp3 = saraswati_mp3s[-1]
track_path = track_mp3[:-4]  # drop the ".mp3" extension

y_sample, sr, ctonic = read_sample_and_tonic(track_path)
kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)

In [None]:
# Continuous pitch KDE of the sample ...
note_axis = np.linspace(0, 12, len(kde_sample))
plt.plot(note_axis, kde_sample, color="darkgreen")
plt.xlabel("relative note index")
plt.ylabel("normalized duration")

# ... overlaid with the same distribution binned into 12 pitch classes.
pc12_sample = kde.bin_into_12(kde_sample)
pitch_classes = np.arange(12)
plt.bar(pitch_classes, pc12_sample, color="limegreen", alpha=0.5, width=0.9)

We will use the `pc12_sample` to infer the ideal note peaks and match them with the peaks of the `kde_sample`

In [None]:
# Build the Tonnetz from the genus given by the factor list [3, 3, 3, 5, 7].
# NOTE(review): EFGenus presumably stands for Euler-Fokker genus — confirm in mogra.tonnetz.
g1 = tonnetz.EFGenus.from_list([3,3,3,5,7])
tn = tonnetz.Tonnetz(g1)

In [None]:
# Run TonnetzAlgo1 on the 12-bin pitch-class profile of the sample.
# The result is keyed by swar name (the cells below iterate its keys and look
# them up in the Swar enum); see mogra.tonnetz for what the values represent.
algo1 = tonnetz.TonnetzAlgo1(tn)
algo1.set_pc12(pc12_sample)
tonnetz_swar_set = algo1.execute()

In [None]:
# One line per swar selected by the Tonnetz algorithm, with its value.
print("Tonnetz-friendly Frequencies")
for swar in tonnetz_swar_set.keys():
    tonnetz_value = tonnetz_swar_set[swar]
    print(f"{swar}: {tonnetz_value}")

In [None]:
# For each selected swar, take the strongest KDE bin inside that swar's
# support and print its frequency as a ratio to the tonic.
print("Empirical Frequencies")
for swar in tonnetz_swar_set.keys():
    support = kde.get_bin_support(Swar[swar].value)
    strongest_idx = support[np.argmax(kde_sample[support])]
    ratio_to_tonic = kde.frequency_from_dist_idx(strongest_idx, ctonic) / ctonic
    print(f"{swar}: {ratio_to_tonic}")

## Concrete Demo

In [None]:
# Recordings for the concrete demo: raag -> artist -> YouTube URL.
# The raag and artist keys also form the local file name ("{raag}-{artist}")
# used when downloading and when globbing for the mp3 files.
raags_and_artists = {
    "Jog": {
        "RashidKhan": "https://www.youtube.com/watch?v=3kXQBzfvZRU",
        "SanjeevAbhyankar": "https://www.youtube.com/watch?v=7CZPKxaW7DE",
        "VyankateshKumar": "https://www.youtube.com/watch?v=TmVBjHej_MU",
    },
    "Jogkauns" : {
        "RashidKhan": "https://www.youtube.com/watch?v=TTKY92oj2uw",
        "SanjeevAbhyankar": "https://www.youtube.com/watch?v=33_UYNZzCyY",
        "VyankateshKumar": "https://www.youtube.com/watch?v=BxrcoduPdoA",
    }
}

In [None]:
# only once
# fetch_audio(raags_and_artists)

In [None]:
# TODO(neeraja): source separation

In [None]:
# only once
# for raag, vv in raags_and_artists.items():
#     raag_samples = {}
#     plt.figure()
#     for artist, _ in vv.items():
#         track_mp3 = glob.glob(f"concrete-demo/{raag}-{artist}*.mp3")[0]
#         annotate_tonic(track_mp3[:-4])

In [None]:
# For every raag: one figure overlaying the pitch KDEs of all artists, and
# the list of each artist's detected peak indices collected in raag_peaks.
raag_peaks = {}
for raag, vv in raags_and_artists.items():
    print(raag)
    peaks_for_raag = []
    raag_peaks[raag] = peaks_for_raag
    plt.figure()
    for artist in vv:
        track_mp3 = glob.glob(f"concrete-demo/{raag}-{artist}*.mp3")[0]
        track_path = track_mp3[:-4]  # drop the ".mp3" extension
        y_sample, sr, ctonic = read_sample_and_tonic(track_path)
        kde_sample = kde.extract(y_sample, sr=sr, tonic=ctonic)
        peaks, _ = kde.prominence_based_peak_finder(kde_sample, prominence=0.005)
        peaks_for_raag.append(peaks)
        n_bins = len(kde_sample)
        # Printed as fractions of the KDE length; stored as raw indices.
        print("peaks", [pp / n_bins for pp in peaks])
        plt.plot(np.linspace(0, 12, n_bins), kde_sample, color="darkgreen")

    plt.xlabel("relative note index")
    plt.ylabel("normalized duration")
    plt.title(raag)

In [None]:
# Average the i-th peak across artists for every raag and report how far the
# artists disagree on it.
raag_means = {}
for raag in raags_and_artists:
    # Use a separate name here: rebinding `raag_peaks` would replace the dict
    # with one raag's list and make the lookup fail for the next raag.
    peaks_per_artist = raag_peaks[raag]
    print(raag)
    print(peaks_per_artist)
    # make sure these look good, else edit
    # (zip pairs peaks by position and stops at the shortest artist list)
    raag_means[raag] = []
    for peak_tuples in zip(*peaks_per_artist):
        mean_peak = np.mean(peak_tuples)
        # Largest pairwise difference == max - min; also valid for one artist.
        maxdelta = max(peak_tuples) - min(peak_tuples)
        # Peaks are KDE bin indices, so convert to a fraction of the octave
        # before mapping to one of the 12 swars.
        # NOTE(review): uses len(kde_sample) left by the previous cell and
        # assumes all KDEs have that length — confirm.
        swar_index = int(round(mean_peak / len(kde_sample) * 12)) % 12
        print(f"swar = {Swar(swar_index).name} maxdelta = {maxdelta} mean = {mean_peak}")
        raag_means[raag].append(mean_peak)
