In [1]:
# Utility
import re
import numpy as np
import pandas as pd

# OS I/O
import requests
import inspect
import sys
import os

# Image Processing
import cv2

# Audio Processing
import librosa
import librosa.display
import scipy

# Custom Utils
# Derive working directories from the location reported for the running code,
# and put the parent directory on sys.path so sibling packages can be imported.
# NOTE(review): inside a notebook kernel, inspect.getfile() reports the kernel's
# filename for the cell, which may not live next to the .ipynb -- confirm that
# currentdir resolves to the folder that actually contains 'tmpdata'.
abspathdir = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(abspathdir)
parentdir  = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

# Scratch data and the pickled database currently share the same folder.
temp_dir = currentdir + '/tmpdata'
db_dir   = currentdir + '/tmpdata'
# Fixed: this used to print the undefined name `temp_dur` (NameError).
print(temp_dir, db_dir)

ModuleNotFoundError: No module named 'pandas'

In [None]:
import joblib

# File name of the pickled database; the leading slash is needed because
# db_dir is built without a trailing separator.
dbname = "/mgdb.pkl"

# joblib.load unpickles the file, so only point this at a database you trust.
db = joblib.load(f"{db_dir}{dbname}")

# Split the database into its two top-level tables.
audio_db, video_db = db["Audio"], db["Video"]

In [None]:
# Clips whose (integer-truncated) duration is below this many seconds are
# dropped from both simplified tables.
MIN_DURATION_SECS = 4

# Video table: keep the original key, but only the fields used downstream.
video_db_simple = {}
for k, old_entry in video_db.items():
    # int() truncates, so a fractional duration such as 3.9 is treated as 3.
    if int(old_entry["Duration"]) < MIN_DURATION_SECS:
        continue

    meta = old_entry["MetaInfo"]
    video_db_simple[k] = {
        "MusicID": meta["MusicID"],
        "DanceGenre": meta["DanceGenre"],
        "MusicEncoding": old_entry["MusicEncoding"],
        "MotiongramX": old_entry["MotiongramX"],
        "MotiongramY": old_entry["MotiongramY"],
    }

# Audio table: re-keyed by MusicID so a video entry can look up its track.
audio_db_simple = {}
for old_entry in audio_db.values():
    if int(old_entry["Duration"]) < MIN_DURATION_SECS:
        continue

    audio_db_simple[old_entry["MusicID"]] = old_entry["RawAudio"]

# NOTE(review): pd.DataFrame on a dict of array-likes requires every value to
# have the same length -- confirm the RawAudio clips are equal-length.
audio_df = pd.DataFrame(audio_db_simple)
# With a dict of dicts, the outer keys become columns and the field names rows.
video_df = pd.DataFrame(video_db_simple)

In [None]:
# Audio sampling rate
sr = 22050

# Frequency range handed to the mel spectrogram: 20 up to half the sampling rate
fmin = 20
fmax = sr / 2

# Samples per FFT window.
# 2048 is the usual recommendation for music, but 4096 gave better results here.
n_fft = 4096

# Hop between successive frames (spectrogram columns); picked so that the
# spectrogram comes out 128 frames wide.
hop_length = 690

# Number of mel bands (spectrogram rows); together with the hop above this
# yields 128 x 128 spectrograms.
n_mels = 128

In [None]:
# Extract two motiongrams and corresponding audio
def _prepare_motiongram(mg, size):
    """Turn a motiongram image into a (size, size) float32 array in [0, 1].

    The image is converted to grayscale with cv2.COLOR_RGB2GRAY, resized to
    size x size, cast to float32, divided by 255 and clipped to [0, 1].
    NOTE(review): assumes the stored motiongrams are 3-channel RGB with
    8-bit values -- confirm against the code that wrote the database.
    """
    gray = cv2.cvtColor(mg, cv2.COLOR_RGB2GRAY)
    gray = cv2.resize(gray, (size, size))
    return np.clip(gray.astype(np.float32) / 255.0, 0, 1)


video_idx = 300
# Pick the key from the *filtered* table. Taking it from video_db (as before)
# raises KeyError whenever that clip was dropped by the duration filter.
key = list(video_db_simple.keys())[video_idx]

entry = video_db_simple[key]
# Show only the small metadata fields instead of dumping the image arrays.
print(key, entry["MusicID"], entry["DanceGenre"])

mgx = _prepare_motiongram(entry["MotiongramX"], n_mels)
mgy = _prepare_motiongram(entry["MotiongramY"], n_mels)

# NOTE(review): this lookup fails if the track itself was removed by the
# audio duration filter -- confirm every kept video has a kept track.
audio = audio_db_simple[entry["MusicID"]]
mel = librosa.feature.melspectrogram(
    y=audio,  # keyword form: librosa >= 0.10 rejects a positional signal
    sr=sr,
    n_mels=n_mels,
    n_fft=n_fft,
    hop_length=hop_length,
    # Same symmetric Hamming callable as the old scipy.signal.hamming alias,
    # which newer SciPy releases no longer expose.
    window=scipy.signal.windows.hamming,
    fmin=fmin,
    fmax=fmax,
)

# dB relative to the peak; with power_to_db's default top_db of 80 the values
# lie in [-80, 0], so the next line rescales them to [0, 1].
mel = librosa.power_to_db(mel, ref=np.max)
mel = 1. + (mel / 80.)

In [None]:
import matplotlib.pyplot as plt

# Side-by-side view of the two motiongrams. The layout is unchanged: the Y
# motiongram is on the left and the X motiongram on the right; titles are
# added so the panels can be told apart.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
panels = ((mgy, "Motiongram Y"), (mgx, "Motiongram X"))
for ax, (img, title) in zip(axs, panels):
    ax.imshow(img, interpolation="bicubic", cmap="binary", aspect="auto")
    ax.set_title(title)
    # Pixel indices carry no meaning after the resize, so hide the ticks.
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()