In [11]:
# Root folder of the dataset; the "raw", "processed" and "processed_framewise"
# sub-folders used further down are all joined onto this path.
DATA_DIR = "/viscam/projects/audio_nerf/transfer/devocalization/data/"
# Folder inserted into sys.path below (presumably where the local `viz` module lives).
SRC_DIR = "/viscam/projects/audio_nerf/transfer/devocalization/masonstuff"


%load_ext autoreload
%autoreload 2
import os
import sys
sys.path.insert(1, SRC_DIR)

import numpy as np
import matplotlib.pyplot as plt
import scipy
import glob
import viz
from IPython.display import Audio
import librosa

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Locations inside the raw dataset, all derived from DATA_DIR.
RAW_DIR = os.path.join(DATA_DIR, "raw")
# Folder that the reference filenames are resolved against when loading audio.
REF_DIR = os.path.join(RAW_DIR, "original_recordings/reference")
# Folder that the imitation filenames are resolved against when loading audio.
IM_DIR = os.path.join(RAW_DIR, "vocal_imitations/included")
# Tab-separated ratings table; read by filter_by_threshold (rating is the last column).
RATINGS_PATH = os.path.join(RAW_DIR, "vocal_imitations_assessment.txt")
# Tab-separated table used to build categories_dict (column 10 -> int(column 3)).
CATEGORIES_PATH = os.path.join(RAW_DIR, "vocal_imitations.txt")

## Find Categories

In [13]:
# Lookup table: reference filename (column 10) -> integer category id (column 3),
# built from the tab-separated file at CATEGORIES_PATH.
categories_dict = {}

with open(CATEGORIES_PATH, 'r') as categories_file:
    rows = iter(categories_file)
    next(rows)  # the first line is a header, not data

    for row in rows:
        fields = row.strip().split('\t')
        ref_name = fields[10]
        cat_id = int(fields[3])
        categories_dict[ref_name] = cat_id

## Filter Entries

In [14]:
def filter_by_threshold(threshold = 30, verbose = True):
    """Select imitations whose rating exceeds ``threshold``, one entry per imitation file.

    Reads the tab-separated file at ``RATINGS_PATH`` (first line skipped). In every
    remaining row the last field is the rating, field 2 is the imitation filename
    and field 3 is the reference filename. A row is kept when its rating is strictly
    greater than ``threshold`` and its imitation filename has not been kept before.

    Rows with fewer than four fields, or whose rating cannot be parsed as a float,
    are skipped. Previously a failed parse was swallowed, so such a row was judged
    using the rating left over from the preceding row.

    Args:
        threshold: Minimum rating (exclusive) an imitation needs to be kept.
        verbose: When True (default) print the details of every kept row.

    Returns:
        Four parallel lists ``(imitation_paths, reference_paths, ratings,
        category_ids)``; category ids are looked up in ``categories_dict`` by
        reference filename.

    Raises:
        KeyError: A kept row names a reference file missing from ``categories_dict``.
    """
    imitation_paths = []
    reference_paths = []
    ratings = []
    category_ids = []
    # Set membership keeps the duplicate check O(1) instead of scanning the list per row.
    seen_imitations = set()

    with open(RATINGS_PATH, 'r') as file:
        next(file, None) # Skip first line (tolerates an empty file)

        for line in file:
            parts = line.strip().split('\t')
            if len(parts) < 4:
                # Blank or truncated row: no imitation/reference columns to read.
                continue

            try:
                rating = float(parts[-1])
            except ValueError:
                # Unparsable rating: skip the row rather than reuse a stale value.
                continue

            imitation_name = parts[2]
            reference_name = parts[3]

            # `not (rating > threshold)` also rejects NaN ratings, like the original test.
            if not (rating > threshold) or imitation_name in seen_imitations:
                continue

            category_id = categories_dict[reference_name]

            if verbose:
                print("\n\nRating\t:" + parts[-1])
                print("Imitation Filename:\t" + imitation_name)
                print("Reference Filename:\t" + reference_name)
                print("Category ID:\t" + str(category_id))

            seen_imitations.add(imitation_name)
            ratings.append(rating)
            imitation_paths.append(imitation_name)
            reference_paths.append(reference_name)
            category_ids.append(category_id)

    print(len(imitation_paths))
    assert len(imitation_paths) == len(reference_paths) == len(ratings) == len(category_ids)
    return imitation_paths, reference_paths, ratings, category_ids

# Run the filter with its default threshold (30); the four parallel lists become
# module-level names that the processing functions below read.
imitation_paths, reference_paths, ratings, category_ids = filter_by_threshold()



Rating	:79.0
Imitation Filename:	000Animal_Domestic animals_ pets_Cat_Growling-5112317826039808.wav
Reference Filename:	000Animal_Domestic animals_ pets_Cat_Growling_reference.wav
Category ID:	0


Rating	:74.0
Imitation Filename:	000Animal_Domestic animals_ pets_Cat_Growling-5138546386010112.wav
Reference Filename:	000Animal_Domestic animals_ pets_Cat_Growling_reference.wav
Category ID:	0


Rating	:84.0
Imitation Filename:	000Animal_Domestic animals_ pets_Cat_Growling-5680316245082112.wav
Reference Filename:	000Animal_Domestic animals_ pets_Cat_Growling_reference.wav
Category ID:	0


Rating	:90.0
Imitation Filename:	000Animal_Domestic animals_ pets_Cat_Growling-5706033703944192.wav
Reference Filename:	000Animal_Domestic animals_ pets_Cat_Growling_reference.wav
Category ID:	0


Rating	:93.0
Imitation Filename:	000Animal_Domestic animals_ pets_Cat_Growling-5763711893504000.wav
Reference Filename:	000Animal_Domestic animals_ pets_Cat_Growling_reference.wav
Category ID:	0


Rating	:87.0


## Read and Process Data

In [71]:
def process_data(fs=22050, trim=True, min_length = 1, full_length = 5):
    """Load every imitation/reference pair into fixed-length, zero-padded arrays.

    Iterates over the module-level ``imitation_paths`` / ``reference_paths`` /
    ``category_ids`` lists. Files are resolved against ``IM_DIR`` and ``REF_DIR``
    and loaded with ``librosa.load`` at sample rate ``fs``. Pairs with a missing
    file, or where either recording is shorter than ``min_length`` seconds (after
    optional trimming), are dropped. Kept recordings are truncated to
    ``full_length`` seconds or right-padded with zeros up to that length.

    Args:
        fs: Sample rate passed to ``librosa.load``.
        trim: When True, apply ``librosa.effects.trim`` to both recordings.
        min_length: Minimum duration in seconds required of both recordings.
        full_length: Duration in seconds of every output row.

    Returns:
        ``(imitation_recordings, reference_recordings, filtered_category_ids)``:
        two arrays of shape ``(valid_count, int(full_length*fs))`` and the list of
        category ids for the kept pairs, in the same order.
    """
    num_samples = int(full_length*fs)
    # Allocate for the worst case (every pair kept); sliced down to valid_count at the end.
    imitation_recordings = np.zeros((len(imitation_paths), num_samples))
    reference_recordings = np.zeros((len(reference_paths), num_samples))
    filtered_category_ids = []
    valid_count = 0

    for i, imitation_name in enumerate(imitation_paths):

        imitation_path = os.path.join(IM_DIR, imitation_name)
        reference_path = os.path.join(REF_DIR, reference_paths[i])

        if not (os.path.isfile(imitation_path) and os.path.isfile(reference_path)):
            print('Recording not found')
        else:
            im, _ = librosa.load(imitation_path, sr=fs) # Resamples, converts to float, and converts to mono
            ref, _ = librosa.load(reference_path, sr=fs)

            if trim:
                im, _ = librosa.effects.trim(im)
                ref, _ = librosa.effects.trim(ref)

            length_s_im = im.shape[0]/fs
            length_s_ref = ref.shape[0]/fs

            if length_s_im < min_length or length_s_ref < min_length:
                print("Recording Too short")
            else:
                # Truncate to the target length; shorter signals leave the tail of the row at zero.
                im = im[..., :num_samples]
                ref = ref[..., :num_samples]
                imitation_recordings[valid_count, :im.shape[-1]] = im
                reference_recordings[valid_count, :ref.shape[-1]] = ref
                filtered_category_ids.append(category_ids[i])
                valid_count += 1

        # Progress marker. No `continue` above, so it is emitted for every 100th
        # item even when that item was rejected as too short.
        if i%100 == 0:
            print(i)

    assert len(filtered_category_ids) == valid_count
    print("Number of Examples:\t" + str(valid_count))
    imitation_recordings = imitation_recordings[:valid_count]
    reference_recordings = reference_recordings[:valid_count]

    return imitation_recordings, reference_recordings, filtered_category_ids
    
# Build the fixed-length dataset with the default settings
# (fs=22050, trim=True, min_length=1, full_length=5).
imitation_recordings, reference_recordings, filtered_category_ids = process_data()

0
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording not found
Recording not found
100
Recording Too short
Recording not found
Recording not found
Recording not found
200
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
300
Recording not found
Recording not found
Recording not found
Recording not found
400
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
500
Recording not found
Recording not found
Recording Too short
Recording not found
Recording not found
Recording not foun

Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording not found
Recording not found
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording not found
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found


Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording not found
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording Too short
Recording not found
Recording Too short
Recording Too short
Recording not found
Recording not found
5100
Recording not found
Recording not found
Recording not found
Recording not found
Number of Examples:	4336


In [72]:
# Sanity check: both arrays and the category list should report the same number of examples.
for recordings in (imitation_recordings, reference_recordings):
    print(recordings.shape)
print(len(filtered_category_ids))

(4336, 110250)
(4336, 110250)
4336


In [73]:
# Persist the fixed-length dataset under DATA_DIR/processed.
SAVE_DIR = os.path.join(DATA_DIR, "processed")
os.makedirs(SAVE_DIR, exist_ok=True)  # np.save does not create missing directories
np.save(os.path.join(SAVE_DIR, "imitations.npy"), imitation_recordings)
np.save(os.path.join(SAVE_DIR, "reference.npy"), reference_recordings)
np.save(os.path.join(SAVE_DIR, "categories.npy"), filtered_category_ids)

# Framewise Data Processing

In [15]:
def process_data_frames(fs=22050, trim=True, frame_length = 2, pad_last = False):
    """Cut every imitation/reference pair into aligned fixed-length frames.

    Iterates over the module-level ``imitation_paths`` / ``reference_paths`` /
    ``category_ids`` lists. Files are resolved against ``IM_DIR`` and ``REF_DIR``
    and loaded with ``librosa.load`` at sample rate ``fs``. Each pair is limited to
    the duration of the shorter recording and split into consecutive frames of
    ``frame_length`` seconds; frame ``j`` of the imitation and frame ``j`` of the
    reference cover the same sample range.

    The original padding code referenced an undefined ``pad_width`` and could never
    run because only whole frames were produced. The pad amount is now computed
    from the frame itself, and ``pad_last`` makes the padded trailing frame
    available on request.

    Args:
        fs: Sample rate passed to ``librosa.load``.
        trim: When True, apply ``librosa.effects.trim`` to both recordings.
        frame_length: Frame duration in seconds.
        pad_last: When False (default) only whole frames are returned and any
            trailing remainder is dropped. When True the remainder, cut at the
            end of the shorter recording, is kept as one extra zero-padded frame.

    Returns:
        ``(imitation_frames, reference_frames, category_ids)`` as NumPy arrays
        with one entry per frame.
    """
    samples_per_frame = int(frame_length*fs)
    imitation_recordings = []
    reference_recordings = []
    filtered_category_ids = []

    def _fit_to_frame(frame):
        # Right-pad a short frame with zeros so every frame has samples_per_frame samples.
        missing = samples_per_frame - frame.shape[-1]
        if missing > 0:
            frame = np.pad(frame, (0, missing), mode='constant')
        return frame

    for i, imitation_name in enumerate(imitation_paths):

        imitation_path = os.path.join(IM_DIR, imitation_name)
        reference_path = os.path.join(REF_DIR, reference_paths[i])

        if os.path.isfile(imitation_path) and os.path.isfile(reference_path):
            im, _ = librosa.load(imitation_path, sr=fs) # Resamples, converts to float, and converts to mono
            ref, _ = librosa.load(reference_path, sr=fs)

            if trim:
                im, _ = librosa.effects.trim(im)
                ref, _ = librosa.effects.trim(ref)

            length_s_im = im.shape[0]/fs
            length_s_ref = ref.shape[0]/fs

            # Number of whole frames that fit in the shorter of the two recordings.
            num_frames = int(min(length_s_im, length_s_ref)/frame_length)

            # Samples both recordings have in common; bounds the optional partial frame.
            usable_samples = min(im.shape[0], ref.shape[0])
            if pad_last and usable_samples > num_frames*samples_per_frame:
                num_frames += 1

            for j in range(num_frames):
                start = j*samples_per_frame
                stop = min(start + samples_per_frame, usable_samples)

                imitation_recordings.append(_fit_to_frame(im[start:stop]))
                reference_recordings.append(_fit_to_frame(ref[start:stop]))
                filtered_category_ids.append(category_ids[i])
        else:
            print('Recording not found')

        if i%100 == 0:
            print(i)

    return np.array(imitation_recordings), np.array(reference_recordings), np.array(filtered_category_ids)
    
# Build the framewise dataset with the default settings (fs=22050, trim=True, frame_length=2).
imitation_frames, reference_frames, frame_category_ids = process_data_frames()

0
Recording not found
Recording not found
Recording not found
Recording not found
100
Recording not found
Recording not found
Recording not found
200
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
300
Recording not found
Recording not found
Recording not found
Recording not found
400
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
500
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
600
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
700
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording not found
Recording 


KeyboardInterrupt



In [None]:
# Sanity check: all three arrays should report the same number of frames.
print(imitation_frames.shape)
print(reference_frames.shape)
print(frame_category_ids.shape)

# Persist the framewise dataset. The original cell saved imitation_recordings,
# reference_recordings and filtered_category_ids (the fixed-length arrays from the
# previous section) here, so the framewise folder received the wrong data.
SAVE_DIR = os.path.join(DATA_DIR, "processed_framewise")
os.makedirs(SAVE_DIR, exist_ok=True)  # np.save does not create missing directories
np.save(os.path.join(SAVE_DIR, "imitations.npy"), imitation_frames)
np.save(os.path.join(SAVE_DIR, "reference.npy"), reference_frames)
np.save(os.path.join(SAVE_DIR, "categories.npy"), frame_category_ids)