In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from pydub import AudioSegment

# Listening to existing samples

In [None]:
# Load the catalogue of stored samples; get_sample filters and indexes this frame.
meta_sample = pd.read_csv('./samples/meta_samples.csv')

In [None]:
# Preview the first 30 rows of the sample catalogue.
meta_sample.head(30)

In [None]:
def get_sample(meta_sample, genre=None, crap_id=None, learner_name=None, overlapping_windows=None, pipeline=None, n=None, return_sample=False):
    '''
    Load and display the crappified, reconstructed and original audio of one stored sample.

    meta_sample is the sample catalogue (the frame read from ./samples/meta_samples.csv).
    genre, crap_id, learner_name, overlapping_windows and pipeline are optional filters:
    each one that is not None keeps only the rows whose column of the same name equals it
    (pipeline is the full name stored in the catalogue, e.g. 'tensor_pipeline').
    If n is specified, the n-th row (positional) of the filtered catalogue is used,
    else a random matching row is picked.
    If return_sample is True, returns the tuple (crap, reco, orig); otherwise returns None.

    Raises ValueError when no row matches the filters, and IndexError when n is out of range.
    '''
    filters = {'genre': genre, 'crap_id': crap_id, 'learner_name': learner_name,
               'overlapping_windows': overlapping_windows, 'pipeline': pipeline}
    for column, wanted in filters.items():
        if wanted is not None:
            meta_sample = meta_sample[meta_sample[column] == wanted]
    if meta_sample.empty:
        active = {column: wanted for column, wanted in filters.items() if wanted is not None}
        raise ValueError(f'No sample matches the filters {active}')

    row = meta_sample.iloc[n] if n is not None else meta_sample.sample().iloc[0]
    sample_name = row['sample_name']
    f = row['format']
    # Every part of the file names comes from the selected row, never from the
    # filter arguments (which may be None when a filter is not used).
    row_crap_id = row['crap_id']
    row_learner_name = row['learner_name']
    # Files on disk are tagged with the abbreviated pipeline name: 'tp' / 'ip'.
    pipeline_tag = row['pipeline'][0] + 'p'
    window_flag = int(row['overlapping_windows'])

    orig = AudioSegment.from_file(f'./samples/original/{sample_name}.{f}')
    crap = AudioSegment.from_file(f'./samples/crappified/{sample_name}_{row_crap_id}.{f}')
    reco = AudioSegment.from_file(f'./samples/reconstructed/{sample_name}_{row_crap_id}_{pipeline_tag}_{row_learner_name}_W{window_flag}.{f}')
    print(sample_name)
    print('Crappified file :')
    display(crap)
    print('Reconstructed file :')
    display(reco)
    print('Original file :')
    display(orig)
    if return_sample:
        return crap, reco, orig
    

In [None]:
# Listen to the entry at position n=2 among the 'Rock' samples; see get_sample
# for how the remaining keyword arguments are used.
get_sample(meta_sample, genre = 'Rock', n=2, pipeline='tensor_pipeline', overlapping_windows=True)

# Creating new samples

In [None]:
from crystal_clear.upscale import upscale, FeatureLoss
from fastai.basic_train import load_learner
from crystal_clear.prepare import path_mp3
from crystal_clear.tensor_pipeline import TensorImageImageList, TensorImageList
from pathlib import Path
from fastai.vision import * 
from fastai import *
from tqdm import tqdm as tqdm

In [None]:
# Report whether PyTorch can see a CUDA device.
torch.cuda.is_available()

In [None]:
# Metadata of the crappified mp3 dataset; get_track_id reads its
# 'subset', 'genre' and 'track_id' columns.
meta = pd.read_csv('./data/crappified/dataset_1/meta/meta_mp3.csv')
meta.head()

In [None]:
# Distinct genre labels found in the metadata (np.unique returns them sorted).
genre_list = np.unique(meta.genre)
genre_list

In [None]:
def get_proc(pipeline, overlapping_windows):
    '''
    Build the pair of processors (proc, song_proc) for a pipeline.

    pipeline is 'tensor_pipeline' or 'image_pipeline'.
    overlapping_windows selects which song processor variant is used
    (the "2" variants when truthy, the plain ones otherwise).
    Returns the tuple (proc, song_proc).
    Raises ValueError for any other pipeline name.
    '''
    # Imports stay local so that only the selected pipeline's module is loaded.
    if pipeline == 'tensor_pipeline':
        import torch
        from crystal_clear.tensor_pipeline import create_tensor_Processor
        # NOTE(review): torch.load unpickles the file; only load stats files you trust.
        data_stats = torch.load('./data/crappified/dataset_1/tensor_pipeline/data_stats.pkl')
        proc = create_tensor_Processor(data_stats)
        if overlapping_windows:
            from crystal_clear.tensor_pipeline import create_song_tensor_Processor2
            song_proc = create_song_tensor_Processor2()
        else:
            from crystal_clear.tensor_pipeline import create_song_tensor_Processor
            song_proc = create_song_tensor_Processor()
    elif pipeline == 'image_pipeline':
        if overlapping_windows:
            from crystal_clear.image_pipeline import Image_proc as proc, Image_song_proc2 as song_proc
        else:
            from crystal_clear.image_pipeline import Image_proc as proc, Image_song_proc as song_proc
    else:
        # Previously an unknown name fell through and failed with an
        # UnboundLocalError on the return statement.
        raise ValueError(f"Unknown pipeline {pipeline!r}: expected 'tensor_pipeline' or 'image_pipeline'")
    return proc, song_proc

In [None]:
# Cache of what is currently loaded, so repeated calls with the same settings are free.
current = {
    'pipeline': '',
    'overlapping_windows': None,
    'learner': None,
    'proc': None,
    'song_proc': None,
}
# Exported learner file name for each pipeline.
name_learner = {
    'tensor_pipeline': '1a_tensor.pkl',
    'image_pipeline': '1b.pkl',
}
def update_pipeline(pipeline, overlapping_windows):
    '''
    Bring the `current` cache in line with the requested settings.

    When the pipeline differs from the cached one, the learner is reloaded,
    the processors are rebuilt and the garbage collector is run.
    When only overlapping_windows differs, just the processors are rebuilt.
    When nothing differs, nothing happens.
    '''
    pipeline_changed = current['pipeline'] != pipeline
    if not pipeline_changed and current['overlapping_windows'] == overlapping_windows:
        return

    if pipeline_changed:
        export_dir = f'./data/crappified/dataset_1/{pipeline}/models_export/'
        current['learner'] = load_learner(export_dir, name_learner[pipeline])

    processors = get_proc(pipeline, overlapping_windows)
    current['proc'], current['song_proc'] = processors
    current['overlapping_windows'] = overlapping_windows

    if pipeline_changed:
        current['pipeline'] = pipeline
        gc.collect()

In [None]:
def get_track_id(genre=None, n=None):
    '''
    Return the id of one track from the validation subset of `meta`.

    genre: when given, only tracks of that genre are considered.
    n: when given, the n-th matching track (in `meta` order) is returned;
       when omitted, a matching track is drawn at random.
    '''
    keep = meta.subset == 'valid'
    if genre is not None:
        keep = keep & (meta.genre == genre)
    candidates = meta[keep].reset_index(drop=True)
    if n is None:
        return candidates.track_id.sample().iloc[0]
    return candidates.loc[n, 'track_id']

In [None]:
def inspect(track_id, pipeline='tensor_pipeline', overlapping_windows=False, display_music=True, show_progress=True):
    '''
    Reconstruct the crappified version of a track and return the three audio versions.

    The original mp3 is located with path_mp3(track_id); the crappified mp3 is read
    from ./data/crappified/dataset_1/mp3/. The requested pipeline is made current
    with update_pipeline before the crappified file is passed to upscale.
    When display_music is True, each version is printed with a label and displayed.
    Returns the tuple (orig, crap, reconstructed).
    '''
    original_path = path_mp3(track_id)
    crappified_path = Path(f'./data/crappified/dataset_1/mp3/{track_id}.mp3')
    orig = AudioSegment.from_mp3(original_path)
    crap = AudioSegment.from_mp3(crappified_path)

    update_pipeline(pipeline, overlapping_windows)
    reconstructed = upscale(
        crappified_path,
        current['proc'],
        current['song_proc'],
        current['learner'],
        show_progress=show_progress,
    )

    if display_music:
        labelled = (('original', orig), ('crappified', crap), ('reconstructed', reconstructed))
        for label, clip in labelled:
            print(label)
            display(clip)
    return orig, crap, reconstructed

In [None]:
# Pick the validation 'Rock' track at position n=2 and reconstruct it with the
# default pipeline of inspect, without overlapping windows.
track_id = get_track_id(n=2, genre='Rock')
orig, crap, reconstructed = inspect(track_id, overlapping_windows=False)

In [None]:
def add_sample(sample_name, orig, crap, reconstructed, learner_name, crap_id=1, pipeline='tensor_pipeline', overlapping_windows=False, format='mp3', genre=''):
    '''
    Store one sample (original, crappified and reconstructed audio) under ./samples
    and record it in ./samples/meta_samples.csv.

    orig, crap and reconstructed must provide an export(path, format=...) method.
    The catalogue row holds sample_name, crap_id, learner_name, format, genre,
    pipeline (full name) and overlapping_windows; exact duplicate rows are dropped.
    The reconstructed file name uses the abbreviated pipeline tag (first letter + 'p')
    and W0 / W1 for the overlapping_windows flag.
    '''
    import os  # local import so the function does not rely on a star import for `os`

    for name_folder in ('crappified', 'original', 'reconstructed'):
        os.makedirs(f'./samples/{name_folder}', exist_ok=True)

    meta_path = './samples/meta_samples.csv'
    # Names are stored as strings so that a numeric-looking name such as '123'
    # compares equal to the value read back from the CSV when dropping duplicates.
    new_row = pd.DataFrame([{'sample_name': str(sample_name), 'crap_id': crap_id,
                             'learner_name': str(learner_name), 'format': format,
                             'genre': genre, 'pipeline': pipeline,
                             'overlapping_windows': overlapping_windows}])

    existing = None
    if os.path.exists(meta_path):
        try:
            existing = pd.read_csv(meta_path, dtype={'sample_name': str, 'learner_name': str})
        except pd.errors.EmptyDataError:
            # A zero-byte catalogue is treated like a missing one.
            existing = None

    if existing is None or existing.empty:
        new_meta = new_row
    else:
        # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x;
        # with the former bare `except:` that failure silently reset the catalogue.
        new_meta = pd.concat([existing, new_row], ignore_index=True).drop_duplicates()
    new_meta.to_csv(meta_path, index=False)

    orig.export(f'./samples/original/{sample_name}.{format}', format=format)
    crap.export(f'./samples/crappified/{sample_name}_{crap_id}.{format}', format=format)
    pipeline_tag = pipeline[0] + 'p'
    reconstructed.export(f'./samples/reconstructed/{sample_name}_{crap_id}_{pipeline_tag}_{learner_name}_W{int(overlapping_windows)}.{format}', format=format)

In [None]:
# Generate stored samples for every combination of pipeline and window mode:
# for each genre, the first four validation tracks (n = 0..3) are reconstructed
# and saved with add_sample.
for pipeline in ['tensor_pipeline', 'image_pipeline']:
    for overlapping_windows in [False, True]:
        for genre in tqdm(genre_list):
            for i in tqdm(range(4)):
                track_id = get_track_id(n=i, genre=genre)
                # Quiet mode: no audio widgets and no per-track progress output.
                orig, crap, reconstructed = inspect(track_id, pipeline=pipeline, overlapping_windows=overlapping_windows, display_music=False, show_progress=False)
                # The learner is recorded by its export file name without the extension.
                learner_name = name_learner[current['pipeline']]
                add_sample(f'{track_id}', orig, crap, reconstructed, learner_name=os.path.splitext(learner_name)[0], pipeline=pipeline, overlapping_windows=overlapping_windows, genre=genre)