In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage.io as skio
from skimage.transform import resize
import pandas as pd
import os
import requests
from pyarrow import feather
import io
import concurrent.futures

import warnings
warnings.filterwarnings("ignore")

In [10]:
def convert_audio_to_mel_image(
    filepath_to_audio,
    filepath_to_save,
    image_size=(128, 512),
    n_mels=128,
    fmax=8000,
):
    """Render an audio file as a greyscale mel-spectrogram image and save it.

    Parameters
    ----------
    filepath_to_audio : path-like
        Audio file to read; it is loaded with ``librosa.load`` defaults.
    filepath_to_save : path-like
        Destination image path handed to ``skimage.io.imsave``.
    image_size : tuple of int, optional
        ``(rows, cols)`` the spectrogram is resized to before saving.
    n_mels : int, optional
        Number of mel bands in the spectrogram.
    fmax : number, optional
        Upper frequency bound passed to the mel spectrogram.

    Returns
    -------
    None
        The image is written to ``filepath_to_save``.
    """
    waveform, sample_rate = librosa.load(filepath_to_audio)

    power_spectrogram = librosa.feature.melspectrogram(
        y=waveform, sr=sample_rate, n_mels=n_mels, fmax=fmax
    )
    # dB values are expressed relative to the loudest bin of this clip.
    decibels = librosa.power_to_db(power_spectrogram, ref=np.max)

    # Map dB onto 0..255. This assumes the dB values span [-80, 0]
    # (librosa's default dynamic range) -- TODO confirm.
    pixels = ((80 + decibels) / 80) * 255
    # Reverse the row order; presumably so low frequencies end up at the
    # bottom of the saved image.
    pixels = np.flip(pixels, axis=0)

    # The cast to uint8 truncates fractional pixel values.
    skio.imsave(filepath_to_save, resize(pixels, image_size).astype(np.uint8))
    
def convert_audio_to_composite_image(filepath_to_audio, filepath_to_save, image_size=(128,512), n_mels=128, fmax=8000,):
    """Render an audio file as a 3-channel feature image and save it.

    The three channels, stacked in this order, are:

    1. mel spectrogram (dB-scaled),
    2. MFCCs (passed through the same dB scaling),
    3. constant-Q chromagram.

    Each channel is resized to ``image_size`` and cast to ``uint8`` before
    stacking.

    Parameters
    ----------
    filepath_to_audio : path-like
        Audio file to read; it is loaded with ``librosa.load`` defaults.
    filepath_to_save : path-like
        Destination image path handed to ``skimage.io.imsave``.
    image_size : tuple of int, optional
        ``(rows, cols)`` of every channel, and therefore of the saved image.
    n_mels : int, optional
        Number of mel bands. It is also used as the number of MFCC
        coefficients so both channels have the same height before resizing.
    fmax : number, optional
        Upper frequency bound for the mel filter bank used by both the mel
        spectrogram and the MFCC computation.

    Returns
    -------
    None
        The image is written to ``filepath_to_save``.

    Notes
    -----
    With the default arguments the output is the same as before these
    parameters were wired through; previously ``image_size``, ``n_mels`` and
    ``fmax`` were accepted but ignored in favour of hard-coded values.
    """

    def db_to_pixels(db_values):
        # Shared scaling for the two dB-valued channels: map onto 0..255
        # (assumes values span [-80, 0] -- TODO confirm), reverse the row
        # order, resize, and truncate to uint8.
        scaled = ((80 + db_values) / 80) * 255
        scaled = np.flip(scaled, axis=0)
        return resize(scaled, image_size).astype(np.uint8)

    signal, sr = librosa.load(filepath_to_audio)

    mels = librosa.power_to_db(librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels, fmax=fmax), ref=np.max)
    mel_image = db_to_pixels(mels)

    # NOTE(review): MFCCs are cepstral coefficients rather than power values,
    # so power_to_db is an unusual choice here. It is kept unchanged so that
    # newly generated images stay comparable with existing ones -- confirm
    # this is intended.
    mfcc = librosa.power_to_db(
        librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mels, n_mels=n_mels, fmax=fmax),
        ref=np.max,
    )
    mfcc_image = db_to_pixels(mfcc)

    # The chromagram is scaled by 255 directly (no dB conversion, no flip),
    # exactly as in the original pipeline.
    chromagram = librosa.feature.chroma_cqt(y=signal, sr=sr)
    chroma_image = resize(chromagram*255, image_size).astype(np.uint8)

    composite = np.dstack((mel_image, mfcc_image, chroma_image))

    skio.imsave(filepath_to_save, composite)

In [11]:
# Load the track metadata table from the local Feather file.
tracks_df = feather.read_feather('data/all_tracks.feather')

In [12]:
# Keep only rows that have a value in 'track_preview_link'; the download step
# reads that column as the URL to fetch.
# Note: dropna keeps the original row labels, so the index may have gaps here.
tracks_df = tracks_df.dropna(subset=['track_preview_link'])

In [13]:
# Shuffle every row with a fixed seed (reproducible order) and renumber the
# rows 0..N-1. download_preview_with_index looks rows up by this new index.
random_tracks_df = tracks_df.sample(frac=1, random_state=42).reset_index(drop=True)
random_tracks_df

Unnamed: 0,track_id,track_name,track_preview_link,track_popularity,track_uri,artist_name,artist_id,artist_genres,artist_popularity,release_date
0,3SdSjZ651RbiG0rDEh8AQm,Electrofunk,https://p.scdn.co/mp3-preview/8b32113c205b135d...,1,spotify:track:3SdSjZ651RbiG0rDEh8AQm,TomBad,4T3DN9PLFsn3I8t65jfDAw,[],1,2004-10-15
1,0YLZeGKEk0N921qz6wpW1p,Silly Puddy,https://p.scdn.co/mp3-preview/b61e27bbc60440d6...,39,spotify:track:0YLZeGKEk0N921qz6wpW1p,Zion I,3LLs28LJVlXAjWc8UDkcQZ,"[abstract hip hop, alternative hip hop, pop ra...",45,2009-09-08
2,1Sej8madYsRgWZaYDlrxqE,Her Worth (Remix),https://p.scdn.co/mp3-preview/c771a0a178063357...,0,spotify:track:1Sej8madYsRgWZaYDlrxqE,Devon Sanders,5Ob0wqCDOHjYqBPGQCBO9W,[],12,2020-05-23
3,6RdHnDdeayZWNfN6HFvN6Y,Love Game - Single Version,https://p.scdn.co/mp3-preview/1fedaca4eda2ebaf...,5,spotify:track:6RdHnDdeayZWNfN6HFvN6Y,Facts & Fiction,460oEmwFYbERxguQI2Z0vX,"[italian disco, new italo disco]",22,2010-02-23
4,60rIdEPDrzyLiLC0icp3xz,Flower,https://p.scdn.co/mp3-preview/bc5c66092e62968e...,55,spotify:track:60rIdEPDrzyLiLC0icp3xz,Moby,3OsRAKCvk37zwYcnzRf5XF,"[downtempo, electronica]",67,
...,...,...,...,...,...,...,...,...,...,...
563288,57dJUvTqBEErXvGsViASbV,Wait A While,https://p.scdn.co/mp3-preview/dc685df30cd2f361...,17,spotify:track:57dJUvTqBEErXvGsViASbV,Old Man Luedecke,1L29dr4NAXlj2tGo51txba,"[canadian americana, canadian folk, canadian i...",27,2015-07-24
563289,3YSr4sgJRsYqQgi6XVbP4d,Bu Aşk,https://p.scdn.co/mp3-preview/105ca7017607da2f...,41,spotify:track:3YSr4sgJRsYqQgi6XVbP4d,Gökhan Kırdar,0iTcX6LMMIt0DeYJ3qBSQX,"[turkish pop, turkish rock, turkish singer-son...",41,1999-06-02
563290,5WKgWQSpOC7xvMHyW9pW1j,The Giant,https://p.scdn.co/mp3-preview/487edd889baff2e8...,2,spotify:track:5WKgWQSpOC7xvMHyW9pW1j,Chris Thornborrow,1WNCJp42oApfoL0wB29oCB,[],1,2016-06-28
563291,4R3uUe9AAVa0ltlCWWS8Io,Better,https://p.scdn.co/mp3-preview/d97b8f37da2b67d9...,35,spotify:track:4R3uUe9AAVa0ltlCWWS8Io,Mike Donehey,2jKlK6gsKyZDItyVP36II2,"[ccm, christian pop]",37,2020-12-15


In [14]:
def download_preview_with_index(index, timeout=30):
    """Download one track preview, convert it to a composite PNG, delete the MP3.

    Parameters
    ----------
    index : label
        Row label in the module-level ``random_tracks_df``. The row's
        ``track_preview_link`` is fetched and its ``track_id`` names the files.
    timeout : float, optional
        Seconds ``requests`` waits on the server before giving up. Without a
        timeout a stalled connection would block its worker thread forever.

    Returns
    -------
    None
        The PNG is written to ``data/Spotify/comp_pngs/<track_id>.png``.

    Raises
    ------
    requests.RequestException
        On connection errors, timeouts, or a non-success HTTP status.
    KeyError
        If ``index`` is not a label of ``random_tracks_df``.
    """
    track_url = random_tracks_df.loc[index, 'track_preview_link']
    track_id = random_tracks_df.loc[index, 'track_id']

    filename = f'data/Spotify/mp3s/{track_id}.mp3'
    png_name = f'data/Spotify/comp_pngs/{track_id}.png'

    preview = requests.get(track_url, timeout=timeout)
    # Fail fast on 4xx/5xx instead of writing an error page to disk as an MP3.
    preview.raise_for_status()

    try:
        with open(filename, 'wb') as f:
            f.write(preview.content)

        convert_audio_to_composite_image(filename, png_name)
    finally:
        # Always clean up the temporary MP3, even when conversion raises;
        # the file may not exist if opening it failed.
        if os.path.exists(filename):
            os.remove(filename)

    # Lightweight progress indicator: overwrite the same output line.
    print(index, end='\r')
    

In [None]:
# Silence pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None

# download_preview_with_index looks rows up in random_tracks_df, so the labels
# must come from that frame. tracks_df keeps its pre-shuffle labels (with gaps
# after dropna), which do not line up with the reset 0..N-1 index.
indices = random_tracks_df.index
MAX_THREADS = 50
# ThreadPoolExecutor rejects max_workers=0, so keep at least one worker even
# when there is nothing to download.
threads = max(1, min(MAX_THREADS, len(indices)))
count = 0  # number of tracks whose download or conversion raised
failed_tracks = {}  # index label -> exception, kept for later inspection

with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
    futures = {
        executor.submit(download_preview_with_index, index): index
        for index in indices
    }
    # Inspect every future: an unconsumed executor.map() result would silently
    # discard all worker exceptions. Failures are recorded, not re-raised, so
    # one bad track does not abort the whole run.
    for future in concurrent.futures.as_completed(futures):
        error = future.exception()
        if error is not None:
            count += 1
            failed_tracks[futures[future]] = error

# Leading newline because the workers' progress output ends with '\r'.
print(f'\n{count} of {len(indices)} tracks failed')