In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import skimage.io as skio
from skimage.transform import resize
import pandas as pd
import os
import requests
from pyarrow import feather
import io
import concurrent.futures

import warnings
warnings.filterwarnings("ignore")

In [2]:
from sklearn.preprocessing import MinMaxScaler


def convert_audio_to_mel_image(filepath_to_audio, filepath_to_save, image_size=(128,512), n_mels=128, fmax=8000,):
    """Render an audio file as a greyscale mel-spectrogram image on disk.

    Parameters
    ----------
    filepath_to_audio : path-like
        Audio file handed to ``librosa.load`` (called with its default settings).
    filepath_to_save : path-like
        Destination handed to ``skimage.io.imsave``.
    image_size : tuple of int
        Output shape passed to ``skimage.transform.resize``.
    n_mels : int
        Number of mel bands requested from ``librosa.feature.melspectrogram``.
    fmax : int
        Upper frequency bound requested from ``librosa.feature.melspectrogram``.

    Returns
    -------
    None
        The only result is the image written to ``filepath_to_save``.
    """
    waveform, sample_rate = librosa.load(filepath_to_audio)

    # Mel power spectrogram, then decibels relative to the spectrogram's own peak.
    power_spectrogram = librosa.feature.melspectrogram(
        y=waveform, sr=sample_rate, n_mels=n_mels, fmax=fmax
    )
    db_spectrogram = librosa.power_to_db(power_spectrogram, ref=np.max)

    # Shift and scale by 80 before stretching to 255.
    # NOTE(review): the 80 presumably mirrors power_to_db's default top_db, so
    # dB values in [-80, 0] land on [0, 255] -- confirm if top_db ever changes.
    pixels = ((80 + db_spectrogram) / 80) * 255

    # Reverse the first axis so row order is flipped vertically in the image.
    pixels = np.flip(pixels, axis=0)

    resized = resize(pixels, image_size)
    # The uint8 cast truncates the fractional part of each pixel value.
    skio.imsave(filepath_to_save, resized.astype(np.uint8))

In [3]:
# Load the track table from the Feather file at data/all_tracks.feather.
tracks_df = feather.read_feather('data/all_tracks.feather')

In [4]:
# Keep only rows that have a 'track_preview_link'; the download step below
# requests that URL for every row it is given.
tracks_df = tracks_df.dropna(subset=['track_preview_link'])

In [5]:
# Replace the index with a fresh 0..n-1 range (old index discarded), so the
# positional slice used to pick rows later yields labels that work with .loc.
tracks_df = tracks_df.reset_index(drop=True)
# Bare expression: display the frame as the cell output.
tracks_df

Unnamed: 0,track_id,track_name,track_preview_link,track_popularity,track_uri,artist_name,artist_id,artist_genres,artist_popularity,release_date
0,79620P7nP5cSS7f8ULHfo8,Ha-le-lu-ja,https://p.scdn.co/mp3-preview/54111ab335474758...,18,spotify:track:79620P7nP5cSS7f8ULHfo8,Klostertaler,6adGYZ9CU5LPwqLYt6yQc7,"[classic schlager, schlager, volksmusik, zille...",36,2007-11-22
1,5k8nNRu2aOjssNyCPzmax3,Gott schütze die Frauen - Radio Version,https://p.scdn.co/mp3-preview/e5bdcf4b6163a4b0...,34,spotify:track:5k8nNRu2aOjssNyCPzmax3,Klostertaler,6adGYZ9CU5LPwqLYt6yQc7,"[classic schlager, schlager, volksmusik, zille...",36,2004-03-09
2,7Jj23tl4vV4SblQgYZgCAL,Her mit meinen Hennen (Der Goggala),https://p.scdn.co/mp3-preview/56e52c31b64dad3d...,31,spotify:track:7Jj23tl4vV4SblQgYZgCAL,Klostertaler,6adGYZ9CU5LPwqLYt6yQc7,"[classic schlager, schlager, volksmusik, zille...",36,2007-11-22
3,7bhIlje38pzEuPllpto7JK,Pico-Bello - Radio Version,https://p.scdn.co/mp3-preview/542685c57d976351...,29,spotify:track:7bhIlje38pzEuPllpto7JK,Klostertaler,6adGYZ9CU5LPwqLYt6yQc7,"[classic schlager, schlager, volksmusik, zille...",36,2004-03-09
4,6cvUVbY2L8Ky6kYGxGrEgQ,Mir Geh´n No Lang Net Ins Bett - Radio Version,https://p.scdn.co/mp3-preview/c7e99e589e7413b7...,28,spotify:track:6cvUVbY2L8Ky6kYGxGrEgQ,Klostertaler,6adGYZ9CU5LPwqLYt6yQc7,"[classic schlager, schlager, volksmusik, zille...",36,2004-03-09
...,...,...,...,...,...,...,...,...,...,...
563288,7310HkEbJ2SLziIte7WZSi,Gott is a Dirndl,https://p.scdn.co/mp3-preview/92807dcc461ecf74...,43,spotify:track:7310HkEbJ2SLziIte7WZSi,Melissa Naschenweng,00unybLgvbIqKxx0OTyYYZ,"[partyschlager, schlager]",47,
563289,1IzYuIgrzZhUp9yomTowk0,Die ganze Nacht - Dance Mix,https://p.scdn.co/mp3-preview/81b696e709b644cf...,41,spotify:track:1IzYuIgrzZhUp9yomTowk0,Melissa Naschenweng,00unybLgvbIqKxx0OTyYYZ,"[partyschlager, schlager]",47,
563290,2sUXc7xn5Xy3mmTcLfOxHo,Braungebrannte Haut,https://p.scdn.co/mp3-preview/ed41988000214477...,40,spotify:track:2sUXc7xn5Xy3mmTcLfOxHo,Melissa Naschenweng,00unybLgvbIqKxx0OTyYYZ,"[partyschlager, schlager]",47,
563291,0OxNysTH08lf6SS2GInHx9,Die Nachbarin,https://p.scdn.co/mp3-preview/765395036326c9b6...,40,spotify:track:0OxNysTH08lf6SS2GInHx9,Melissa Naschenweng,00unybLgvbIqKxx0OTyYYZ,"[partyschlager, schlager]",47,


In [6]:
def download_preview_with_index(index, timeout=30):
    """Download one track preview, save its mel-spectrogram PNG, and discard the MP3.

    Parameters
    ----------
    index : label
        Row label into the module-level ``tracks_df``; the row supplies the
        ``track_preview_link`` URL and the ``track_id`` used in file names.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block a worker thread indefinitely.

    Returns
    -------
    None
        The result is the PNG written under ``data/Spotify/pngs/``.

    Raises
    ------
    requests.RequestException
        If the request times out, fails, or returns an HTTP error status.
    Exception
        Anything raised by ``convert_audio_to_mel_image`` propagates after the
        temporary MP3 has been removed.
    """
    track_url = tracks_df.loc[index, 'track_preview_link']
    track_id = tracks_df.loc[index, 'track_id']

    filename = f'data/Spotify/mp3s/{track_id}.mp3'
    png_name = f'data/Spotify/pngs/{track_id}.png'

    preview = requests.get(track_url, timeout=timeout)
    # Fail fast on HTTP errors instead of writing an error body to disk as audio.
    preview.raise_for_status()

    try:
        with open(filename, 'wb') as f:
            f.write(preview.content)

        convert_audio_to_mel_image(filename, png_name)
    finally:
        # Remove the temporary MP3 even when writing or conversion fails, so
        # failed tracks do not leave stray files behind.
        if os.path.exists(filename):
            os.remove(filename)

    # Carriage return keeps the progress counter on a single line.
    print(index, end='\r')

In [None]:
# Silence pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None

# Process rows from position 63216 onward.
# NOTE(review): 63216 looks like a manual resume point from an earlier run -- confirm.
indices = tracks_df[63216:].index
MAX_THREADS = 50
# ThreadPoolExecutor rejects max_workers=0, so keep at least one worker even
# when there is nothing left to process.
threads = max(1, min(MAX_THREADS, len(indices)))
# Not read in this cell; kept in case other cells rely on it.
count = 0

# Maps each failed row label to the exception it raised, for inspection or retry.
failed_downloads = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
    future_to_index = {
        executor.submit(download_preview_with_index, index): index
        for index in indices
    }
    # Collect every future's outcome: an unconsumed executor.map() would drop
    # worker exceptions silently. One bad track must not abort the batch, so
    # errors are recorded rather than re-raised.
    for future in concurrent.futures.as_completed(future_to_index):
        try:
            future.result()
        except Exception as exc:
            failed_downloads[future_to_index[future]] = exc

print(f'\n{len(failed_downloads)} of {len(indices)} downloads failed')

1803921803614736346463566636036372763788640566407464045640816431764326643356438864515645956459464854648906494765006650366580065921660236603566048660926609066105662176626666328663596636866385664886649166490665826673566778667626670466870669426696867113671286718467377674036752767564675976777267856681516816168244683176836168360684546852868557685596863768669686996876369140692956940669526695916979269848699187011570229702777028870608706407070070756708177107271130711437122271257715777156471591716107165971699717137190272130723387235572582725817276072800728537297173061730687306673079730777308473134733197346473487734857349773520736337373373853738687386673900739847414474179743047436874546745817461374671747897484774934749987503075046750987538875654757267583375832759947613376161766477669976745768317684377011773317731877322773387748777615776577773577760778747797177983781077811278114781727817678256783097836078400785727879178814788537889179043791237920079240793007930179298793917943979611797097981079985

In [None]:
# NOTE(review): presumably the last row index reported by the download run
# above, recorded by hand as a resume point -- confirm before relying on it.
last_index = 180392