In [9]:
import os
import tensorflow as tf
import numpy as np
import math
from random import sample, shuffle
from PIL import Image
import matplotlib.pyplot as plt
from src.DataGenerator import AudioDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D , Flatten, Reshape, Conv2DTranspose, BatchNormalization, Conv1D, Input
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from src.helper_functions import plot_reconstruction
import seaborn as sns


In [15]:
data_gen = AudioDataGenerator(
    directory='data/Spotify/comp_pngs/', 
    image_size=(128,512), 
    color_mode='rgb',
    batch_size=32,
    sample_size=10000,
    shuffle=True,
    train_test_split=True, 
    test_size=0.02,
    output_channel_index=0,
    output_size=(128,128))


Found 9800 files for Training set
Found 200 files for Test set


In [3]:
img_width = 128
img_height = 128
kernel_size = 5
strides = 2

In [63]:
class Time_Freq_Autoencoder_Builder:
    
    def build(width, height, depth, filters=(32,64,128,256), latent_dim=256, kernel_size=5):
        
        strides = 2
        
        input_shape = (height, width, depth)
        inputs = Input(shape = input_shape)
        
        chan_dim = -1
        
        x_time = Reshape(target_shape=(height,width))(inputs)
        x_freq = Reshape(target_shape=(height,width))(tf.transpose(inputs, perm=[0,2,1,3]))
        
        for f in filters:
            
            x_time = Conv1D(f, kernel_size=kernel_size, strides=strides, padding='same', activation='relu')(x_time)
            x_time = BatchNormalization(axis=chan_dim)(x_time)
            
        x_time = Flatten()(x_time)
        latent_time = Dense(latent_dim//2)(x_time)
        
        for f in filters:
            
            x_freq = Conv1D(f, kernel_size=kernel_size, strides=strides, padding='same', activation='relu')(x_freq)
            x_freq = BatchNormalization(axis=chan_dim)(x_freq)
            
        x_freq = Flatten()(x_freq)
        latent_freq = Dense(latent_dim//2)(x_freq)
        
        
        latent_concat = tf.keras.layers.Concatenate(axis=1)([latent_time, latent_freq])
        
        encoder = Model(inputs, latent_concat, name='encoder')
        
        latent_inputs = Input(shape=((latent_dim//2)*2))
        
        x = Dense(16384, activation='relu')(latent_inputs)
        x = Reshape(target_shape=(8,8,256))(x)
        
        for f in filters[::-1]:
            
            x = Conv2DTranspose(f, kernel_size=kernel_size, strides=strides, padding='same', activation='relu')(x)
            x = BatchNormalization(axis=chan_dim)(x)
            
        x = Conv2DTranspose(depth, kernel_size=kernel_size, padding='same', activation='sigmoid')(x)
        
        outputs = x
        
        decoder = Model(latent_inputs, outputs, name='decoder')
        
        autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
        
        return (encoder, decoder, autoencoder)
    

class Time_Freq_Autoencoder(tf.keras.Model):
    
    def __init__(self, image_width, image_height, image_depth=1, latent_dim=256, kernel_size=5):
        super().__init__()
        
        self.encoder, self.decoder, self.autoencoder = Time_Freq_Autoencoder_Builder.build(width=image_width, height=image_height, depth=image_depth, latent_dim=256, kernel_size=kernel_size)
        
    def call(self, x):
        autoencoded = self.autoencoder(x)
        return autoencoded
    
autoencoder = Time_Freq_Autoencoder(image_width=img_width, image_height=img_height, latent_dim=256, kernel_size=5)

In [64]:
opt = Adam(learning_rate=1e-3)

autoencoder.compile(optimizer=opt, loss=tf.keras.losses.mse)

In [65]:
autoencoder.encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_19 (InputLayer)          [(None, 128, 128, 1  0           []                               
                                )]                                                                
                                                                                                  
 tf.compat.v1.transpose_10 (TFO  (None, 128, 128, 1)  0          ['input_19[0][0]']               
 pLambda)                                                                                         
                                                                                                  
 reshape_28 (Reshape)           (None, 128, 128)     0           ['input_19[0][0]']               
                                                                                            

In [None]:
hist_2 = autoencoder.fit(data_gen.train,
                       batch_size=data_gen.batch_size,
                       epochs=15,
                       validation_data=data_gen.test)

Epoch 1/15

In [None]:
loss = hist_2.history['loss']
val_loss = hist_2.history['val_loss']
plt.plot(range(len(loss)), loss)
plt.plot(range(len(val_loss)), val_loss)

In [None]:
test_img = data_gen.take(1)[0]
prediction = autoencoder(test_img)

plot_reconstruction(test_img, prediction, 1)

In [None]:
autoencoder_path = 'data/autoencoder_256dim_time_freq'
autoencoder.save(autoencoder_path)

In [None]:
from src.LatentSpace import LatentSpace

latent_space = LatentSpace(autoencoder_path=autoencoder_path,
                        image_dir='data/Spotify/comp_pngs/',
                        tracks_feather_path='data/all_tracks.feather', latent_dims=256, output_size=(128, 128),
                        sample_size=10000
                        )

In [None]:
latent_space.build()

try:
    os.mkdir('data/autoencoder_256dim_time_freq')
except:
    pass
latent_space.save('data/autoencoder_256dim_time_freq')

In [54]:
latent_space.tracks

Unnamed: 0,id,latent_0,latent_1,latent_2,latent_3,latent_4,latent_5,latent_6,latent_7,latent_8,latent_9,latent_10,latent_11,latent_12,latent_13,latent_14,latent_15,latent_16,latent_17,latent_18,latent_19,latent_20,latent_21,latent_22,latent_23,latent_24,latent_25,latent_26,latent_27,latent_28,latent_29,latent_30,latent_31,latent_32,latent_33,latent_34,latent_35,latent_36,latent_37,latent_38,...,latent_226,latent_227,latent_228,latent_229,latent_230,latent_231,latent_232,latent_233,latent_234,latent_235,latent_236,latent_237,latent_238,latent_239,latent_240,latent_241,latent_242,latent_243,latent_244,latent_245,latent_246,latent_247,latent_248,latent_249,latent_250,latent_251,latent_252,latent_253,latent_254,latent_255,track_id,track_name,track_preview_link,track_popularity,track_uri,artist_name,artist_id,artist_genres,artist_popularity,release_date
0,019h1GEiJksHksV69crWlb,-0.135969,-1.080742,0.010611,0.264380,-0.282120,-0.386321,-0.531853,0.145832,0.393477,0.316282,-1.275607,-0.783184,0.418306,0.680534,-0.017305,-0.221260,-0.639222,-0.270179,0.154539,0.444995,0.465586,0.862384,-0.327111,0.253675,-0.216548,0.592059,0.045263,0.097859,-0.650825,1.195330,-0.059862,0.045206,0.121386,-0.263237,0.495160,-1.383607,-0.212116,-0.139009,0.618191,...,-0.726995,0.598134,-0.415808,-0.361233,-1.044817,-0.067187,-0.563698,-0.336512,-0.546351,0.662686,-0.643145,0.226183,-0.656796,-1.190917,0.218502,-0.755139,0.296668,0.879694,-0.418990,-0.760232,-0.430035,-0.693480,-0.921463,0.491161,-0.334664,-1.382257,1.075602,0.399877,0.957121,-0.321671,019h1GEiJksHksV69crWlb,Sit Down,https://p.scdn.co/mp3-preview/cedbb28877fff4e6...,8,spotify:track:019h1GEiJksHksV69crWlb,Anthony Kasper,5ds2HgCA0GdYB6myfBuZHr,"[deep dnb, stateside dnb]",14,2017-11-13
1,01FxKeJ7lhpB2aHlOEghSc,-0.490797,0.002244,0.574762,-0.515629,1.138343,-0.730767,-0.697187,0.453615,-0.257210,-0.627807,0.032090,0.085140,0.602219,0.174036,1.002995,0.283522,-0.645016,-0.508837,0.574342,-0.134904,0.439146,-0.110068,-0.174247,-0.985985,-0.722416,0.752691,0.614407,0.928383,-0.012259,-0.309228,-0.753770,0.522862,0.571096,0.572648,0.425755,-0.072612,-0.514846,0.652093,-0.369135,...,-0.147191,0.216931,-0.040296,-0.384967,-0.154374,0.311612,0.347683,-0.723379,-0.271151,0.530671,-0.597877,-0.064482,-0.535407,-0.747391,0.347272,-0.568104,-0.193282,-0.110055,-0.227032,-0.124218,-0.393753,-0.001557,-0.149116,0.492068,-0.482273,-0.575553,0.684747,0.327951,0.080558,0.181550,01FxKeJ7lhpB2aHlOEghSc,Adult Checksum Corrupt,https://p.scdn.co/mp3-preview/6ca7cd89ee01af02...,10,spotify:track:01FxKeJ7lhpB2aHlOEghSc,Covox,2aYQ5jggFTNykHeemiVDFP,"[chiptune, nintendocore]",16,
2,01IgZCWI5jDvFO804ujM7R,1.470836,0.683196,0.125688,1.972370,-2.122741,0.688929,0.790163,-1.564057,0.919398,0.942770,-0.337917,-0.368693,-0.945989,-0.899200,-1.383545,-1.431590,0.804138,1.164736,-0.759336,1.924849,0.865203,0.480521,1.115466,1.373861,1.554525,-0.982789,-0.854785,-0.152988,1.637655,0.590342,1.395814,0.907795,-0.680902,-1.743337,-1.144213,1.186817,0.984626,-0.620861,1.288382,...,-0.156249,0.799900,0.607629,0.743679,-0.166031,-0.729224,-0.366519,-0.256268,0.349623,0.302423,0.307851,-0.319642,0.514621,0.294329,0.392033,-0.147220,0.130790,0.462386,-0.653359,0.395556,0.254648,-1.351691,-0.116340,0.112806,0.139825,-0.572710,-0.015019,0.925475,0.365652,-0.913842,01IgZCWI5jDvFO804ujM7R,Yalopa,https://p.scdn.co/mp3-preview/ba3d224065b69cc0...,33,spotify:track:01IgZCWI5jDvFO804ujM7R,DJ Gregory,08P7D5oQ3HUmQ0Gjbw6nzR,[deep house],36,2009-06-29
3,01QIWtqyX9fe4dIXnBelsq,0.909195,0.547015,-0.113827,1.395812,-1.358532,0.521730,0.877788,-0.611117,0.210469,0.326797,-0.824250,-0.426436,-0.944936,-0.872964,-1.104315,-0.720619,0.672814,0.986196,-1.007625,1.194667,1.097025,0.491482,1.121073,1.084094,1.092085,-0.723085,-0.771082,-0.408404,0.864863,0.484611,0.961268,0.074168,-0.651963,-1.133760,-1.115664,0.195757,1.106431,-0.694969,1.604928,...,-0.141281,0.316031,0.455160,0.807723,-0.269320,-0.915273,-0.171349,0.223956,0.390963,0.105414,0.258039,-0.253498,0.424150,0.524332,0.294682,-0.256294,0.098961,0.570644,-0.482400,0.505310,0.238326,-0.809340,0.019950,0.176680,-0.213899,-0.597033,-0.302303,0.687768,0.346647,-0.948174,01QIWtqyX9fe4dIXnBelsq,I Think I Feel,https://p.scdn.co/mp3-preview/46fec6d7f49bf308...,0,spotify:track:01QIWtqyX9fe4dIXnBelsq,Kitano,3IVmdBDFrha1rgH9Cnzzsv,[],3,
4,01nM7IswfclHxZue3gr1Ft,1.494341,0.001833,-0.996788,-0.086574,-0.500834,0.508247,0.769895,0.130303,-0.255611,0.548543,-0.317713,-0.575505,0.124752,0.992327,-0.706764,0.376543,-0.264500,0.697921,-0.444451,0.179024,0.067294,-0.260403,0.627886,0.386850,-0.064274,-0.038655,-0.547924,-0.011053,-0.664901,1.579373,0.726497,0.254757,0.019871,0.122464,0.264543,-0.398443,-0.086581,-0.492874,0.575711,...,-0.021617,0.459982,-0.179621,0.281065,0.407255,0.118863,0.008917,-0.772240,-0.441826,-0.367483,0.205071,-0.082721,0.421980,0.638279,0.212407,0.779786,0.036541,0.071567,-0.370763,0.012842,0.844595,-0.688054,-0.270520,-0.064220,0.347987,0.337480,-1.659568,0.001646,-0.011123,-0.132992,01nM7IswfclHxZue3gr1Ft,Tentativa,https://p.scdn.co/mp3-preview/2776650c9ba6642d...,31,spotify:track:01nM7IswfclHxZue3gr1Ft,Pupkulies & Rebecca,0LtDMZWibqA2ET2EG6ny0K,[],40,2013-10-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
987,7y5OqbVWcg4ZULg6DdBNul,-0.860723,-1.453538,1.000647,-0.746422,0.122682,-1.420430,-1.021144,0.675450,-0.072234,-1.555670,-0.131258,-0.639784,1.758622,1.143118,1.015827,0.117259,-0.345011,-1.767075,1.506021,-0.722478,-0.291841,-1.734055,-1.893993,-1.353137,-1.650312,1.587153,1.375753,-0.557760,-0.589533,0.438634,-1.231910,-0.800443,1.584970,0.758792,1.669186,-0.674107,-1.631739,1.556573,-1.360999,...,-0.030882,0.041294,-0.666704,-0.808098,-0.407075,1.018914,0.112272,-0.637893,-0.840247,0.479771,-1.147089,0.428921,-1.515239,-0.790647,0.141487,0.036676,0.151871,-0.134600,0.644347,-1.424622,-0.738559,0.297226,-0.115759,0.051405,-0.725970,0.658364,0.600347,-1.167102,0.292698,0.812080,7y5OqbVWcg4ZULg6DdBNul,Smoothe It Out,https://p.scdn.co/mp3-preview/f0df83c660553d43...,19,spotify:track:7y5OqbVWcg4ZULg6DdBNul,Black Diamond Heavies,1iO8EcGEOjhoIs7OLGh0Qn,"[garage rock, modern blues rock, punk blues]",27,
988,7yLHJvVA1uQyRL7iMSMjR3,0.094078,-0.200016,0.284719,0.549199,-0.423874,-0.457500,-0.550645,-0.217345,0.283547,0.133268,-0.633915,-0.439156,0.328574,0.310750,-0.308262,-0.311936,-0.566472,-0.233040,0.026129,0.140415,1.406902,0.215031,-0.084534,-0.628079,-0.192765,0.410045,0.132768,0.111803,0.256506,-0.240937,-0.040624,0.648620,0.190225,-0.174856,0.299403,0.024189,-0.236816,0.271044,0.322538,...,-0.308434,-0.331405,-0.000124,-0.321691,-0.499158,-0.210182,-0.305224,-0.163021,0.432446,0.290205,-0.378865,-0.322506,-0.587699,-0.111176,0.578411,-0.832433,0.026722,0.262503,-0.532194,-0.053683,0.122315,0.332157,-0.825479,0.317479,-0.615851,-0.986898,0.715195,0.281918,0.292888,-0.492213,7yLHJvVA1uQyRL7iMSMjR3,Lost in the Lasers,https://p.scdn.co/mp3-preview/9e5e8bcd3e8246b6...,0,spotify:track:7yLHJvVA1uQyRL7iMSMjR3,Parker Jones,0MiU0OJmgbTwYjjaUXPDm1,[],5,2009-07-01
989,7yslxPj5TdZrfdFGsytkhe,-1.257105,-0.087672,-0.178861,-1.113753,1.734814,-0.639224,-0.867755,1.609565,-0.618866,0.224139,-0.605972,0.671308,0.331136,-0.161373,1.476383,0.927143,-0.948882,-0.382742,0.454260,-1.119271,0.154887,0.942056,0.155938,-1.306321,-0.942439,0.909529,0.286326,1.345418,-1.322768,-0.106128,-1.065248,0.542836,0.237456,1.203408,0.444129,-1.418514,-0.363139,-0.086035,0.134248,...,-0.203088,-0.086045,0.317132,-0.510114,-0.239773,0.272086,0.174650,-0.459521,0.002902,0.489322,-0.494102,-0.351622,-0.383175,-0.300257,0.521125,-0.632072,-0.194030,-0.120200,-0.364512,0.107711,-0.269386,0.121497,0.275313,0.521300,-0.625881,-0.132178,0.030043,0.656062,0.097299,0.238469,7yslxPj5TdZrfdFGsytkhe,Canoa,https://p.scdn.co/mp3-preview/24293410e567c799...,8,spotify:track:7yslxPj5TdZrfdFGsytkhe,Consuelo De Paula,4pC7MSrPBtfjUWd8irptqY,[deep brazilian pop],12,2020-08-21
990,7yzaxgnFyYruR3xLYKj8yz,-0.560897,-1.106141,0.709608,-0.881778,0.622176,-1.257025,-1.200939,0.588160,0.096991,-1.055063,-0.122432,-0.372625,1.347813,1.045381,0.907664,0.192303,-0.820414,-1.386936,1.267520,-0.811748,0.009814,-1.345290,-1.272078,-1.360608,-1.400919,1.311870,1.103989,-0.008374,-0.456247,-0.340462,-0.994790,-0.100180,1.191600,0.662697,1.255131,-0.258434,-1.378749,1.310895,-1.025855,...,-0.157540,-0.221277,-0.700593,-0.621290,-0.416334,0.709249,0.183296,-0.593150,-0.551469,0.674392,-0.898222,0.410006,-0.887623,-0.766668,0.297223,-0.226918,-0.417540,-0.168294,0.414859,-0.879175,-0.645526,0.572699,-0.173935,0.498340,-0.633166,0.303836,-0.040630,-0.523663,0.058201,0.507192,7yzaxgnFyYruR3xLYKj8yz,O Roteirista,https://p.scdn.co/mp3-preview/3e41c75fa5c0ad08...,20,spotify:track:7yzaxgnFyYruR3xLYKj8yz,Tópaz,2iiIiebCQqwrwCqARl4Q3A,"[brazilian rock, novo rock gaucho]",30,2014-10-03
