# Imports/Setup

In [74]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import clear_output
from sqlalchemy import create_engine
import os
from dotenv import load_dotenv

%matplotlib inline

# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()

# Environment variables that supply, in order, the user, password, host, port and
# database name for the PostgreSQL URL below.
db_env_keys = ('DB_USER', 'DB_PASS', 'DB_IP', 'DB_PORT', 'DB_NAME')
db_settings = [os.getenv(key) for key in db_env_keys]

# `conn` is a SQLAlchemy engine; it is handed to pd.read_sql in later cells.
# An unset variable is formatted as the text 'None' rather than raising here.
conn = create_engine('postgresql://{0}:{1}@{2}:{3}/{4}'.format(*db_settings))

# Download/Setup Data

In [2]:
# Read both source tables in full through the engine created above.
# `dreampop` holds the positive-class tracks; `other` presumably holds tracks
# from the remaining genres (verify against the database schema).
dreampop_query = "SELECT * FROM dreampop;"
other_query = "SELECT * FROM other;"
dreampop_data = pd.read_sql(sql=dreampop_query, con=conn)
other_data = pd.read_sql(sql=other_query, con=conn)

In [3]:
# Stack the two tables into a single frame with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; the resulting frame is the same.
data = pd.concat([dreampop_data, other_data], ignore_index=True)

# Drop the columns that are not used by any later cell.
data = data.drop(columns=['index', 'type', 'uri', 'track_href', 'analysis_url'])
data.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,name,artist,genre
0,0.42,0.33,9,-13.925,1,0.0275,0.245,0.000679,0.0677,0.175,157.143,1LzNfuep1bnAUR9skqdHCK,295600,3,Fade Into You,Mazzy Star,dreampop
1,0.512,0.838,0,-6.842,1,0.031,0.0118,0.0142,0.116,0.195,117.992,1bwqV8EGVw1RLT3cEzxLpH,203390,4,Closer,Josha Daniel,dreampop
2,0.347,0.55,7,-9.048,1,0.0399,0.00419,0.0492,0.0992,0.0669,124.867,5TRSyGcFfevCbJHFUk4OB0,199680,4,Alive,Josha Daniel,dreampop
3,0.377,0.65,2,-5.902,1,0.029,0.00174,5.8e-05,0.107,0.365,180.074,6sVQNUvcVFTXvlk3ec0ngd,192467,4,Cherry-coloured Funk,Cocteau Twins,dreampop
4,0.638,0.761,4,-7.945,0,0.169,0.147,0.000217,0.321,0.378,119.987,4uXWLG0CBQhJlvqPksiHxu,188000,4,High Enough,Lodola,dreampop


In [4]:
# Build two-way lookups between genre names and integer ids.
# groupby sorts its keys by default, so ids follow the sorted order of the genre names.
genres = list(data.groupby('genre').count().index)

# name -> id
genre_to_id = {genre: idx for idx, genre in enumerate(genres)}
# id -> name. The original loop wrote these entries into genre_to_id by mistake,
# which left id_to_genre empty and mixed integer keys into genre_to_id.
id_to_genre = {idx: genre for idx, genre in enumerate(genres)}

In [5]:
# Attach the integer genre id; a genre missing from genre_to_id raises KeyError,
# so an incomplete mapping fails loudly instead of producing NaN ids.
data['genre_id'] = data['genre'].apply(lambda x: genre_to_id[x])

# Binary target: 1 for dreampop tracks, 0 for everything else.
data['is_dreampop'] = (data['genre'] == 'dreampop').astype(int)

# Shuffle the rows. The fixed seed makes the row order, and therefore the
# seeded train/validation/test splits built from it, reproducible across runs.
data = data.sample(frac=1, random_state=1).reset_index(drop=True)
data.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,name,artist,genre,genre_id,is_dreampop
0,0.409,0.234,7,-17.126,1,0.0389,0.565,0.000144,0.12,0.151,165.96,18PbKNh8nuvrsq54GpCkoK,258640,4,Creep - Acoustic,Radiohead,sad,104,0
1,0.774,0.721,6,-6.662,0,0.0773,0.0126,6e-06,0.0952,0.654,118.048,6vSq5q5DCs1IvwKIq53hj2,263427,4,Animal,Miike Snow,electro,33,0
2,0.0959,0.899,1,-7.033,0,0.0588,0.104,0.0014,0.675,0.301,87.307,713SXpm5C6kzeVLUZ7hgO9,231973,4,Kill Rhythm,Catherine Wheel,dreampop,28,1
3,0.516,0.34,2,-10.378,0,0.0556,0.909,0.0,0.219,0.623,116.703,2MpzQgvW0TTC3D3bfcCQlP,197902,4,Pa' Lo Que Te Va a Durar,Roberto Goyeneche,tango,120,0
4,0.574,0.698,7,-6.109,1,0.0277,0.00365,0.527,0.675,0.123,120.067,6fyfUncrRzeOOEcVYoDWT7,266160,4,Your Way (feat. Day Wave),Jai Wolf,dreampop,28,1


# Create train, validation, and test sets

In [6]:
# Columns used as model inputs (13 numeric audio features).
data_cols = [
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]

# Feature matrix and binary target as NumPy arrays.
feature_frame = data.loc[:, data_cols]
target_series = data['is_dreampop']
X = feature_frame.to_numpy()
y = target_series.to_numpy().astype(int)
X.shape, y.shape

((20589, 13), (20589,))

In [7]:
# Two-stage split: hold out 20% for testing, then take 25% of the remaining 80%
# for validation, giving a 60/20/20 train/validation/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1
)

# Show the shape of every split as a sanity check.
tuple(part.shape for part in (X_train, X_test, X_val, y_train, y_test, y_val))

((12353, 13), (4118, 13), (4118, 13), (12353,), (4118,), (4118,))

# Create Model

In [351]:
# Small fully connected binary classifier: one hidden ReLU layer of 100 units
# followed by a single sigmoid unit that outputs a probability.
# The input width is taken from the training matrix instead of a hard-coded 13,
# so the model stays in sync if the feature column list changes.
n_features = X_train.shape[1]

# NOTE(review): the features are fed in unscaled (e.g. duration_ms is in the
# hundreds of thousands while danceability lies in 0-1). Consider standardizing
# the inputs; confirm whether this explains the lack of positive predictions.
model = keras.models.Sequential()
model.add(keras.layers.Dense(100, input_dim=n_features, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_52 (Dense)             (None, 100)               1400      
_________________________________________________________________
dense_53 (Dense)             (None, 1)                 101       
Total params: 1,501
Trainable params: 1,501
Non-trainable params: 0
_________________________________________________________________


In [352]:
# Binary-classification training setup: Adam optimizer with binary cross-entropy
# loss. Precision and recall are tracked alongside accuracy.
tracked_metrics = ['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked_metrics)

# Train/Test Model

In [353]:
# Train for 3 epochs in mini-batches of 10 rows; the validation split is passed
# in so Keras reports validation metrics during training.
fit_options = dict(batch_size=10, epochs=3, validation_data=(X_val, y_val))
history = model.fit(X_train, y_train, **fit_options)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [358]:
# Score the trained model on the held-out test split; `results` holds the loss
# followed by the metrics configured in compile().
results = model.evaluate(x=X_test, y=y_test, batch_size=128)



In [356]:
# Spot check: draw 2,500 random rows from the full dataset. Because the sample
# comes from `data`, it can include rows the model was trained on.
sample = data.sample(n=2500)
sample_X = sample[data_cols].to_numpy()
sample_y = sample['is_dreampop'].to_numpy()

# The single sigmoid output gives one probability per row, nested one level deep;
# flatten the nested list and round each probability to a 0/1 class label.
predicted_rows = model.predict(sample_X).tolist()
predictions = [round(prob) for row in predicted_rows for prob in row]

# Keep only the true positives: (label, prediction) pairs where both equal 1.
ts = [
    pair
    for pair in zip(sample_y, predictions)
    if pair[0] == pair[1] and pair[0] == 1
]
ts

[]