In [104]:
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from scipy.misc import imread, imsave, imresize
import pandas as pd
import numpy as np
import subprocess
import glob
import re
import matplotlib.pyplot as plt

from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Conv2D, AtrousConvolution2D, Flatten, Dense, MaxPooling2D, Dropout, ZeroPadding2D, Activation
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.callbacks import EarlyStopping

### Load labels and rename image files

In [2]:
# Folder holding the label CSV and the cover images.
data_dir = "data/set1/"
data_type = {"Filename": str, "Genres": str, "Release Year": int}
albums = pd.read_csv(
    data_dir + "albumlabel.csv",
    dtype=data_type,
    parse_dates=["Release Year"],
)

# The artist is taken as the second underscore-separated field of the filename.
albums['Artist'] = albums['Filename'].map(lambda name: name.split('_')[1])
# New file name: "<Artist>_<first 10 characters of the release date string>".
release_day = albums['Release Year'].map(lambda stamp: str(stamp)[:10])
albums['NewFileName'] = albums['Artist'] + '_' + release_day
albums.head()

Unnamed: 0,Filename,Genres,Release Year,Artist,NewFileName
0,TheDarkSideOfTheMoon_PinkFloyd,rock thrash speedmetal heavymetal,1973-03-01,PinkFloyd,PinkFloyd_1973-03-01
1,PeaceSells...ButWho'sBuying(25thAnniversary)_M...,rock poppunk,1986-01-01,Megadeth,Megadeth_1986-01-01
2,Monster_KISS,rock heavymetal,2012-10-09,KISS,KISS_2012-10-09
3,Metallica_Metallica,rock pop alternativerock,1991-08-12,Metallica,Metallica_1991-08-12
4,Nevermind(Remastered)_Nirvana,rock hardrock,1991-09-26,Nirvana,Nirvana_1991-09-26


In [9]:
# Resize pictures
setpath = 'data/set1/'
# Rebuild the working copy from scratch so this cell can be re-run safely.
# check_call raises if a command fails: a failed `rm` would otherwise make
# `cp -r` nest the images under resize/img, and a failed `cp` would leave
# nothing to resize.
subprocess.check_call(['rm', '-rf', setpath+'resize'])
subprocess.check_call(['cp', '-r', setpath+'img', setpath+'resize'])
for pic in glob.glob(setpath+"resize/*.jpg"):
    # Force three channels so grayscale / RGBA covers still end up as
    # 32x32x3 thumbnails, the shape the image array built later in this
    # notebook is allocated with.
    img = imread(pic, mode='RGB')
    img = imresize(img, (32, 32))
    imsave(pic, img)

# File rename
# Move every "<Filename>.jpg" to "<NewFileName>.jpg". Failures are collected
# and reported instead of being silently ignored.
failed_renames = []
for old_name, new_name in zip(albums['Filename'], albums['NewFileName']):
    status = subprocess.call(['mv', data_dir+'resize/'+old_name+'.jpg',
                              data_dir+'resize/'+new_name+'.jpg'])
    if status != 0:
        failed_renames.append(old_name)
if failed_renames:
    print("WARNING: could not rename %d file(s): %s" % (len(failed_renames), failed_renames))

In [91]:
# Label to index number New genres (12 groups)
token = Tokenizer()
genres = ['rock', 'metal', 'punk', 'pop', 'blue', 'funk', 'jazz', 'electro', 'hiphop', 'rap', 'country', 'other']
token.fit_on_texts(genres)

# Reverse lookup: column index -> genre keyword.
idx_word = {index: word for word, index in token.word_index.items()}

label_lst = albums.Genres.get_values()
# One row per album, one column per tokenizer index (plus one extra column
# so the largest index is addressable).
n_columns = max(token.word_index.values()) + 1
data_y = np.zeros((len(label_lst), n_columns))
for row, album_labels in enumerate(label_lst):
    for label in album_labels.split():
        # A raw label activates every genre keyword it contains as a
        # substring (e.g. 'poppunk' sets both 'pop' and 'punk').
        matched = [word for word in token.word_index if word in label]
        if not matched:
            # Nothing recognised: file the label under 'other'.
            matched = ['other']
        for word in matched:
            data_y[row, token.word_index[word]] = 1
print(data_y[:5,])

[[ 0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]]


In [94]:
# Read image
# Load every renamed cover, then copy the pixels into one float array of
# shape (number of albums, 32, 32, 3).
image_paths = [data_dir + 'resize/' + name + '.jpg' for name in albums['NewFileName'].get_values()]
X_origin = np.array([np.array(Image.open(path)) for path in image_paths])
X = np.zeros((X_origin.shape[0], 32, 32, 3))
for idx, pixels in enumerate(X_origin):
    X[idx] = pixels

In [122]:
# Optimizer and loss identifiers handed to model.compile() later in this notebook.
optimizer = 'adadelta'
# Used together with a sigmoid output layer; the label matrix holds 0/1 entries per column.
objective = 'binary_crossentropy'

def center_normalize(x):
    """Shift ``x`` to zero mean and scale it to (approximately) unit std.

    The mean and standard deviation are taken over the whole tensor (no axis
    is passed), so the statistics are shared by everything in ``x`` rather
    than computed per sample.

    ``K.epsilon()`` is added to the denominator so a constant input, whose
    standard deviation is zero, yields zeros instead of NaN/inf.

    Args:
        x: a tensor accepted by the Keras backend ``K``.

    Returns:
        A tensor with the same shape as ``x``.
    """
    return (x - K.mean(x)) / (K.std(x) + K.epsilon())

def single2multi(x):
    """Divide every element of ``x`` by the largest element of ``x``."""
    peak = np.max(x)
    return x / peak

# split data
# Hold out 20% of the albums for validation. The seed is fixed so the split,
# and every number computed from it, is the same on each run of the notebook.
split_seed = 42
train_x, test_x, train_y, test_y = train_test_split(X, data_y, test_size=0.2, random_state=split_seed)

In [139]:
# Build NN Model (model to optimize)
input_dims = train_x.shape[1:]
output_units = train_y.shape[1]

model = Sequential()
# Normalise the pixel values inside the network before any convolution.
model.add(Activation(activation=center_normalize, input_shape=input_dims))

# Three 3x3 convolutions with 64 filters each. Each is preceded by one pixel
# of zero padding, so the 'valid' convolution keeps the spatial size.
model.add(ZeroPadding2D((1,1), input_shape=input_dims, dim_ordering='tf'))
model.add(Conv2D(64, 3, 3, border_mode='valid', activation='relu'))
for conv_idx in range(2):
    model.add(ZeroPadding2D((1,1)))
    model.add(Conv2D(64, 3, 3, border_mode='valid', activation='relu'))

model.add(MaxPooling2D((2, 2), strides=(2,2)))

# Classifier head: two 4096-unit dense layers, each followed by 50% dropout.
model.add(Flatten())
model.add(BatchNormalization())
for dense_idx in range(2):
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))

# One sigmoid unit per label column.
model.add(Dense(output_units, activation='sigmoid'))
# Disabled alternative output layer:
# model.add(Dense(train_y.shape[1], activation=single2multi))

model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])

In [140]:
# Early stopping watches val_loss with a patience of 4 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='auto')
fit_options = dict(
    validation_data=(test_x, test_y),
    batch_size=5,
    nb_epoch=10,
    verbose=1,
    callbacks=[early_stopping],
)
model.fit(train_x, train_y, **fit_options)

Train on 215 samples, validate on 54 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x15efb3890>

In [141]:
def greater50percent(y, threshold=0.5):
    """Return, for each row of ``y``, the positions whose value exceeds ``threshold``.

    Args:
        y: iterable of rows (lists or array rows) of numeric scores.
        threshold: cut-off a score must be strictly greater than to be kept.
            Defaults to 0.5, the value the function name refers to.

    Returns:
        A list with one entry per row; each entry is the list of column
        indices (in ascending order) whose score is above ``threshold``.
    """
    return [
        [idx for idx, score in enumerate(row) if score > threshold]
        for row in y
    ]

# Label indices scored above 0.5 by the model (p) and the indices set in the
# 0/1 ground-truth labels (p_), one list per test sample.
pred = model.predict(test_x)
p, p_ = greater50percent(pred), greater50percent(test_y)

In [150]:
# Fraction of albums carrying each label column.
data_y.mean(axis=0)

array([ 0.        ,  0.03345725,  0.2936803 ,  0.00371747,  0.04460967,
        0.22304833,  0.01115242,  0.12639405,  0.39405204,  0.0260223 ,
        0.81784387,  0.0929368 ,  0.05204461])

In [142]:
# Side-by-side listing: sample number, predicted label indices, true label indices.
for i, predicted in enumerate(p):
    print(i, predicted, p_[i])

(0, [5, 10], [8, 10, 11])
(1, [10], [8, 10])
(2, [8, 10], [8, 10])
(3, [10], [5, 10])
(4, [10], [8, 10])
(5, [10], [5, 8, 11])
(6, [10], [1, 5, 6, 8, 10])
(7, [10], [8, 10])
(8, [10], [2, 8, 10])
(9, [8, 10], [8, 10])
(10, [10], [5, 7, 8, 9, 10, 11])
(11, [10], [7])
(12, [10], [7, 8])
(13, [10], [8, 10])
(14, [10], [10])
(15, [10], [8, 10])
(16, [5, 10], [5, 8, 10])
(17, [5, 10], [5, 10])
(18, [10], [1, 10])
(19, [5, 10], [10])
(20, [5, 10], [7, 11])
(21, [10], [4, 5, 10])
(22, [5, 10], [5, 10])
(23, [10], [2, 10])
(24, [5, 10], [5, 7, 11])
(25, [10], [2, 8, 10])
(26, [10], [7])
(27, [10], [2, 10, 12])
(28, [5, 10], [5, 10])
(29, [10], [10, 12])
(30, [10], [8, 10, 11])
(31, [10], [7])
(32, [5, 8, 10], [5, 12])
(33, [5, 10], [10])
(34, [5, 10], [2, 10])
(35, [8, 10], [2, 10])
(36, [8, 10], [5, 8, 10])
(37, [10], [2, 10])
(38, [5, 10], [10])
(39, [8, 10], [10])
(40, [5, 8, 10], [2, 10])
(41, [10], [4, 5, 8, 10])
(42, [10], [2, 10, 12])
(43, [5, 8, 10], [5])
(44, [10], [7, 8])
(45, [5, 10

In [113]:
def predict_req(filename, model, size=(32, 32)):
    """Run ``model`` on a single image file and return its predictions.

    The image is converted to 3-channel RGB and, when its dimensions differ
    from ``size``, resized first, so covers that were not pre-processed
    (grayscale, RGBA, or a different resolution) can be scored too. An image
    that is already RGB and already ``size`` is passed through unchanged.

    Args:
        filename: path of an image file that PIL can open.
        model: object exposing ``predict(batch)``.
        size: ``(height, width)`` the image is brought to before prediction.
            Defaults to (32, 32), the size the resize cell of this notebook
            uses for the training covers.

    Returns:
        Whatever ``model.predict`` returns for a batch holding this one image.
    """
    img = Image.open(filename).convert('RGB')
    height, width = size
    # PIL reports and expects sizes as (width, height).
    if img.size != (width, height):
        # Bilinear is the default interpolation of scipy.misc.imresize, which
        # the resize cell uses for the training images.
        img = img.resize((width, height), Image.BILINEAR)
    reqdata = np.array([np.array(img)])
    return model.predict(reqdata)

In [151]:
# Score one of the resized covers with the trained model.
sample_cover = data_dir + 'resize/' + 'Megadeth_1986-01-01.jpg'
predict_req(sample_cover, model)

array([[  3.48932758e-07,   4.83216019e-04,   2.88710669e-02,
          5.07404502e-06,   1.57522280e-02,   2.62485087e-01,
          7.20133103e-05,   6.18063239e-03,   3.58574778e-01,
          5.93831902e-03,   9.65632737e-01,   5.01241209e-03,
          3.89465538e-04]], dtype=float32)