In [1]:
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from scipy.misc import imread, imsave, imresize
import pandas as pd
import numpy as np
import subprocess
import glob
import re
import matplotlib.pyplot as plt

from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Conv2D, AtrousConvolution2D, Flatten, Dense, MaxPooling2D, Dropout, ZeroPadding2D, Activation
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


### Load labels and rename data

In [2]:
# Directory that holds the label CSV and the image folders used by later cells.
data_dir = "data/set2/"
# Column dtypes requested from the CSV parser.
# NOTE(review): "Release Year" is declared int here but is also listed in
# parse_dates below; the displayed output shows datetimes, so date parsing
# appears to win -- confirm for the pandas version in use.
data_type = {"Filename": str, "Genres": str, "Release Year": int}
albums = pd.read_csv(data_dir + "albumlabel.csv", dtype=data_type, parse_dates=["Release Year"])
# Disabled experiment: derive an artist name and a date-based file name per album.
# albums['Artist'] = [name.split('_')[1] for name in albums['Filename']]
# albums['NewFileName'] = albums['Artist'] +'_'+ albums['Release Year'].map(lambda x: str(x)[:10])
albums.head()

Unnamed: 0,Filename,Genres,Release Year
0,12 X 5_1964,rock pop bluesrock,1964-01-01 00:00:00
1,The Rolling Stones_1964,rock blues pop poprock rhythmblues rockroll,1964-01-01 00:00:00
2,It's All Over Now_1964,rock rhythmblues classicrock,1964-01-01 00:00:00
3,December's Children (And Everybody's)_1966,rock bluesrock garagerock poprock,1966-01-01 00:00:00
4,No. 2_1965,rock bluesrock rockroll,1965-01-01 00:00:00


In [None]:
# Resize pictures
# Rebuild <setpath>/resize from scratch as a copy of <setpath>/img, then shrink
# every JPEG in the copy to 32x32 in place. The files under img/ are not modified.
setpath = 'data/set2/'
# check_call raises CalledProcessError on a non-zero exit status, so a failed
# copy stops the cell instead of leaving an empty resize/ folder that the loop
# below would silently skip over.
subprocess.check_call(['rm', '-rf', setpath+'resize'])
subprocess.check_call(['cp', '-r', setpath+'img', setpath+'resize'])
for pic in glob.glob(setpath+"resize/*.jpg"):
    # Overwrite each copied file with its 32x32 version.
    imsave(pic, imresize(imread(pic), (32, 32)))

# # File rename
# for i in xrange(albums.shape[0]):
#     subprocess.call(['mv', data_dir+'resize/'+albums['Filename'].loc[i]+'.jpg',\
#                      data_dir+'resize/'+albums['NewFileName'].loc[i]+'.jpg'])

In [3]:
# Label to index number New genres (12 groups)
# Build a multi-hot label matrix: one row per album, one column per coarse genre.
token = Tokenizer()
genres = ['rock', 'metal', 'punk', 'pop', 'blue', 'funk', 'jazz', 'electro', 'hiphop', 'rap', 'country', 'other']
token.fit_on_texts(genres)
# Shift every index down by one so the values can be used directly as column numbers.
for k in token.word_index:
    token.word_index[k] -= 1

# Reverse lookup: column number -> genre keyword.
idx_word = {index: word for word, index in token.word_index.items()}

# `.values` replaces the deprecated Series.get_values() and returns the same array.
label_lst = albums.Genres.values
data_y = np.zeros((len(label_lst), max(token.word_index.values())+1))
for i, album_labels in enumerate(label_lst):
    splt_labels = album_labels.split()
    for label in splt_labels:
        # Substring match: a fine-grained tag such as 'poprock' switches on both
        # 'pop' and 'rock'.
        nl = [l for l in token.word_index if l in label]
        if nl:
            for j in nl:
                data_y[i, token.word_index[j]] = 1
        else:
            # A tag that matches no keyword is counted under 'other'.
            data_y[i, token.word_index['other']] = 1
# Single-argument print call: identical output on Python 2, valid on Python 3.
print(data_y[:5,])

[[ 1.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]]


In [4]:
# Show the genre -> column mapping and the number of albums carrying each genre.
# Single-argument print calls produce the same output on Python 2 and also run
# on Python 3.
print(token.word_index)
print(np.sum(data_y, axis=0))

{'blue': 0, 'metal': 1, 'rock': 9, 'jazz': 2, 'punk': 3, 'pop': 4, 'hiphop': 6, 'other': 7, 'rap': 8, 'country': 5, 'electro': 10, 'funk': 11}
[  325.   408.    68.    75.   928.    71.   713.  1096.   186.  1608.
   542.   207.]


In [5]:
# Read image
def _load_rgb_pixels(path):
    """Return the image at `path` as an RGB pixel array, closing the file afterwards."""
    with Image.open(path) as img:
        # Force three channels so grayscale / palette / RGBA covers still fit the
        # (32, 32, 3) slots allocated below.
        return np.array(img.convert('RGB'))

# `.values` replaces the deprecated Series.get_values().
X_origin = np.array([_load_rgb_pixels(data_dir+'resize/'+filename+'.jpg')
                     for filename in albums['Filename'].values])
# Float copy with the fixed input shape used by the model; the assignment fails
# loudly if any image is not 32x32.
X = np.zeros((X_origin.shape[0], 32, 32, 3))
if X_origin.shape[0]:
    X[:] = X_origin

In [7]:
# Training configuration handed to model.compile further down.
optimizer = 'adadelta'
# Binary cross-entropy is used together with the sigmoid output layer, so each
# genre column is scored as its own yes/no target.
objective = 'binary_crossentropy'

def center_normalize(x):
    """Shift `x` by its mean and divide by its standard deviation.

    Both statistics are taken with the Keras backend over the whole tensor
    (no axis argument is given).
    """
    centered = x - K.mean(x)
    spread = K.std(x)
    return centered / spread

def single2multi(x):
    """Scale `x` so that its largest entry becomes 1.

    Uses true division explicitly: this notebook runs on Python 2, where `/`
    on an integer array is floor division and would zero out every entry
    except the maximum.

    Returns an array of floats with the same shape as `x`. As with plain
    division, a maximum of 0 produces inf/nan entries.
    """
    return np.true_divide(x, np.max(x))

# split data
# Hold out 20% of the albums. A fixed random_state keeps the split identical
# across kernel restarts so results can be reproduced.
train_x, test_x, train_y, test_y = train_test_split(X, data_y, test_size=0.2, random_state=42)

In [8]:
# Build NN Model (model to optimize)
# Layout: input normalisation, four convolutional stages with growing filter
# counts, then a dense head ending in one sigmoid unit per genre column.
model = Sequential()
model.add(Activation(activation=center_normalize,input_shape=train_x.shape[1:]))

# Each stage is pad -> 3x3 conv -> pad -> 3x3 conv -> 2x2 max-pool.
for n_filters in (64, 128, 256, 512):
    # NOTE(review): input_shape is repeated on the first padding layer of every
    # stage; only the first layer of a Sequential model presumably needs it -- confirm.
    model.add(ZeroPadding2D((1,1),input_shape=train_x.shape[1:], dim_ordering='tf'))
    model.add(Conv2D(n_filters, 3, 3, border_mode='valid', activation='relu'))
    model.add(ZeroPadding2D((1,1)))
    model.add(Conv2D(n_filters, 3, 3, border_mode='valid', activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2,2)))

# Classifier head, added in order.
head_layers = (
    Flatten(),
    BatchNormalization(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    Dense(4096, activation='relu'),
    # One independent probability per genre column.
    Dense(train_y.shape[1], activation='sigmoid'),
)
for head_layer in head_layers:
    model.add(head_layer)
# model.add(Dense(train_y.shape[1], activation=single2multi))

model.compile(
    loss=objective,
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [9]:
# Stop training once the validation loss has gone 4 epochs without improving.
early_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='auto')
# class_weight='auto' was dropped: Keras documents class_weight as a dict mapping
# class index to weight ('auto' is a scikit-learn convention), so the string did
# not request any documented re-weighting.
# NOTE(review): the held-out test split is also used as validation data, so early
# stopping is tuned on the same samples that are later used for evaluation.
model.fit(train_x, train_y,
          validation_data=(test_x, test_y),
          batch_size=5,
          nb_epoch=10,
          verbose=1,
          callbacks=[early_stopping])

Train on 2180 samples, validate on 545 samples
Epoch 1/10

KeyboardInterrupt: 

In [None]:
def greater50percent(y, threshold=0.5):
    """Return, for every row of `y`, the positions whose value exceeds `threshold`.

    Args:
        y: iterable of rows (lists or a 2-D array) of numeric scores.
        threshold: strict lower bound a score must exceed to be kept;
            defaults to 0.5, the original fixed cut-off.

    Returns:
        A list with one list of integer positions per input row; a row with
        no score above the threshold yields an empty list.
    """
    # enumerate replaces the Python 2-only xrange index loop.
    return [[idx for idx, score in enumerate(row) if score > threshold]
            for row in y]

# Score the held-out images, then turn both the predictions (p) and the true
# label rows (p_) into lists of column indices above 0.5 for comparison.
pred = model.predict(test_x)
p  = greater50percent(pred)
p_ = greater50percent(test_y)

In [None]:
# Column-wise mean of the 0/1 label matrix, i.e. the share of albums tagged with each genre.
np.mean(data_y, axis=0)

In [None]:
# List every held-out sample as: index, predicted label columns, true label columns.
# enumerate/zip replaces the Python 2-only xrange index loop; p and p_ are built
# from the same test split, so they have the same length.
for i, (predicted, actual) in enumerate(zip(p, p_)):
    print(i, predicted, actual)

In [None]:
def predict_req(filename, model):
    """Run `model` on the single image stored at `filename`.

    Args:
        filename: path of the image file to classify.
        model: object exposing `predict(batch)`.

    Returns:
        Whatever `model.predict` returns for a batch holding this one image.
    """
    # The context manager closes the file handle once the pixels are read.
    with Image.open(filename) as img:
        # Force three channels so grayscale / RGBA files match an RGB-trained model.
        pixels = np.array(img.convert('RGB'))
    # Wrap in a leading batch axis of length 1.
    reqdata = np.array([pixels])
    return model.predict(reqdata)

In [None]:
# NOTE(review): this file name follows the Artist_date scheme of the commented-out
# rename step, so it presumably exists only if that rename was run -- confirm.
predict_req(data_dir+'resize/'+'Megadeth_1986-01-01.jpg', model)