Permalink
Browse files

upload files

  • Loading branch information...
Matthew Murray
Matthew Murray committed Jul 11, 2017
1 parent 357e38b commit ec183278fbc814fc793aa565da816f1c38a28146
@@ -0,0 +1,145 @@
# coding: utf-8
import pickle
import sys
import pandas as pd
import requests
import sox
import numpy as np
import os
from subprocess import run, PIPE
from PIL import Image
import tempfile
import re
import time
import string
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import glob
# Open the S3 connection and bind one reusable Key object to the
# 'spectrograms' bucket; the upload loop further down reassigns k.key for
# every file it sends.
# NOTE(review): the two empty strings are presumably blanked-out access and
# secret keys - confirm how credentials are meant to be supplied before running.
conn = S3Connection('', '')
s3bucket = conn.get_bucket('spectrograms')
k = Key(s3bucket)
# Track metadata table. The columns read by this script are 'parent_genre',
# 'id' and 'track_url'.
# NOTE(review): unpickling can execute arbitrary code - only load a pickle
# file that comes from a trusted source.
df = pd.read_pickle('final_data.pkl')
# Width in pixels of every spectrogram slice; also used as the crop height.
DEFAULT_IMG_SIZE = 256
# Prefix prepended to each slice filename when it is saved
# ('' keeps the slices relative to the current working directory).
DATA_DIR = ''
# downloads the mp3 from juno
def download(url, file_name, timeout=60):
    """Download ``url`` and write the response body to ``file_name``.

    The request is made *before* the output file is opened, so a failed
    download no longer leaves an empty file behind, and a non-2xx response
    raises instead of being saved as if it were audio.

    Args:
        url: Address of the file to fetch.
        file_name: Path the downloaded bytes are written to.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: On connection problems,
            timeouts, or an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than storing an error page as an mp3.
    response.raise_for_status()
    with open(file_name, "wb") as file:
        file.write(response.content)
# helper function to delete files no longer needed
def delete_file(file_path):
    """Remove the file at ``file_path`` from disk.

    Raises whatever ``OSError`` the operating system reports (for example
    when the file does not exist).
    """
    os.unlink(file_path)
# creates a mono version of the file
# deletes original stereo mp3 and renames the temp
# mono file to the original stereo filename
def set_to_mono(input_file):
    """Replace ``input_file`` with a mono mix-down of itself, using sox.

    sox writes the mono version to a temporary file next to the input; only
    when sox succeeds is the original swapped for it. If sox fails, the
    original file is left untouched and the temporary file is cleaned up.

    Args:
        input_file: Path of the mp3 to convert in place.

    Raises:
        subprocess.CalledProcessError: If sox exits with a non-zero status.
        OSError: If sox cannot be started or the file cannot be replaced.
    """
    # Derived from the input name (so two tracks never share a temp file) and
    # ending in '.mp3' so sox still infers the output format from the extension.
    tmp_name = '{}.tmp.mp3'.format(input_file)
    try:
        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through verbatim.
        run(['sox', input_file, tmp_name, 'remix', '1,2'],
            stdin=PIPE, stdout=PIPE, check=True)
        os.replace(tmp_name, input_file)
    finally:
        # Only present if sox wrote something but the swap did not happen.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)
# converts the audio to spectrogram
def audio_to_spect(input_file, output_file):
    """Render ``input_file`` as a spectrogram PNG and delete the audio file.

    The sox ``spectrogram`` effect is run with ``-Y 300`` (target height),
    ``-X 50`` (pixels per second), ``-m`` (monochrome), ``-r`` (raw image,
    no axes or legend) and ``-o output_file``.

    Args:
        input_file: Path of the audio file to analyse; removed afterwards,
            whether or not sox succeeded.
        output_file: Path of the PNG to create.

    Raises:
        subprocess.CalledProcessError: If sox exits with a non-zero status.
    """
    # Argument list instead of a shell string, so unusual characters in the
    # file names cannot break or alter the command.
    command = ['sox', input_file, '-n', 'spectrogram',
               '-Y', '300', '-X', '50', '-m', '-r', '-o', output_file]
    try:
        run(command, stdin=PIPE, stdout=PIPE, check=True)
    finally:
        # The audio is no longer needed either way.
        delete_file(input_file)
# helper function - gets dimensions of the spectrogram
def get_spect_dims(input_img):
    """Return ``(width, height)`` taken from ``input_img.size``."""
    width, height = input_img.size
    return (width, height)
# helper function - calculates the number of slices available from the full size spectrogram
def get_num_slices(img_width):
    """Return how many whole DEFAULT_IMG_SIZE-wide slices fit in ``img_width``."""
    whole_slices, _ = divmod(img_width, DEFAULT_IMG_SIZE)
    return whole_slices
# helper function - returns a list of coordinates/dimensions where to split the spectrogram
def get_slice_dims(input_img):
    """Return the horizontal pixel ranges at which to cut the spectrogram.

    The image is divided into as many DEFAULT_IMG_SIZE-wide slices as fit.
    Any leftover width that does not fill a whole slice is skipped at the
    *left* edge, so the last slice always ends at the right edge of the image.

    Args:
        input_img: Image object exposing a ``size`` of ``(width, height)``.

    Returns:
        A list of ``(start_px, end_px)`` tuples, one per slice, in
        left-to-right order. Empty when the image is narrower than one slice.
    """
    img_width, _ = get_spect_dims(input_img)
    num_slices = get_num_slices(img_width)
    # Remainder that cannot form a full slice; slicing starts after it.
    first_px = img_width - num_slices * DEFAULT_IMG_SIZE
    return [
        (first_px + i * DEFAULT_IMG_SIZE, first_px + (i + 1) * DEFAULT_IMG_SIZE)
        for i in range(num_slices)
    ]
# slices the spectrogram into individual sample images
def slice_spect(input_file):
    """Cut a spectrogram PNG into square sample images, then delete it.

    Each slice is saved as ``<stem>__<NNN>.png`` (``NNN`` is the zero-padded
    slice index) with DATA_DIR prepended, where ``<stem>`` is ``input_file``
    without its trailing ``.png``.

    Args:
        input_file: Path of the full-size spectrogram image.
    """
    # Strip only a trailing '.png'; a '.png' elsewhere in the name is kept.
    if input_file.endswith('.png'):
        stem = input_file[:-len('.png')]
    else:
        stem = input_file
    # The context manager closes the underlying file handle before the
    # source image is deleted below.
    with Image.open(input_file) as img:
        for counter, (start_width, end_width) in enumerate(get_slice_dims(img)):
            img_name = '{}__{}.png'.format(stem, str(counter).zfill(3))
            # Crop box is (left, upper, right, lower): full slice width, and
            # the top DEFAULT_IMG_SIZE rows of the spectrogram.
            sliced_img = img.crop((start_width, 0, end_width, DEFAULT_IMG_SIZE))
            sliced_img.save(DATA_DIR + img_name)
    delete_file(input_file)
def create_file_names(id):
    """Build the file names used for one track of the metadata table.

    The genre is lower-cased and made file-name friendly ('/' and ' ' become
    '_', '&' becomes 'n'); both file names are ``<genre>__<track id>`` plus
    an extension.

    Args:
        id: Positional row index into ``df`` (not the value of its 'id'
            column). The name shadows the builtin but is kept so existing
            callers are unaffected.

    Returns:
        A ``(track_name, spect_name, genre_name)`` tuple: the mp3 file name,
        the spectrogram PNG file name, and the cleaned genre.
    """
    # Look the row up directly instead of copying both whole columns into
    # lists on every call.
    raw_genre = str(df['parent_genre'].iloc[id]).lower()
    genre_name = raw_genre.translate(
        str.maketrans({'/': '_', ' ': '_', '&': 'n'}))
    base_name = '{}__{}'.format(genre_name, df['id'].iloc[id])
    track_name = '{}.mp3'.format(base_name)
    spect_name = '{}.png'.format(base_name)
    return track_name, spect_name, genre_name
# Main pipeline: for every track in the metadata table, download the mp3,
# convert it to mono, render and slice its spectrogram, then upload the
# slices to S3 under a '<genre>/' key prefix.
url_list = list(df['track_url'])
for track_id, url in enumerate(url_list):
    track_name, spect_name, genre_name = create_file_names(track_id)
    print('Track: {}, Spect: {}, Genre: {}'
          .format(track_name, spect_name, genre_name))
    try:
        download(url, track_name)
        set_to_mono(track_name)
        audio_to_spect(track_name, spect_name)
        slice_spect(spect_name)
        # all png files should now be in the working directory
        for file in glob.glob('*.png'):
            # get genre from start of file name
            genre_name = file.split('__')[0]
            # set file name ready to upload to s3
            full_key_name = '{}/{}'.format(genre_name, file)
            try:
                # send file to s3
                k.key = full_key_name
                k.set_contents_from_filename(file)
                # once copied, delete from local
                delete_file(file)
            except Exception as err:
                # Best effort: the file stays on disk and is picked up again
                # by the glob on a later track. Catching Exception (not a
                # bare except) lets Ctrl-C reach the handler below.
                print('Problem copying file {}: {!r}'.format(file, err))
        # pause between tracks
        time.sleep(5)
    except KeyboardInterrupt:
        sys.exit()
    except Exception as err:
        # Skip this track but report why it failed.
        print('Something went wrong. Moving to next file: {!r}'.format(err))
View
@@ -0,0 +1,101 @@
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.optimizers import rmsprop
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Input, Dense
import os
import numpy as np
import pandas as pd
import numpy as np
import pickle
# Set values
# Dataset layout
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 120000
nb_validation_samples = 42000
num_classes = 9
image_size = 256
# Training schedule
nb_epoch = 20
batch_size = 128
# Place the 3-channel axis where the active Keras backend expects it.
input_shape = (
    (3, image_size, image_size)
    if K.image_data_format() == 'channels_first'
    else (image_size, image_size, 3)
)
# Specify model
# callbacks
# Stop training once val_loss has gone 3 epochs without improving.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
# Checkpoint file names carry the epoch number and that epoch's val_loss.
# NOTE: save_best_only is not set, so a checkpoint is written every epoch.
save_best_model = ModelCheckpoint(
    filepath='model_.{epoch:02d}_{val_loss:.2f}.hdf5',
    verbose=1,
    monitor='val_loss',
)

# Settings shared by all four convolution layers.
conv_kwargs = dict(kernel_size=2, strides=2, activation='elu',
                   kernel_initializer='glorot_normal')

# Feature extractor: four conv + max-pool stages with 64/128/256/512 filters.
model = Sequential()
model.add(Conv2D(filters=64, input_shape=input_shape, **conv_kwargs))
model.add(MaxPooling2D(pool_size=2, padding='same'))
for n_filters in (128, 256, 512):
    model.add(Conv2D(filters=n_filters, **conv_kwargs))
    model.add(MaxPooling2D(pool_size=2, padding='same'))

# Classifier head: one hidden layer with dropout, then a softmax over the classes.
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('elu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

opt = rmsprop()
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# Image generators
# Both generators only multiply pixel values by 1/255; no augmentation is configured.
train_datagen = ImageDataGenerator(rescale=1. / 255)
validation_datagen = ImageDataGenerator(rescale=1. / 255)

# Options common to the training and validation directory iterators.
flow_options = dict(
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
    class_mode='categorical',
)
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_data_dir, **flow_options)
# Fit model
# Number of whole batches drawn per epoch from each generator.
train_steps = nb_train_samples // batch_size
validation_steps = nb_validation_samples // batch_size
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps,
    callbacks=[early_stopping, save_best_model],
)
# Persist the weights on their own and the complete model.
model.save_weights('full_model_weights.h5')
model.save('model.h5')
Oops, something went wrong.

0 comments on commit ec18327

Please sign in to comment.