Skip to content

Commit

Permalink
Split off pipeline libs from create_dataset binaries for easier internal support. (#1308)
Browse files Browse the repository at this point in the history

* Update melodyrnn

* Add perf and pr

* add poly improv drums

* lint

* fix default

* Add vis
  • Loading branch information
adarob committed Oct 17, 2018
1 parent e5ff946 commit cdd6ae4
Show file tree
Hide file tree
Showing 27 changed files with 768 additions and 601 deletions.
9 changes: 6 additions & 3 deletions magenta/models/drums_rnn/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ py_library(
srcs = ["drums_rnn_config_flags.py"],
srcs_version = "PY2AND3",
visibility = [
# internal model:drums_rnn
# internal model:music_rnn
],
deps = [
":drums_rnn_model",
Expand Down Expand Up @@ -62,10 +62,13 @@ py_library(

py_binary(
name = "drums_rnn_create_dataset",
srcs = ["drums_rnn_create_dataset.py"],
srcs = [
"drums_rnn_create_dataset.py",
"drums_rnn_pipeline.py"
],
srcs_version = "PY2AND3",
visibility = [
# internal model:drums_rnn
# internal model:music_rnn
"//magenta/tools/pip:__subpackages__",
],
deps = [
Expand Down
73 changes: 16 additions & 57 deletions magenta/models/drums_rnn/drums_rnn_create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,74 +21,33 @@

# internal imports
import tensorflow as tf
import magenta

from magenta.models.drums_rnn import drums_rnn_config_flags

from magenta.music import encoder_decoder
from magenta.pipelines import dag_pipeline
from magenta.pipelines import drum_pipelines
from magenta.pipelines import note_sequence_pipelines
from magenta.models.drums_rnn import drums_rnn_pipeline
from magenta.pipelines import pipeline
from magenta.pipelines import pipelines_common
from magenta.protobuf import music_pb2

flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('input', None,
'TFRecord to read NoteSequence protos from.')
tf.app.flags.DEFINE_string('output_dir', None,
'Directory to write training and eval TFRecord '
'files. The TFRecord files are populated with '
'SequenceExample protos.')
tf.app.flags.DEFINE_float('eval_ratio', 0.1,
'Fraction of input to set aside for eval set. '
'Partition is randomly selected.')
tf.app.flags.DEFINE_string('log', 'INFO',
'The threshold for what messages will be logged '
'DEBUG, INFO, WARN, ERROR, or FATAL.')


def get_pipeline(config, eval_ratio):
"""Returns the Pipeline instance which creates the RNN dataset.
Args:
config: A DrumsRnnConfig object.
eval_ratio: Fraction of input to set aside for evaluation set.
Returns:
A pipeline.Pipeline instance.
"""
partitioner = pipelines_common.RandomPartition(
music_pb2.NoteSequence,
['eval_drum_tracks', 'training_drum_tracks'],
[eval_ratio])
dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

for mode in ['eval', 'training']:
time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
name='TimeChangeSplitter_' + mode)
quantizer = note_sequence_pipelines.Quantizer(
steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
drums_extractor = drum_pipelines.DrumsExtractor(
min_bars=7, max_steps=512, gap_bars=1.0, name='DrumsExtractor_' + mode)
encoder_pipeline = encoder_decoder.EncoderPipeline(
magenta.music.DrumTrack, config.encoder_decoder,
name='EncoderPipeline_' + mode)

dag[time_change_splitter] = partitioner[mode + '_drum_tracks']
dag[quantizer] = time_change_splitter
dag[drums_extractor] = quantizer
dag[encoder_pipeline] = drums_extractor
dag[dag_pipeline.DagOutput(mode + '_drum_tracks')] = encoder_pipeline

return dag_pipeline.DAGPipeline(dag)
flags.DEFINE_string('input', None, 'TFRecord to read NoteSequence protos from.')
flags.DEFINE_string(
'output_dir', None,
'Directory to write training and eval TFRecord files. The TFRecord files '
'are populated with SequenceExample protos.')
flags.DEFINE_float(
'eval_ratio', 0.1,
'Fraction of input to set aside for eval set. Partition is randomly '
'selected.')
flags.DEFINE_string(
'log', 'INFO',
'The threshold for what messages will be logged DEBUG, INFO, WARN, ERROR, '
'or FATAL.')


def main(unused_argv):
tf.logging.set_verbosity(FLAGS.log)

config = drums_rnn_config_flags.config_from_flags()
pipeline_instance = get_pipeline(
pipeline_instance = drums_rnn_pipeline.get_pipeline(
config, FLAGS.eval_ratio)

FLAGS.input = os.path.expanduser(FLAGS.input)
Expand Down
6 changes: 3 additions & 3 deletions magenta/models/drums_rnn/drums_rnn_create_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import tensorflow as tf
import magenta

from magenta.models.drums_rnn import drums_rnn_create_dataset
from magenta.models.drums_rnn import drums_rnn_pipeline
from magenta.models.shared import events_rnn_model
from magenta.pipelines import drum_pipelines
from magenta.pipelines import note_sequence_pipelines
Expand Down Expand Up @@ -63,8 +63,8 @@ def testDrumsRNNPipeline(self):
expected_result = {'training_drum_tracks': [one_hot],
'eval_drum_tracks': []}

pipeline_inst = drums_rnn_create_dataset.get_pipeline(self.config,
eval_ratio=0.0)
pipeline_inst = drums_rnn_pipeline.get_pipeline(
self.config, eval_ratio=0.0)
result = pipeline_inst.transform(note_sequence)
self.assertEqual(expected_result, result)

Expand Down
60 changes: 60 additions & 0 deletions magenta/models/drums_rnn/drums_rnn_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Pipeline to create DrumsRNN dataset."""

# internal imports

import magenta
from magenta.music import encoder_decoder
from magenta.pipelines import dag_pipeline
from magenta.pipelines import drum_pipelines
from magenta.pipelines import note_sequence_pipelines
from magenta.pipelines import pipelines_common
from magenta.protobuf import music_pb2


def get_pipeline(config, eval_ratio):
  """Returns the Pipeline instance which creates the RNN dataset.

  Args:
    config: A DrumsRnnConfig object.
    eval_ratio: Fraction of input to set aside for evaluation set.

  Returns:
    A pipeline.Pipeline instance.
  """
  # Randomly route each incoming NoteSequence into the eval or training
  # partition; `eval_ratio` is the probability mass assigned to eval.
  splitter = pipelines_common.RandomPartition(
      music_pb2.NoteSequence,
      ['eval_drum_tracks', 'training_drum_tracks'],
      [eval_ratio])
  dag = {splitter: dag_pipeline.DagInput(music_pb2.NoteSequence)}

  for mode in ('eval', 'training'):
    # Per-partition processing chain: split at time-signature changes,
    # quantize, extract drum tracks, then encode as SequenceExamples.
    stages = [
        note_sequence_pipelines.TimeChangeSplitter(
            name='TimeChangeSplitter_' + mode),
        note_sequence_pipelines.Quantizer(
            steps_per_quarter=config.steps_per_quarter,
            name='Quantizer_' + mode),
        drum_pipelines.DrumsExtractor(
            min_bars=7, max_steps=512, gap_bars=1.0,
            name='DrumsExtractor_' + mode),
        encoder_decoder.EncoderPipeline(
            magenta.music.DrumTrack, config.encoder_decoder,
            name='EncoderPipeline_' + mode),
    ]
    # Wire the stages together in order, starting from this partition's
    # output of the splitter, and terminate in a named DAG output.
    upstream = splitter[mode + '_drum_tracks']
    for stage in stages:
      dag[stage] = upstream
      upstream = stage
    dag[dag_pipeline.DagOutput(mode + '_drum_tracks')] = upstream

  return dag_pipeline.DAGPipeline(dag)
7 changes: 5 additions & 2 deletions magenta/models/improv_rnn/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ py_library(

py_binary(
name = "improv_rnn_create_dataset",
srcs = ["improv_rnn_create_dataset.py"],
srcs = [
"improv_rnn_create_dataset.py",
"improv_rnn_pipeline.py"
],
srcs_version = "PY2AND3",
visibility = [
# internal model:improv_rnn
# internal model:music_rnn
"//magenta/tools/pip:__subpackages__",
],
deps = [
Expand Down
2 changes: 1 addition & 1 deletion magenta/models/improv_rnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,4 @@ improv_rnn_generate \
--hparams="batch_size=64,rnn_layer_sizes=[64,64]" \
--bundle_file=/tmp/improv_rnn.mag \
--save_generator_bundle
```
```
121 changes: 18 additions & 103 deletions magenta/models/improv_rnn/improv_rnn_create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,120 +21,35 @@

# internal imports
import tensorflow as tf
import magenta

from magenta.models.improv_rnn import improv_rnn_config_flags

from magenta.pipelines import dag_pipeline
from magenta.pipelines import lead_sheet_pipelines
from magenta.pipelines import note_sequence_pipelines
from magenta.models.improv_rnn import improv_rnn_pipeline
from magenta.pipelines import pipeline
from magenta.pipelines import pipelines_common
from magenta.pipelines import statistics

from magenta.protobuf import music_pb2

flags = tf.app.flags
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('input', None,
'TFRecord to read NoteSequence protos from.')
tf.app.flags.DEFINE_string('output_dir', None,
'Directory to write training and eval TFRecord '
'files. The TFRecord files are populated with '
'SequenceExample protos.')
tf.app.flags.DEFINE_float('eval_ratio', 0.1,
'Fraction of input to set aside for eval set. '
'Partition is randomly selected.')
tf.app.flags.DEFINE_string('log', 'INFO',
'The threshold for what messages will be logged '
'DEBUG, INFO, WARN, ERROR, or FATAL.')


class EncoderPipeline(pipeline.Pipeline):
"""A Module that converts lead sheets to a model specific encoding."""

def __init__(self, config, name):
"""Constructs an EncoderPipeline.
Args:
config: An ImprovRnnConfig that specifies the encoder/decoder,
pitch range, and transposition behavior.
name: A unique pipeline name.
"""
super(EncoderPipeline, self).__init__(
input_type=magenta.music.LeadSheet,
output_type=tf.train.SequenceExample,
name=name)
self._conditional_encoder_decoder = config.encoder_decoder
self._min_note = config.min_note
self._max_note = config.max_note
self._transpose_to_key = config.transpose_to_key

def transform(self, lead_sheet):
lead_sheet.squash(
self._min_note,
self._max_note,
self._transpose_to_key)
try:
encoded = [self._conditional_encoder_decoder.encode(
lead_sheet.chords, lead_sheet.melody)]
stats = []
except magenta.music.ChordEncodingException as e:
tf.logging.warning('Skipped lead sheet: %s', e)
encoded = []
stats = [statistics.Counter('chord_encoding_exception', 1)]
except magenta.music.ChordSymbolException as e:
tf.logging.warning('Skipped lead sheet: %s', e)
encoded = []
stats = [statistics.Counter('chord_symbol_exception', 1)]
self._set_stats(stats)
return encoded

def get_stats(self):
return {}


def get_pipeline(config, eval_ratio):
"""Returns the Pipeline instance which creates the RNN dataset.
Args:
config: An ImprovRnnConfig object.
eval_ratio: Fraction of input to set aside for evaluation set.
Returns:
A pipeline.Pipeline instance.
"""
all_transpositions = config.transpose_to_key is None
partitioner = pipelines_common.RandomPartition(
music_pb2.NoteSequence,
['eval_lead_sheets', 'training_lead_sheets'],
[eval_ratio])
dag = {partitioner: dag_pipeline.DagInput(music_pb2.NoteSequence)}

for mode in ['eval', 'training']:
time_change_splitter = note_sequence_pipelines.TimeChangeSplitter(
name='TimeChangeSplitter_' + mode)
quantizer = note_sequence_pipelines.Quantizer(
steps_per_quarter=config.steps_per_quarter, name='Quantizer_' + mode)
lead_sheet_extractor = lead_sheet_pipelines.LeadSheetExtractor(
min_bars=7, max_steps=512, min_unique_pitches=3, gap_bars=1.0,
ignore_polyphonic_notes=False, all_transpositions=all_transpositions,
name='LeadSheetExtractor_' + mode)
encoder_pipeline = EncoderPipeline(config, name='EncoderPipeline_' + mode)

dag[time_change_splitter] = partitioner[mode + '_lead_sheets']
dag[quantizer] = time_change_splitter
dag[lead_sheet_extractor] = quantizer
dag[encoder_pipeline] = lead_sheet_extractor
dag[dag_pipeline.DagOutput(mode + '_lead_sheets')] = encoder_pipeline

return dag_pipeline.DAGPipeline(dag)
flags.DEFINE_string(
'input', None,
'TFRecord to read NoteSequence protos from.')
flags.DEFINE_string(
'output_dir', None,
'Directory to write training and eval TFRecord files. The TFRecord files '
'are populated with SequenceExample protos.')
flags.DEFINE_float(
'eval_ratio', 0.1,
'Fraction of input to set aside for eval set. Partition is randomly '
'selected.')
flags.DEFINE_string(
'log', 'INFO',
'The threshold for what messages will be logged DEBUG, INFO, WARN, ERROR, '
'or FATAL.')


def main(unused_argv):
tf.logging.set_verbosity(FLAGS.log)

config = improv_rnn_config_flags.config_from_flags()
pipeline_instance = get_pipeline(
pipeline_instance = improv_rnn_pipeline.get_pipeline(
config, FLAGS.eval_ratio)

FLAGS.input = os.path.expanduser(FLAGS.input)
Expand Down
6 changes: 3 additions & 3 deletions magenta/models/improv_rnn/improv_rnn_create_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
import tensorflow as tf
import magenta

from magenta.models.improv_rnn import improv_rnn_create_dataset
from magenta.models.improv_rnn import improv_rnn_model
from magenta.models.improv_rnn import improv_rnn_pipeline
from magenta.pipelines import lead_sheet_pipelines
from magenta.pipelines import note_sequence_pipelines
from magenta.protobuf import music_pb2
Expand Down Expand Up @@ -79,8 +79,8 @@ def testMelodyRNNPipeline(self):
expected_result = {'training_lead_sheets': [encoded],
'eval_lead_sheets': []}

pipeline_inst = improv_rnn_create_dataset.get_pipeline(self.config,
eval_ratio=0.0)
pipeline_inst = improv_rnn_pipeline.get_pipeline(
self.config, eval_ratio=0.0)
result = pipeline_inst.transform(note_sequence)
self.assertEqual(expected_result, result)

Expand Down
Loading

0 comments on commit cdd6ae4

Please sign in to comment.