Skip to content

Commit

Permalink
Merge branch 'master' into bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
oadams committed Jul 21, 2018
2 parents dc184cf + 4248eb9 commit 9f12ec6
Show file tree
Hide file tree
Showing 10 changed files with 474 additions and 419 deletions.
26 changes: 26 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Changelog
All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Changelog

## [0.3.1] - 2018-07-14

### Fixed
- Documentation for tutorial running
- Pathlib handling for parameters

## [0.3.0] - 2018-07-14

### Added
- More mypy type annotations
- More test coverage

### Removed
- Removed `ReadyCorpus` in PR #163 (https://github.com/persephone-tools/persephone/pull/163)

3 changes: 2 additions & 1 deletion persephone/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@ def from_elan(cls: Type[CorpusT], org_dir: Path, tgt_dir: Path,
raise ValueError("A label segmenter must be provided via label_segmenter")

# In case path is supplied as a string, make it a Path
self.tgt_dir = Path(tgt_dir)
if isinstance(tgt_dir, str):
tgt_dir = Path(tgt_dir)

# Read utterances from org_dir.
utterances = elan.utterances_from_dir(org_dir,
Expand Down
10 changes: 6 additions & 4 deletions persephone/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@
import git
from git import Repo

from typing import Optional

import persephone
from . import config
from . import rnn_ctc
from .corpus_reader import CorpusReader
from .utils import is_git_directory_clean

EXP_DIR = config.EXP_DIR
EXP_DIR = config.EXP_DIR # type: str

def get_exp_dir_num(parent_dir):
def get_exp_dir_num(parent_dir: str) -> int:
""" Gets the number of the current experiment directory."""
return max([int(fn.split(".")[0])
for fn in os.listdir(parent_dir) if fn.split(".")[0].isdigit()]
+ [-1])

def _prepare_directory(directory_path):
def _prepare_directory(directory_path: str) -> str:
"""
Prepare the directory structure required for the experiment
:returns: returns the name of the newly created directory
Expand All @@ -31,7 +33,7 @@ def _prepare_directory(directory_path):
os.makedirs(exp_dir)
return exp_dir

def prep_sub_exp_dir(parent_dir):
def prep_sub_exp_dir(parent_dir: str) -> str:
""" Prepares an experiment subdirectory
:parent_dir: the parent directory
:returns: returns the name of the newly created subdirectory
Expand Down
20 changes: 12 additions & 8 deletions persephone/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
import os
from pathlib import Path
import sys
from typing import Union, Sequence, Set, List
from typing import Optional, Union, Sequence, Set, List

import tensorflow as tf

from .preprocess import labels
from . import utils
from . import config
from .exceptions import PersephoneException
from .corpus_reader import CorpusReader

OPENFST_PATH = config.OPENFST_BIN_PATH

Expand Down Expand Up @@ -103,8 +104,11 @@ class Model:
saved_model_path: Path to where the Tensorflow model is being saved on disk.
"""

def __init__(self, exp_dir, corpus_reader) -> None:
self.exp_dir = exp_dir
def __init__(self, exp_dir: Union[Path, str], corpus_reader: CorpusReader) -> None:
if isinstance(exp_dir, Path):
self.exp_dir = str(exp_dir) # type: str
else:
self.exp_dir = exp_dir # type: str
self.corpus_reader = corpus_reader
self.log_softmax = None
self.batch_x = None
Expand All @@ -114,9 +118,9 @@ def __init__(self, exp_dir, corpus_reader) -> None:
self.ler = None
self.dense_decoded = None
self.dense_ref = None
self.saved_model_path = None
self.saved_model_path = "" # type: str

def transcribe(self, restore_model_path=None) -> None:
def transcribe(self, restore_model_path: Optional[str]=None) -> None:
""" Transcribes an untranscribed dataset. Similar to eval() except
no reference transcription is assumed, thus no LER is calculated.
"""
Expand Down Expand Up @@ -158,7 +162,7 @@ def transcribe(self, restore_model_path=None) -> None:
print(" ".join(hyp), file=hyps_f)
print("", file=hyps_f)

def eval(self, restore_model_path=None) -> None:
def eval(self, restore_model_path: Optional[str]=None) -> None:
""" Evaluates the model on a test set."""

saver = tf.train.Saver()
Expand Down Expand Up @@ -197,15 +201,15 @@ def eval(self, restore_model_path=None) -> None:
with open(os.path.join(hyps_dir, "test_per"), "w") as per_f:
print("Test PER: %f, tf LER: %f" % (test_per, test_ler), file=per_f)

def output_best_scores(self, best_epoch_str):
def output_best_scores(self, best_epoch_str: str) -> None:
"""Output best scores to the filesystem"""
BEST_SCORES_FILENAME = "best_scores.txt"
with open(os.path.join(self.exp_dir, BEST_SCORES_FILENAME), "w") as best_f:
print(best_epoch_str, file=best_f, flush=True)

def train(self, early_stopping_steps: int = 10, min_epochs: int = 30,
max_valid_ler: float = 1.0, max_train_ler: float = 0.3,
max_epochs: int = 100, restore_model_path=None) -> None:
max_epochs: int = 100, restore_model_path: Optional[str]=None) -> None:
""" Train the model.
min_epochs: minimum number of epochs to run training for.
Expand Down
6 changes: 3 additions & 3 deletions persephone/rnn_ctc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def write_desc(self) -> None:
for key, val in self.__dict__.items():
print("%s=%s" % (key, val), file=desc_f)

def __init__(self, exp_dir, corpus_reader, num_layers: int = 3,
def __init__(self, exp_dir: str, corpus_reader, num_layers: int = 3,
hidden_size: int=250, beam_width: int = 100,
decoding_merge_repeated: bool = True) -> None:
super().__init__(exp_dir, corpus_reader)
Expand Down Expand Up @@ -71,15 +71,15 @@ def __init__(self, exp_dir, corpus_reader, num_layers: int = 3,
# For feeding into the next layer
layer_input = self.outputs_concat

self.outputs = tf.reshape(self.outputs_concat, [-1, self.hidden_size*2]) #type: ignore
self.outputs = tf.reshape(self.outputs_concat, [-1, self.hidden_size*2]) # pylint: disable=no-member

# Single-variable names are appropriate for weights and biases.
# pylint: disable=invalid-name
W = tf.Variable(tf.truncated_normal([hidden_size*2, vocab_size],
stddev=np.sqrt(2.0 / (2*hidden_size)))) #type: ignore
b = tf.Variable(tf.zeros([vocab_size])) #type: ignore
self.logits = tf.matmul(self.outputs, W) + b #type: ignore
self.logits = tf.reshape(self.logits, [batch_size, -1, vocab_size]) #type: ignore
self.logits = tf.reshape(self.logits, [batch_size, -1, vocab_size]) # pylint: disable=no-member
# igormq made it time major, because of an optimization in ctc_loss.
self.logits = tf.transpose(self.logits, (1, 0, 2), name="logits") #type: ignore

Expand Down
30 changes: 30 additions & 0 deletions stubs/numpy/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
Copyright (c) 2005-2017, NumPy Developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.

* Neither the name of the NumPy Developers nor the names of any
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

0 comments on commit 9f12ec6

Please sign in to comment.