In [1]:
%%bash
# List the installed packages whose name contains "tensor", to record the
# library versions this notebook was run against.
pip freeze | grep tensor


tensor2tensor==1.6.6
tensorboard==1.9.0
tensorflow-gpu==1.9.0


In [2]:
%%bash
# Start from an empty package skeleton: remove any previous ./bible tree and
# recreate the bible/trainer directory that the later %%writefile cells fill.
# NOTE(review): this also deletes bible/trainer/input.txt and output.txt if
# they exist; generate_samples in problem.py reads those two files and no
# visible cell recreates them -- confirm where they come from.
rm -rf bible
mkdir -p bible/trainer

In [3]:
%pwd


'/home/phil'

In [14]:
%%writefile bible/trainer/problem.py
import os
import tensorflow as tf
from tensor2tensor.utils import registry
from tensor2tensor.models import transformer
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import generator_utils


@registry.register_problem
class Bible(text_problems.Text2TextProblem):
    """Text-to-text problem built from two line-aligned text files.

    Line i of ``input.txt`` is the source sentence and line i of
    ``output.txt`` is its target. Both files are read from the directory
    that contains this module (the ``bible/trainer`` package).
    """

    @property
    def approx_vocab_size(self):
        """Approximate vocabulary size to build (2**13)."""
        return 2**13  # ~8k

    @property
    def is_generate_per_split(self):
        """Whether generate_samples is called separately for each split."""
        # False: generate_samples is called once and its output is divided
        # across the shards declared in dataset_splits.
        return False

    @property
    def dataset_splits(self):
        """Splits of data to produce and number of output shards for each."""
        # 90 train shards + 10 eval shards -> 10% evaluation data.
        return [{
            "split": problem.DatasetSplit.TRAIN,
            "shards": 90,
        }, {
            "split": problem.DatasetSplit.EVAL,
            "shards": 10,
        }]

    def generate_samples(self, data_dir, tmp_dir, dataset_split):
        """Yield one sample per aligned (input line, output line) pair.

        Args:
          data_dir: unused by this implementation.
          tmp_dir: unused by this implementation.
          dataset_split: unused by this implementation.

        Yields:
          dict with string values under the keys "inputs" and "targets".
        """
        # Resolve the corpus next to this file instead of hard-coding one
        # user's home directory.
        corpus_dir = os.path.dirname(os.path.abspath(__file__))
        source_path = os.path.join(corpus_dir, "input.txt")
        target_path = os.path.join(corpus_dir, "output.txt")
        with open(source_path, "r") as inp, open(target_path, "r") as out:
            # zip stops at the shorter file, so unpaired trailing lines are
            # dropped silently.
            for en_line, hi_line in zip(inp, out):
                # Strip the line terminator so it does not become part of
                # the text that gets encoded.
                yield {
                    "inputs": en_line.strip(),
                    "targets": hi_line.strip(),
                }
             


# Smaller than the typical translate model, and with more regularization
@registry.register_hparams
def transformer_bible():
    """Return transformer_base hparams with this notebook's overrides applied."""
    overrides = (
        ("num_hidden_layers", 2),
        ("hidden_size", 128),
        ("filter_size", 512),
        ("num_heads", 4),
        ("attention_dropout", 0.6),
        ("layer_prepostprocess_dropout", 0.6),
        ("learning_rate", 0.05),
    )
    hparams = transformer.transformer_base()
    # Plain attribute assignment, one override at a time, in the listed order.
    for name, value in overrides:
        setattr(hparams, name, value)
    return hparams

# hyperparameter tuning ranges
@registry.register_ranged_hparams
def transformer_bible_range(rhp):
    """Declare the hyperparameter ranges explored during tuning.

    Args:
      rhp: ranged-hparams object; only its set_float, set_int and
        set_discrete methods and its LOG_SCALE attribute are used here.
    """
    log_scale = rhp.LOG_SCALE
    # Only the learning rate passes an explicit scale; the remaining ranges
    # rely on the setters' defaults.
    rhp.set_float("learning_rate", 0.05, 0.25, scale=log_scale)
    rhp.set_int("num_hidden_layers", 2, 4)
    rhp.set_discrete("hidden_size", [128, 256, 512])
    rhp.set_float("attention_dropout", 0.4, 0.7)

Overwriting bible/trainer/problem.py


In [5]:
%%writefile bible/trainer/__init__.py
from . import problem

Writing bible/trainer/__init__.py


In [6]:
%%writefile bible/setup.py
"""Packaging script for the ``bible`` package."""
from setuptools import find_packages, setup

# Packages that must be installed alongside this one.
REQUIRED_PACKAGES = ['tensor2tensor']

setup(
    name='bible',
    version='0.1',
    description='Bible Translation',
    author='phil',
    author_email='phildani7@nith.ac.in',
    packages=find_packages(),
    include_package_data=True,
    install_requires=REQUIRED_PACKAGES,
    requires=[],
)

Writing bible/setup.py


In [7]:
!touch bible/__init__.py

In [8]:
!find bible

bible
bible/__init__.py
bible/setup.py
bible/trainer
bible/trainer/__init__.py
bible/trainer/problem.py


In [9]:
! echo $PATH

/home/phil/.conda/envs/t2tj/bin:/home/phil/.conda/envs/t2tj/bin:/home/phil/anaconda3/bin:/home/phil/anaconda3/bin:/home/phil/anaconda3/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin


In [10]:
! echo $PROBLEM




In [11]:
PROBLEM = 'bible'
!echo $PROBLEM

bible


In [12]:
%pwd

'/home/phil'

In [15]:
%%writefile bible/trainer/problem.py
import os
import tensorflow as tf
from tensor2tensor.utils import registry
from tensor2tensor.models import transformer
from tensor2tensor.data_generators import problem
from tensor2tensor.data_generators import text_encoder
from tensor2tensor.data_generators import text_problems
from tensor2tensor.data_generators import generator_utils


@registry.register_problem
class Bible(text_problems.Text2TextProblem):
  """Text-to-text problem built from two line-aligned text files.

  Line i of ``input.txt`` is the source sentence and line i of
  ``output.txt`` is its target. Both files are read from the directory
  that contains this module (the ``bible/trainer`` package).
  """

  @property
  def approx_vocab_size(self):
    """Approximate vocabulary size to build (2**13)."""
    return 2**13  # ~8k

  @property
  def is_generate_per_split(self):
    """Whether generate_samples is called separately for each split."""
    # False: generate_samples is called once and its output is divided
    # across the shards declared in dataset_splits.
    return False

  @property
  def dataset_splits(self):
    """Splits of data to produce and number of output shards for each."""
    # 90 train shards + 10 eval shards -> 10% evaluation data.
    return [{
        "split": problem.DatasetSplit.TRAIN,
        "shards": 90,
    }, {
        "split": problem.DatasetSplit.EVAL,
        "shards": 10,
    }]

  def generate_samples(self, data_dir, tmp_dir, dataset_split):
    """Yield one sample per aligned (input line, output line) pair.

    Args:
      data_dir: unused by this implementation.
      tmp_dir: unused by this implementation.
      dataset_split: unused by this implementation.

    Yields:
      dict with string values under the keys "inputs" and "targets".
    """
    # Resolve the corpus next to this file instead of hard-coding one
    # user's home directory.
    corpus_dir = os.path.dirname(os.path.abspath(__file__))
    source_path = os.path.join(corpus_dir, "input.txt")
    target_path = os.path.join(corpus_dir, "output.txt")
    with open(source_path, "r") as inp, open(target_path, "r") as out:
      # zip stops at the shorter file, so unpaired trailing lines are
      # dropped silently.
      for en_line, hi_line in zip(inp, out):
        # Strip the line terminator so it does not become part of the
        # text that gets encoded.
        yield {
            "inputs": en_line.strip(),
            "targets": hi_line.strip(),
        }


# Smaller than the typical translate model, and with more regularization
@registry.register_hparams
def transformer_bible():
  """Return transformer_base hparams with this notebook's overrides applied."""
  overrides = (
      ("num_hidden_layers", 2),
      ("hidden_size", 128),
      ("filter_size", 512),
      ("num_heads", 4),
      ("attention_dropout", 0.6),
      ("layer_prepostprocess_dropout", 0.6),
      ("learning_rate", 0.05),
  )
  hparams = transformer.transformer_base()
  # Plain attribute assignment, one override at a time, in the listed order.
  for name, value in overrides:
    setattr(hparams, name, value)
  return hparams

# hyperparameter tuning ranges
@registry.register_ranged_hparams
def transformer_bible_range(rhp):
  """Declare the hyperparameter ranges explored during tuning.

  Args:
    rhp: ranged-hparams object; only its set_float, set_int and
      set_discrete methods and its LOG_SCALE attribute are used here.
  """
  log_scale = rhp.LOG_SCALE
  # Only the learning rate passes an explicit scale; the remaining ranges
  # rely on the setters' defaults.
  rhp.set_float("learning_rate", 0.05, 0.25, scale=log_scale)
  rhp.set_int("num_hidden_layers", 2, 4)
  rhp.set_discrete("hidden_size", [128, 256, 512])
  rhp.set_float("attention_dropout", 0.4, 0.7)

Overwriting bible/trainer/problem.py


In [17]:
%%bash
# Where the generated dataset goes, plus a scratch directory nested inside it.
DATA_DIR=./t2t_data
TMP_DIR="$DATA_DIR/tmp"

# Recreate both directories from scratch.
rm -rf "$DATA_DIR" "$TMP_DIR"
mkdir -p "$DATA_DIR" "$TMP_DIR"

# Generate data for the "bible" problem found in the user directory.
t2t-datagen \
  --problem=bible \
  --t2t_usr_dir=/home/phil/bible/trainer \
  --data_dir="$DATA_DIR" \
  --tmp_dir="$TMP_DIR"

In [18]:
%pwd

'/home/phil'

In [19]:
%%bash
# Print the directory used for training output. The training and decoding
# cells further down assign OUTDIR again themselves.
OUTDIR=/home/phil/trained_model
echo $OUTDIR

/home/phil/trained_model


In [20]:
%%bash
DATA_DIR=/home/phil/t2t_data
OUTDIR=/home/phil/trained_model

# Always train from a clean output directory.
rm -rf "$OUTDIR"

# Every flag line except the last must end in a backslash; otherwise the
# command stops there and the remaining flags are run as a separate command.
# The incomplete `--worker` flag was dropped for that reason, so that
# --train_steps is actually passed to the trainer.
# NOTE(review): this trains with the stock transformer_base hparams, not the
# transformer_bible set registered in problem.py -- confirm that is intended.
t2t-trainer \
  --data_dir="$DATA_DIR" \
  --t2t_usr_dir=./bible/trainer \
  --problem=bible \
  --model=transformer \
  --hparams_set=transformer_base \
  --output_dir="$OUTDIR" \
  --train_steps=10


INFO:tensorflow:Importing user module trainer from path /home/phil/bible
Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
INFO:tensorflow:schedule=continuous_train_and_eval
INFO:tensorflow:worker_gpu=1
INFO:tensorflow:sync=False
INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
INFO:tensorflow:ps_devices: ['gpu:0']
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f91b139f748>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_train_distribute': None, '_device_fn': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': gpu_options {
  per_process_gpu_memory_fractio

In [21]:
%%writefile ./bible/my.txt
Jesus loves you
Thank you heavenly Father

Writing ./bible/my.txt


In [22]:
!echo $MODEL




In [27]:
%%bash
# Locations of the generated data, the trained checkpoint directory and the
# file whose lines are to be decoded.
DATA_DIR=/home/phil/t2t_data
OUTDIR=/home/phil/trained_model
DECODE_FILE=/home/phil/bible/my.txt

# Must match the values used by the training cell.
PROBLEM=bible
MODEL=transformer
HPARAMS=transformer_base

# Settings forwarded through --decode_hparams.
BEAM_SIZE=4
ALPHA=0.6

# --data_dir previously expanded the undefined $DATADIR (i.e. an empty
# string); it now uses DATA_DIR. The problem flag is spelled --problem, as in
# the datagen and trainer cells.
t2t-decoder \
  --data_dir="$DATA_DIR" \
  --problem="$PROBLEM" \
  --model="$MODEL" \
  --hparams_set="$HPARAMS" \
  --output_dir="$OUTDIR" \
  --t2t_usr_dir=/home/phil/bible/trainer \
  --decode_hparams="beam_size=$BEAM_SIZE,alpha=$ALPHA" \
  --decode_from_file="$DECODE_FILE"

INFO:tensorflow:Importing user module trainer from path /home/phil/bible
Instructions for updating:
When switching to tf.estimator.Estimator, use tf.estimator.RunConfig instead.
INFO:tensorflow:schedule=continuous_train_and_eval
INFO:tensorflow:worker_gpu=1
INFO:tensorflow:sync=False
INFO:tensorflow:datashard_devices: ['gpu:0']
INFO:tensorflow:caching_devices: None
INFO:tensorflow:ps_devices: ['gpu:0']
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdc353ebef0>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_train_distribute': None, '_device_fn': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1.0
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': None, '_log_step_count_steps': 100, '_session_config': gpu_options {
  per_process_gpu_memory_fractio