diff --git a/.gitignore b/.gitignore
index 05de3c3..618e380 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 notebooks/data*
 notebooks/.ipynb*
 .ipynb_checkpoints
+.ipynb_checkpoints/
 *.pyc
 .DS_Store
 notebooks/word2vec_sunghak.ipynb
diff --git a/notebooks/.ipynb_checkpoints/char_rnn_sample_hangul-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/char_rnn_sample_hangul-checkpoint.ipynb
deleted file mode 100755
index 23d2d2e..0000000
--- a/notebooks/.ipynb_checkpoints/char_rnn_sample_hangul-checkpoint.ipynb
+++ /dev/null
@@ -1,349 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Sample Hangul RNN"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Packages Imported\n"
-     ]
-    }
-   ],
-   "source": [
-    "# -*- coding: utf-8 -*-\n",
-    "# Import Packages\n",
-    "import numpy as np\n",
-    "import tensorflow as tf\n",
-    "import collections\n",
-    "import string\n",
-    "import argparse\n",
-    "import time\n",
-    "import os\n",
-    "from six.moves import cPickle\n",
-    "from TextLoader import *\n",
-    "from Hangulpy import *\n",
-    "print (\"Packages Imported\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Load dataset using TextLoader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "loading preprocessed files\n",
-      "type of 'data_loader' is <type 'dict'>, length is 76\n",
-      "\n",
-      "\n",
-      "data_loader.vocab looks like \n",
-      "{u'_': 69, u'6': 59, u':': 57, u'\\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'\"': 28, u'\\u1d25': 0, u\"'\": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\\u3131': 7, u'0': 73, u'\\u3133': 60, u'\\u3132': 29, u'\\u3135': 50, u'\\u3134': 4, u'\\u3137': 13, u'\\u3136': 44, u'\\u3139': 5, u'\\u3138': 32, u'\\u313b': 55, u'\\u313a': 48, u'\\u313c': 54, u'?': 41, u'3': 66, u'\\u3141': 12, u'\\u3140': 51, u'\\u3143': 47, u'\\u3142': 17, u'\\u3145': 10, u'\\u3144': 43, u'\\u3147': 2, u'\\u3146': 22, u'\\u3149': 40, u'\\u3148': 15, u'\\u314b': 42, u'\\u314a': 23, u'\\u314d': 31, u'\\u314c': 30, u'\\u314f': 3, u'\\u314e': 14, u'\\u3151': 34, u'\\u3150': 21, u'\\u3153': 11, u'\\u3152': 74, u'\\u3155': 18, u'\\u3154': 20, u'\\u3157': 9, u'\\u3156': 39, u'\\u3159': 53, u'\\u3158': 26, u'\\u315b': 38, u'\\u315a': 33, u'\\u315d': 36, u'\\u315c': 16, u'\\u315f': 35, u'\\u315e': 61, u'\\u3161': 8, u'\\u3160': 37, u'\\u3163': 6, u'\\u3162': 25, u'\\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\\u313f': 56, u'\\u313e': 70, u'8': 68} \n",
-      "\n",
-      "\n",
-      "type of 'data_loader.chars' is <type 'tuple'>, length is 76\n",
-      "\n",
-      "\n",
-      "data_loader.chars looks like \n",
-      "(u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u3134', u'\\u3139', u'\\u3163', u'\\u3131', u'\\u3161', u'\\u3157', u'\\u3145', u'\\u3153', u'\\u3141', u'\\u3137', u'\\u314e', u'\\u3148', u'\\u315c', u'\\u3142', u'\\u3155', u'\\n', u'\\u3154', u'\\u3150', u'\\u3146', u'\\u314a', u'.', u'\\u3162', u'\\u3158', u',', u'\"', u'\\u3132', u'\\u314c', u'\\u314d', u'\\u3138', u'\\u315a', u'\\u3151', u'\\u315f', u'\\u315d', u'\\u3160', u'\\u315b', u'\\u3156', u'\\u3149', u'?', u'\\u314b', u'\\u3144', u'\\u3136', u'(', u')', u'\\u3143', u'\\u313a', u\"'\", u'\\u3135', u'\\u3140', u'!', u'\\u3159', u'\\u313c', u'\\u313b', u'\\u313f', u':', u'1', u'6', u'\\u3133', u'\\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\\u313e', u'7', u'\\x1a', u'0', u'\\u3152', u'>') \n"
-     ]
-    }
-   ],
-   "source": [
-    "data_dir = \"data/nine_dreams\"\n",
-    "batch_size = 50\n",
-    "seq_length = 50\n",
-    "data_loader = TextLoader(data_dir, batch_size, seq_length)\n",
-    "# This makes \"vocab.pkl\" and \"data.npy\" in \"data/nine_dreams\" \n",
-    "# from \"data/nine_dreams/input.txt\" \n",
-    "vocab_size = data_loader.vocab_size\n",
-    "vocab = data_loader.vocab\n",
-    "chars = data_loader.chars\n",
-    "print ( \"type of 'data_loader' is %s, length is %d\" \n",
-    "       % (type(data_loader.vocab), len(data_loader.vocab)) )\n",
-    "print ( \"\\n\" )\n",
-    "print (\"data_loader.vocab looks like \\n%s \" %\n",
-    "       (data_loader.vocab))\n",
-    "print ( \"\\n\" )\n",
-    "print ( \"type of 'data_loader.chars' is %s, length is %d\" \n",
-    "       % (type(data_loader.chars), len(data_loader.chars)) )\n",
-    "print ( \"\\n\" )\n",
-    "print (\"data_loader.chars looks like \\n%s \" % (data_loader.chars,))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Define Network"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Network Ready\n"
-     ]
-    }
-   ],
-   "source": [
-    "rnn_size = 512\n",
-    "num_layers = 3\n",
-    "grad_clip = 5.\n",
-    "\n",
-    "_batch_size = 1\n",
-    "_seq_length = 1\n",
-    "\n",
-    "vocab_size = data_loader.vocab_size\n",
-    "\n",
-    "with tf.device(\"/cpu:0\"):\n",
-    "    # Select RNN Cell\n",
-    "    unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)\n",
-    "    cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)\n",
-    "    # Set paths to the graph \n",
-    "    input_data = tf.placeholder(tf.int32, [_batch_size, _seq_length])\n",
-    "    targets = tf.placeholder(tf.int32, [_batch_size, _seq_length])\n",
-    "    initial_state = cell.zero_state(_batch_size, tf.float32)\n",
-    "\n",
-    "    # Set Network\n",
-    "    with tf.variable_scope('rnnlm'):\n",
-    "        softmax_w = tf.get_variable(\"softmax_w\", [rnn_size, vocab_size])\n",
-    "        softmax_b = tf.get_variable(\"softmax_b\", [vocab_size])\n",
-    "        with tf.device(\"/cpu:0\"):\n",
-    "            embedding = tf.get_variable(\"embedding\", [vocab_size, rnn_size])\n",
-    "            inputs = tf.split(1, _seq_length, tf.nn.embedding_lookup(embedding, input_data))\n",
-    "            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]\n",
-    "    \n",
-    "    # Loop function for seq2seq\n",
-    "    def loop(prev, _):\n",
-    "        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)\n",
-    "        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))\n",
-    "        return tf.nn.embedding_lookup(embedding, prev_symbol)\n",
-    "    # Output of RNN \n",
-    "    outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state\n",
-    "                    , cell, loop_function=None, scope='rnnlm')\n",
-    "    output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])\n",
-    "    logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)\n",
-    "    # Next word probability \n",
-    "    probs = tf.nn.softmax(logits)\n",
-    "    # Define LOSS\n",
-    "    loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input\n",
-    "        [tf.reshape(targets, [-1])], # Target\n",
-    "        [tf.ones([_batch_size * _seq_length])], # Weight \n",
-    "        vocab_size)\n",
-    "    # Define Optimizer\n",
-    "    cost = tf.reduce_sum(loss) / _batch_size / _seq_length\n",
-    "    final_state = last_state\n",
-    "    lr = tf.Variable(0.0, trainable=False)\n",
-    "    tvars = tf.trainable_variables()\n",
-    "    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)\n",
-    "    _optm = tf.train.AdamOptimizer(lr)\n",
-    "    optm = _optm.apply_gradients(zip(grads, tvars))\n",
-    "\n",
-    "print (\"Network Ready\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sampling function done.\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Sample ! \n",
-    "def sample( sess, chars, vocab, __probs, num=200, prime=u'ㅇㅗᴥㄴㅡㄹᴥ '):\n",
-    "    state = sess.run(cell.zero_state(1, tf.float32))\n",
-    "    _probs = __probs\n",
-    "    prime = list(prime)\n",
-    "    for char in prime[:-1]:\n",
-    "        x = np.zeros((1, 1))\n",
-    "        x[0, 0] = vocab[char]\n",
-    "        feed = {input_data: x, initial_state:state}\n",
-    "        [state] = sess.run([final_state], feed)\n",
-    "\n",
-    "    def weighted_pick(weights):\n",
-    "        weights = weights / np.sum(weights) \n",
-    "        t = np.cumsum(weights)\n",
-    "        s = np.sum(weights)\n",
-    "        return(int(np.searchsorted(t, np.random.rand(1)*s)))\n",
-    "\n",
-    "    ret = prime\n",
-    "    char = prime[-1]\n",
-    "    for n in range(num):\n",
-    "        x = np.zeros((1, 1))\n",
-    "        x[0, 0] = vocab[char]\n",
-    "        feed = {input_data: x, initial_state:state}\n",
-    "        [_probsval, state] = sess.run([_probs, final_state], feed)\n",
-    "        p = _probsval[0]\n",
-    "        sample = int(np.random.choice(len(p), p=p))\n",
-    "        # sample = weighted_pick(p)\n",
-    "        # sample = np.argmax(p)\n",
-    "        pred = chars[sample]\n",
-    "        ret += pred\n",
-    "        char = pred\n",
-    "    return ret\n",
-    "print (\"sampling function done.\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Sample"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false,
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Prime Text : 누구 => ㄴㅜᴥㄱㅜᴥ \n",
-      "data/nine_dreams/model.ckpt-99000\n",
-      "SAMPLED TEXT = [u'\\u3134', u'\\u315c', u'\\u1d25', u'\\u3131', u'\\u315c', u'\\u1d25', u' ', u'\\u3145', u'\\u3157', u'\\u1d25', ... (several hundred raw jamo codepoint escapes omitted here; the same output appears composed under -- RESULT -- below) ... u'\\u3131']\n",
-      "\n",
-      "-- RESULT --\n",
-      "누구 소리에 서 있을 것이로다.\"\n",
-      " 이어서 서울에 들어가니 어찌 이 말을 듣고 있으니 이 말을 듣고 있으니 이 말을 듣고 있었다. \n",
-      " 이에 대답하기를, \n",
-      " \"소유가 다시 이르기를, \n",
-      " \"이제 이에 이르기를, \n",
-      " \"소저가 어찌 이르기를, \n",
-      " \"소저는 이 말을 듣고 이르기를, \n",
-      " \"소저가 어찌 사람을 들어 가서 그 아름다운 일이었다. \n",
-      " 이 말에 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 \n",
-      "보내어 이름을 이루고 있었다. \n",
-      " 이에 대답하기를, \n",
-      " \"소유가 이르기를, \n",
-      " \"이제 사람이 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"이제 이에 이르기를, \n",
-      " \"소유는 이미 사람을 들으니 이 말을 듣고 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 아니 어찌 이를 사람을 사랑하여 이 일을 \n",
-      "보내어 이름을 이루고 있었다. \n",
-      " 이에 대답하기를, \n",
-      " \"소유가 이르기를, \n",
-      " \"이제 사람이 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 \n",
-      "보내어 이름을 이루고 있었다. \n",
-      " 이에 대답하기를, \n",
-      " \"소유가 이름을 들어 가시가 있으니 이 몸이 어찌 가히 여기를 들은 것이 없사오니 어찌 그 여관이 없는 것이라 하나 이 이름을 \n",
-      "이루었다. \n",
-      " \"이 말을 들어 오르니 이 몸이 어찌 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, \n",
-      " \"소저가 이르기를, \n",
-      " \"소저가 어찌 사람을 들어 가시\n"
-     ]
-    }
-   ],
-   "source": [
-    "save_dir = 'data/nine_dreams'\n",
-    "prime = decompose_text(u\"누구 \")\n",
-    "\n",
-    "print (\"Prime Text : %s => %s\" % (automata(prime), \"\".join(prime)))\n",
-    "n = 2000\n",
-    "\n",
-    "sess = tf.Session()\n",
-    "sess.run(tf.initialize_all_variables())\n",
-    "saver = tf.train.Saver(tf.all_variables())\n",
-    "ckpt = tf.train.get_checkpoint_state(save_dir)\n",
-    "\n",
-    "# load_name = u'data/nine_dreams/model.ckpt-0'\n",
-    "load_name = u'data/nine_dreams/model.ckpt-99000'\n",
-    "\n",
-    "print (load_name)\n",
-    "\n",
-    "if ckpt and ckpt.model_checkpoint_path:\n",
-    "    saver.restore(sess, load_name)\n",
-    "    sampled_text = sample(sess, chars, vocab, probs, n, prime)\n",
-    "    #print (\"\")\n",
-    "    print (u\"SAMPLED TEXT = %s\" % sampled_text)\n",
-    "    print (\"\")\n",
-    "    print (\"-- RESULT --\")\n",
-    "    print (automata(\"\".join(sampled_text)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/notebooks/Hangulpy.py b/notebooks/Hangulpy.py
deleted file mode 100755
index e235f04..0000000
--- a/notebooks/Hangulpy.py
+++ /dev/null
@@ -1,411 +0,0 @@
-# -*- coding: utf-8 -*-
-# %load Hangulpy/Hangulpy.py
-#!/usr/bin/env python
-# encoding: utf-8
-"""
-Hangulpy.py
-
-Copyright (C) 2012 Ryan Rho, Hyunwoo Cho
-Text Decompose & Automata Extention by bluedisk@gmail
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-""" - -import string - -################################################################################ -# Hangul Unicode Variables -################################################################################ - -# Code = 0xAC00 + (Chosung_index * NUM_JOONGSUNGS * NUM_JONGSUNGS) + (Joongsung_index * NUM_JONGSUNGS) + (Jongsung_index) -CHOSUNGS = [u'ㄱ',u'ㄲ',u'ㄴ',u'ㄷ',u'ㄸ',u'ㄹ',u'ㅁ',u'ㅂ',u'ㅃ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅉ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ'] -JOONGSUNGS = [u'ㅏ',u'ㅐ',u'ㅑ',u'ㅒ',u'ㅓ',u'ㅔ',u'ㅕ',u'ㅖ',u'ㅗ',u'ㅘ',u'ㅙ',u'ㅚ',u'ㅛ',u'ㅜ',u'ㅝ',u'ㅞ',u'ㅟ',u'ㅠ',u'ㅡ',u'ㅢ',u'ㅣ'] -JONGSUNGS = [u'',u'ㄱ',u'ㄲ',u'ㄳ',u'ㄴ',u'ㄵ',u'ㄶ',u'ㄷ',u'ㄹ',u'ㄺ',u'ㄻ',u'ㄼ',u'ㄽ',u'ㄾ',u'ㄿ',u'ㅀ',u'ㅁ',u'ㅂ',u'ㅄ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ'] - -# 코딩 효율과 가독성을 위해서 index대신 unicode사용 by bluedisk -JONG_COMP = { - u'ㄱ':{ - u'ㄱ': u'ㄲ', - u'ㅅ': u'ㄳ', - }, - u'ㄴ':{ - u'ㅈ': u'ㄵ', - u'ㅎ': u'ㄶ', - }, - u'ㄹ':{ - u'ㄱ': u'ㄺ', - u'ㅁ': u'ㄻ', - u'ㅂ': u'ㄼ', - u'ㅅ': u'ㄽ', - u'ㅌ': u'ㄾ', - u'ㅍ': u'ㄿ', - u'ㅎ': u'ㅀ', - } -} - -NUM_CHOSUNGS = 19 -NUM_JOONGSUNGS = 21 -NUM_JONGSUNGS = 28 - -FIRST_HANGUL_UNICODE = 0xAC00 #'가' -LAST_HANGUL_UNICODE = 0xD7A3 #'힣' - -# 한자와 라틴 문자 범위 by bluedisk -FIRST_HANJA_UNICODE = 0x4E00 -LAST_HANJA_UNICODE = 0x9FFF - -FIRST_HANJA_EXT_A_UNICODE = 0x3400 -LAST_HANJA_EXT_A_UNICODE = 0x4DBF - -FIRST_LATIN1_UNICODE = 0x0000 # NUL -LAST_LATIN1_UNICODE = 0x00FF # 'ÿ' - -# EXT B~E 는 무시 - -################################################################################ -# Hangul Automata functions by bluedisk@gmail.com -################################################################################ -COMPOSE_CODE = u'ᴥ' - -def decompose_text(text, latin_filter=True): - result=u"" - - for c in list(text): - if is_hangul(c): - - result = result + "".join(decompose(c)) + COMPOSE_CODE - - else: - if latin_filter: # 한글 외엔 Latin1 범위까지만 포함 (한글+영어) - if is_latin1(c): - result = result + c - else: - result = result + c - - return result - -def automata(text): - res_text = u"" - status="CHO" - - for c in text: - - if status == "CHO": - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - - elif status == "JOONG": - - if c != COMPOSE_CODE and c in JOONGSUNGS: - joongsung = c - status="JONG1" - else: - res_text = res_text + chosung - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - status="CHO" - - elif status == "JONG1": - - if c != COMPOSE_CODE and c in JONGSUNGS: - jongsung = c - - if c in JONG_COMP: - status="JONG2" - else: - res_text = res_text + compose(chosung, joongsung, jongsung) - status="CHO" - - else: - res_text = res_text + compose(chosung, joongsung) - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - - status="CHO" - - elif status == "JONG2": - - if c != COMPOSE_CODE and c in JONG_COMP[jongsung]: - jongsung = JONG_COMP[jongsung][c] - c = COMPOSE_CODE # 종성 재 출력 방지 - - res_text = res_text + compose(chosung, joongsung, jongsung) - - if c != COMPOSE_CODE: - - res_text = res_text + c - - status="CHO" - - - return res_text - -################################################################################ -# Boolean Hangul functions -################################################################################ - -def is_hangul(phrase): - """Check whether the phrase is Hangul. - This method ignores white spaces, punctuations, and numbers. - @param phrase a target string - @return True if the phrase is Hangul. 
False otherwise.""" - - # If the input is only one character, test whether the character is Hangul. - if len(phrase) == 1: return is_all_hangul(phrase) - - # Remove all white spaces, punctuations, numbers. - exclude = set(string.whitespace + string.punctuation + '0123456789') - phrase = ''.join(ch for ch in phrase if ch not in exclude) - - return is_all_hangul(phrase) - -def is_all_hangul(phrase): - """Check whether the phrase contains all Hangul letters - @param phrase a target string - @return True if the phrase only consists of Hangul. False otherwise.""" - - for unicode_value in map(lambda letter:ord(letter), phrase): - if unicode_value < FIRST_HANGUL_UNICODE or unicode_value > LAST_HANGUL_UNICODE: - # Check whether the letter is chosungs, joongsungs, or jongsungs. - if unicode_value not in map(lambda v: ord(v), CHOSUNGS + JOONGSUNGS + JONGSUNGS[1:]): - return False - return True - -def is_hanja(phrase): - for unicode_value in map(lambda letter:ord(letter), phrase): - if ((unicode_value < FIRST_HANJA_UNICODE or unicode_value > LAST_HANJA_UNICODE) and - (unicode_value < FIRST_HANJA_EXT_A_UNICODE or unicode_value > LAST_HANJA_EXT_A_UNICODE)): - return False - return True - -def is_latin1(phrase): - - for unicode_value in map(lambda letter:ord(letter), phrase): - if unicode_value < FIRST_LATIN1_UNICODE or unicode_value > LAST_LATIN1_UNICODE: - return False - return True - - -def has_jongsung(letter): - """Check whether this letter contains Jongsung""" - if len(letter) != 1: - raise Exception('The target string must be one letter.') - if not is_hangul(letter): - raise NotHangulException('The target string must be Hangul') - - unicode_value = ord(letter) - return (unicode_value - FIRST_HANGUL_UNICODE) % NUM_JONGSUNGS > 0 - -def has_batchim(letter): - """This method is the same as has_jongsung()""" - return has_jongsung(letter) - -def has_approximant(letter): - """Approximant makes complex vowels, such as ones starting with y or w. - In Korean there is a unique approximant euㅡ making uiㅢ, but ㅢ does not make many irregularities.""" - if len(letter) != 1: - raise Exception('The target string must be one letter.') - if not is_hangul(letter): - raise NotHangulException('The target string must be Hangul') - - jaso = decompose(letter) - diphthong = (2, 3, 6, 7, 9, 10, 12, 14, 15, 17) - # [u'ㅑ',u'ㅒ',',u'ㅕ',u'ㅖ',u'ㅘ',u'ㅙ',u'ㅛ',u'ㅝ',u'ㅞ',u'ㅠ'] - # excluded 'ㅢ' because y- and w-based complex vowels are irregular. - # vowels with umlauts (ㅐ, ㅔ, ㅚ, ㅟ) are not considered complex vowels. - return jaso[1] in diphthong - -################################################################################ -# Decomposition & Combination -################################################################################ - -def compose(chosung, joongsung, jongsung=u''): - """This function returns a Hangul letter by composing the specified chosung, joongsung, and jongsung. - @param chosung - @param joongsung - @param jongsung the terminal Hangul letter. 
This is optional if you do not need a jongsung.""" - - if jongsung is None: jongsung = u'' - - try: - chosung_index = CHOSUNGS.index(chosung) - joongsung_index = JOONGSUNGS.index(joongsung) - jongsung_index = JONGSUNGS.index(jongsung) - except Exception, e: - raise NotHangulException('No valid Hangul character can be generated using given combination of chosung, joongsung, and jongsung.') - - return unichr(0xAC00 + chosung_index * NUM_JOONGSUNGS * NUM_JONGSUNGS + joongsung_index * NUM_JONGSUNGS + jongsung_index) - -def decompose(hangul_letter): - """This function returns letters by decomposing the specified Hangul letter.""" - - if len(hangul_letter) < 1: - raise NotLetterException('') - elif not is_hangul(hangul_letter): - raise NotHangulException('') - - code = ord(hangul_letter) - FIRST_HANGUL_UNICODE - jongsung_index = code % NUM_JONGSUNGS - code /= NUM_JONGSUNGS - joongsung_index = code % NUM_JOONGSUNGS - code /= NUM_JOONGSUNGS - chosung_index = code - - if chosung_index < 0: - chosung_index = 0 - - try: - return (CHOSUNGS[chosung_index], JOONGSUNGS[joongsung_index], JONGSUNGS[jongsung_index]) - except: - print "%d / %d / %d"%(chosung_index, joongsung_index, jongsung_index) - print "%s / %s " %( (JOONGSUNGS[joongsung_index].encode("utf8"), JONGSUNGS[jongsung_index].encode('utf8'))) - raise Exception() - -################################################################################ -# Josa functions -################################################################################ - -def josa_en(word): - """add josa either '은' or '는' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'은' if has_jongsung(last_letter) else u'는' - return word + josa - -def josa_eg(word): - """add josa either '이' or '가' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'이' if has_jongsung(last_letter) else u'가' - return word + josa - -def josa_el(word): - """add josa either '을' or '를' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'을' if has_jongsung(last_letter) else u'를' - return word + josa - -def josa_ro(word): - """add josa either '으로' or '로' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - if not has_jongsung(last_letter): - josa = u'로' - elif (ord(last_letter) - FIRST_HANGUL_UNICODE) % NUM_JONGSUNGS == 9: # ㄹ - josa = u'로' - else: - josa = u'으로' - - return word + josa - -def josa_gwa(word): - """add josa either '과' or '와' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'과' if has_jongsung(last_letter) else u'와' - return word + josa - -def josa_ida(word): - """add josa either '이다' or '다' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'이다' if has_jongsung(last_letter) else u'다' - return word + josa - -################################################################################ -# Prefixes and suffixes -# Practice area; need more organization -################################################################################ - -def add_ryul(word): - """add suffix either '률' or '율' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise 
NotHangulException('') - - last_letter = word[-1] - if not has_jongsung(last_letter): - ryul = u'율' - elif (ord(last_letter) - FIRST_HANGUL_UNICODE) % NUM_JONGSUNGS == 4: # ㄴ - ryul = u'율' - else: - ryul = u'률' - - return word + ryul - - -################################################################################ -# The formatter, or ultimately, a template system -# Practice area; need more organization -################################################################################ - -def ili(word): - """convert {가} or {이} to their correct respective particles automagically.""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[word.find(u'{가}')-1] - word = word.replace(u'{가}', (u'이' if has_jongsung(last_letter) else u'가')) - - last_letter = word[word.find(u'{이}')-1] - word = word.replace(u'{이}', (u'이' if has_jongsung(last_letter) else u'가')) - return word - -################################################################################ -# Exceptions -################################################################################ - -class NotHangulException(Exception): - pass - -class NotLetterException(Exception): - pass - -class NotWordException(Exception): - pass diff --git a/notebooks/Hangulpy.pyc b/notebooks/Hangulpy.pyc deleted file mode 100755 index 7e3ef48..0000000 Binary files a/notebooks/Hangulpy.pyc and /dev/null differ diff --git a/notebooks/Hangulpy3.py b/notebooks/Hangulpy3.py deleted file mode 100644 index ffa9cc9..0000000 --- a/notebooks/Hangulpy3.py +++ /dev/null @@ -1,411 +0,0 @@ -# -*- coding: utf-8 -*- -# %load Hangulpy/Hangulpy.py -#!/usr/bin/env python -# encoding: utf-8 -""" -Hangulpy.py -Copyright (C) 2012 Ryan Rho, Hyunwoo Cho -Text Decompose & Automata Extention by bluedisk@gmail -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-""" - -from __future__ import unicode_literals -from __future__ import division - -from six import unichr -import string - -################################################################################ -# Hangul Unicode Variables -################################################################################ - -# Code = 0xAC00 + (Chosung_index * NUM_JOONGSUNGS * NUM_JONGSUNGS) + (Joongsung_index * NUM_JONGSUNGS) + (Jongsung_index) -CHOSUNGS = [u'ㄱ',u'ㄲ',u'ㄴ',u'ㄷ',u'ㄸ',u'ㄹ',u'ㅁ',u'ㅂ',u'ㅃ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅉ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ'] -JOONGSUNGS = [u'ㅏ',u'ㅐ',u'ㅑ',u'ㅒ',u'ㅓ',u'ㅔ',u'ㅕ',u'ㅖ',u'ㅗ',u'ㅘ',u'ㅙ',u'ㅚ',u'ㅛ',u'ㅜ',u'ㅝ',u'ㅞ',u'ㅟ',u'ㅠ',u'ㅡ',u'ㅢ',u'ㅣ'] -JONGSUNGS = [u'',u'ㄱ',u'ㄲ',u'ㄳ',u'ㄴ',u'ㄵ',u'ㄶ',u'ㄷ',u'ㄹ',u'ㄺ',u'ㄻ',u'ㄼ',u'ㄽ',u'ㄾ',u'ㄿ',u'ㅀ',u'ㅁ',u'ㅂ',u'ㅄ',u'ㅅ',u'ㅆ',u'ㅇ',u'ㅈ',u'ㅊ',u'ㅋ',u'ㅌ',u'ㅍ',u'ㅎ'] - -# 코딩 효율과 가독성을 위해서 index대신 unicode사용 by bluedisk -JONG_COMP = { - u'ㄱ':{ - u'ㄱ': u'ㄲ', - u'ㅅ': u'ㄳ', - }, - u'ㄴ':{ - u'ㅈ': u'ㄵ', - u'ㅎ': u'ㄶ', - }, - u'ㄹ':{ - u'ㄱ': u'ㄺ', - u'ㅁ': u'ㄻ', - u'ㅂ': u'ㄼ', - u'ㅅ': u'ㄽ', - u'ㅌ': u'ㄾ', - u'ㅍ': u'ㄿ', - u'ㅎ': u'ㅀ', - } -} - -NUM_CHOSUNGS = 19 -NUM_JOONGSUNGS = 21 -NUM_JONGSUNGS = 28 - -FIRST_HANGUL_UNICODE = 0xAC00 #'가' -LAST_HANGUL_UNICODE = 0xD7A3 #'힣' - -# 한자와 라틴 문자 범위 by bluedisk -FIRST_HANJA_UNICODE = 0x4E00 -LAST_HANJA_UNICODE = 0x9FFF - -FIRST_HANJA_EXT_A_UNICODE = 0x3400 -LAST_HANJA_EXT_A_UNICODE = 0x4DBF - -FIRST_LATIN1_UNICODE = 0x0000 # NUL -LAST_LATIN1_UNICODE = 0x00FF # 'ÿ' - -# EXT B~E 는 무시 - -################################################################################ -# Hangul Automata functions by bluedisk@gmail.com -################################################################################ -COMPOSE_CODE = u'ᴥ' - -def decompose_text(text, latin_filter=True): - result=u"" - - for c in list(text): - if is_hangul(c): - - result = result + "".join(decompose(c)) + COMPOSE_CODE - - else: - if latin_filter: # 한글 외엔 Latin1 범위까지만 포함 (한글+영어) - if is_latin1(c): - result = result + c - else: - result = result + c - - return result - -def automata(text): - res_text = u"" - status="CHO" - - for c in text: - - if status == "CHO": - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - - elif status == "JOONG": - - if c != COMPOSE_CODE and c in JOONGSUNGS: - joongsung = c - status="JONG1" - else: - res_text = res_text + chosung - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - status="CHO" - - elif status == "JONG1": - - if c != COMPOSE_CODE and c in JONGSUNGS: - jongsung = c - - if c in JONG_COMP: - status="JONG2" - else: - res_text = res_text + compose(chosung, joongsung, jongsung) - status="CHO" - - else: - res_text = res_text + compose(chosung, joongsung) - - if c in CHOSUNGS: - chosung = c - status="JOONG" - else: - if c != COMPOSE_CODE: - - res_text = res_text + c - - status="CHO" - - elif status == "JONG2": - - if c != COMPOSE_CODE and c in JONG_COMP[jongsung]: - jongsung = JONG_COMP[jongsung][c] - c = COMPOSE_CODE # 종성 재 출력 방지 - - res_text = res_text + compose(chosung, joongsung, jongsung) - - if c != COMPOSE_CODE: - - res_text = res_text + c - - status="CHO" - - - return res_text - -################################################################################ -# Boolean Hangul functions -################################################################################ - -def is_hangul(phrase): - """Check whether the phrase is Hangul. - This method ignores white spaces, punctuations, and numbers. 
- @param phrase a target string - @return True if the phrase is Hangul. False otherwise.""" - - # If the input is only one character, test whether the character is Hangul. - if len(phrase) == 1: return is_all_hangul(phrase) - - # Remove all white spaces, punctuations, numbers. - exclude = set(string.whitespace + string.punctuation + '0123456789') - phrase = ''.join(ch for ch in phrase if ch not in exclude) - - return is_all_hangul(phrase) - -def is_all_hangul(phrase): - """Check whether the phrase contains all Hangul letters - @param phrase a target string - @return True if the phrase only consists of Hangul. False otherwise.""" - - for unicode_value in map(lambda letter:ord(letter), phrase): - if unicode_value < FIRST_HANGUL_UNICODE or unicode_value > LAST_HANGUL_UNICODE: - # Check whether the letter is chosungs, joongsungs, or jongsungs. - if unicode_value not in map(lambda v: ord(v), CHOSUNGS + JOONGSUNGS + JONGSUNGS[1:]): - return False - return True - -def is_hanja(phrase): - for unicode_value in map(lambda letter:ord(letter), phrase): - if ((unicode_value < FIRST_HANJA_UNICODE or unicode_value > LAST_HANJA_UNICODE) and - (unicode_value < FIRST_HANJA_EXT_A_UNICODE or unicode_value > LAST_HANJA_EXT_A_UNICODE)): - return False - return True - -def is_latin1(phrase): - - for unicode_value in map(lambda letter:ord(letter), phrase): - if unicode_value < FIRST_LATIN1_UNICODE or unicode_value > LAST_LATIN1_UNICODE: - return False - return True - - -def has_jongsung(letter): - """Check whether this letter contains Jongsung""" - if len(letter) != 1: - raise Exception('The target string must be one letter.') - if not is_hangul(letter): - raise NotHangulException('The target string must be Hangul') - - unicode_value = ord(letter) - return (unicode_value - FIRST_HANGUL_UNICODE) % NUM_JONGSUNGS > 0 - -def has_batchim(letter): - """This method is the same as has_jongsung()""" - return has_jongsung(letter) - -def has_approximant(letter): - """Approximant makes complex vowels, such as ones starting with y or w. - In Korean there is a unique approximant euㅡ making uiㅢ, but ㅢ does not make many irregularities.""" - if len(letter) != 1: - raise Exception('The target string must be one letter.') - if not is_hangul(letter): - raise NotHangulException('The target string must be Hangul') - - jaso = decompose(letter) - diphthong = (2, 3, 6, 7, 9, 10, 12, 14, 15, 17) - # [u'ㅑ',u'ㅒ',',u'ㅕ',u'ㅖ',u'ㅘ',u'ㅙ',u'ㅛ',u'ㅝ',u'ㅞ',u'ㅠ'] - # excluded 'ㅢ' because y- and w-based complex vowels are irregular. - # vowels with umlauts (ㅐ, ㅔ, ㅚ, ㅟ) are not considered complex vowels. - return jaso[1] in diphthong - -################################################################################ -# Decomposition & Combination -################################################################################ - -def compose(chosung, joongsung, jongsung=u''): - """This function returns a Hangul letter by composing the specified chosung, joongsung, and jongsung. - @param chosung - @param joongsung - @param jongsung the terminal Hangul letter. 
This is optional if you do not need a jongsung.""" - - if jongsung is None: jongsung = u'' - - try: - chosung_index = CHOSUNGS.index(chosung) - joongsung_index = JOONGSUNGS.index(joongsung) - jongsung_index = JONGSUNGS.index(jongsung) - except Exception: - raise NotHangulException('No valid Hangul character can be generated using given combination of chosung, joongsung, and jongsung.') - - return unichr(0xAC00 + chosung_index * NUM_JOONGSUNGS * NUM_JONGSUNGS + joongsung_index * NUM_JONGSUNGS + jongsung_index) - -def decompose(hangul_letter): - """This function returns letters by decomposing the specified Hangul letter.""" - - if len(hangul_letter) < 1: - raise NotLetterException('') - elif not is_hangul(hangul_letter): - raise NotHangulException('') - - code = ord(hangul_letter) - FIRST_HANGUL_UNICODE - jongsung_index = int(code % NUM_JONGSUNGS) - code /= NUM_JONGSUNGS - joongsung_index = int(code % NUM_JOONGSUNGS) - code /= NUM_JOONGSUNGS - chosung_index = int(code) - - if chosung_index < 0: - chosung_index = 0 - - try: - return (CHOSUNGS[chosung_index], JOONGSUNGS[joongsung_index], JONGSUNGS[jongsung_index]) - except: - print ("%d / %d / %d"%(chosung_index, joongsung_index, jongsung_index)) - print ("%s / %s " %( (JOONGSUNGS[joongsung_index].encode("utf8"), JONGSUNGS[jongsung_index].encode('utf8')))) - raise Exception() - -################################################################################ -# Josa functions -################################################################################ - -def josa_en(word): - """add josa either '은' or '는' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'은' if has_jongsung(last_letter) else u'는' - return word + josa - -def josa_eg(word): - """add josa either '이' or '가' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'이' if has_jongsung(last_letter) else u'가' - return word + josa - -def josa_el(word): - """add josa either '을' or '를' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'을' if has_jongsung(last_letter) else u'를' - return word + josa - -def josa_ro(word): - """add josa either '으로' or '로' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - if not has_jongsung(last_letter): - josa = u'로' - elif (ord(last_letter) - FIRST_HANGUL_UNICODE) % NUM_JONGSUNGS == 9: # ㄹ - josa = u'로' - else: - josa = u'으로' - - return word + josa - -def josa_gwa(word): - """add josa either '과' or '와' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'과' if has_jongsung(last_letter) else u'와' - return word + josa - -def josa_ida(word): - """add josa either '이다' or '다' at the end of this word""" - word = word.strip() - if not is_hangul(word): raise NotHangulException('') - - last_letter = word[-1] - josa = u'이다' if has_jongsung(last_letter) else u'다' - return word + josa - -################################################################################ -# Prefixes and suffixes -# Practice area; need more organization -################################################################################ - -def add_ryul(word): - """add suffix either '률' or '율' at the end of this word""" - word = word.strip() - if not 
diff --git a/notebooks/char_rnn_sample_hangul.ipynb b/notebooks/char_rnn_sample_hangul.ipynb index 23d2d2e..066c6b0 100755 --- a/notebooks/char_rnn_sample_hangul.ipynb +++ b/notebooks/char_rnn_sample_hangul.ipynb @@ -32,9 +32,11 @@ "import argparse\n", "import time\n", "import os\n", + "import hgtk\n", + "\n", "from six.moves import cPickle\n", "from TextLoader import *\n", - "from Hangulpy import *\n", + "\n", "print (\"Packages Imported\")" ] }, @@ -57,18 +59,18 @@ "output_type": "stream", "text": [ "loading preprocessed files\n", - "type of 'data_loader' is , length is 76\n", + "type of 'data_loader' is , length is 76\n", "\n", "\n", "data_loader.vocab looks like \n", - "{u'_': 69, u'6': 59, u':': 57, u'\\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'\"': 28, u'\\u1d25': 0, u\"'\": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\\u3131': 7, u'0': 73, u'\\u3133': 60, u'\\u3132': 29, u'\\u3135': 50, u'\\u3134': 4, u'\\u3137': 13, u'\\u3136': 44, u'\\u3139': 5, u'\\u3138': 32, u'\\u313b': 55, u'\\u313a': 48, u'\\u313c': 54, u'?': 41, u'3': 66, u'\\u3141': 12, u'\\u3140': 51, u'\\u3143': 47, u'\\u3142': 17, u'\\u3145': 10, u'\\u3144': 43, u'\\u3147': 2, u'\\u3146': 22, u'\\u3149': 40, u'\\u3148': 15, u'\\u314b': 42, u'\\u314a': 23, u'\\u314d': 31, u'\\u314c': 30, u'\\u314f': 3, u'\\u314e': 14, u'\\u3151': 34, u'\\u3150': 21, u'\\u3153': 11, u'\\u3152': 74, u'\\u3155': 18, u'\\u3154': 20, u'\\u3157': 9, u'\\u3156': 39, u'\\u3159': 53, u'\\u3158': 26, u'\\u315b': 38, u'\\u315a': 33, u'\\u315d': 36, u'\\u315c': 16, u'\\u315f': 35, u'\\u315e': 61, u'\\u3161': 8, u'\\u3160': 37, u'\\u3163': 6, u'\\u3162': 25, u'\\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\\u313f': 56, u'\\u313e': 70, u'8': 68} \n", + "{'ㅀ': 51, '7': 71, 'ㅟ': 35, 'ㅓ': 11, 'ㅣ': 6, '-': 65, 'ㄷ': 13, 'ㅈ': 15, 'ㅚ': 33, '2': 62, '9': 64, 'ㅒ': 74, 'ㅐ': 21, '?': 41, ')': 46, \"'\": 49, 'ㅡ': 8, 'ㅇ': 2, 'ㄴ': 4, 'ㅛ': 38, 'ㅌ': 30, 'ㄶ': 44, '3': 66, '8': 68, '_': 69, 'ㅖ': 39, 'ㄺ': 48, ':': 57, '\"': 28, 'ㅘ': 26, 'ㄼ': 54, 'ㅏ': 3, 'ㅄ': 43, 'ㄻ': 55, '5': 63, 'ㅆ': 22, 'ㅞ': 
61, 'ㅠ': 37, 'ㄸ': 32, '1': 58, 'ㄿ': 56, 'ㅂ': 17, 'ㅅ': 10, ' ': 1, 'ㅔ': 20, 'ㄱ': 7, 'ㅑ': 34, 'ᴥ': 0, '4': 67, 'ㄲ': 29, '>': 75, 'ㅜ': 16, '0': 73, 'ㅎ': 14, 'ㅗ': 9, 'ㄵ': 50, 'ㅉ': 40, 'ㅢ': 25, 'ㅙ': 53, '!': 52, 'ㄾ': 70, '.': 24, 'ㅃ': 47, '\\n': 19, ',': 27, 'ㅝ': 36, 'ㅊ': 23, '6': 59, 'ㄹ': 5, 'ㄳ': 60, '(': 45, 'ㅕ': 18, 'ㅋ': 42, 'ㅍ': 31, '\\x1a': 72, 'ㅁ': 12} \n", "\n", "\n", - "type of 'data_loader.chars' is , length is 76\n", + "type of 'data_loader.chars' is , length is 76\n", "\n", "\n", "data_loader.chars looks like \n", - "(u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u3134', u'\\u3139', u'\\u3163', u'\\u3131', u'\\u3161', u'\\u3157', u'\\u3145', u'\\u3153', u'\\u3141', u'\\u3137', u'\\u314e', u'\\u3148', u'\\u315c', u'\\u3142', u'\\u3155', u'\\n', u'\\u3154', u'\\u3150', u'\\u3146', u'\\u314a', u'.', u'\\u3162', u'\\u3158', u',', u'\"', u'\\u3132', u'\\u314c', u'\\u314d', u'\\u3138', u'\\u315a', u'\\u3151', u'\\u315f', u'\\u315d', u'\\u3160', u'\\u315b', u'\\u3156', u'\\u3149', u'?', u'\\u314b', u'\\u3144', u'\\u3136', u'(', u')', u'\\u3143', u'\\u313a', u\"'\", u'\\u3135', u'\\u3140', u'!', u'\\u3159', u'\\u313c', u'\\u313b', u'\\u313f', u':', u'1', u'6', u'\\u3133', u'\\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\\u313e', u'7', u'\\x1a', u'0', u'\\u3152', u'>') \n" + "('ᴥ', ' ', 'ㅇ', 'ㅏ', 'ㄴ', 'ㄹ', 'ㅣ', 'ㄱ', 'ㅡ', 'ㅗ', 'ㅅ', 'ㅓ', 'ㅁ', 'ㄷ', 'ㅎ', 'ㅈ', 'ㅜ', 'ㅂ', 'ㅕ', '\\n', 'ㅔ', 'ㅐ', 'ㅆ', 'ㅊ', '.', 'ㅢ', 'ㅘ', ',', '\"', 'ㄲ', 'ㅌ', 'ㅍ', 'ㄸ', 'ㅚ', 'ㅑ', 'ㅟ', 'ㅝ', 'ㅠ', 'ㅛ', 'ㅖ', 'ㅉ', '?', 'ㅋ', 'ㅄ', 'ㄶ', '(', ')', 'ㅃ', 'ㄺ', \"'\", 'ㄵ', 'ㅀ', '!', 'ㅙ', 'ㄼ', 'ㄻ', 'ㄿ', ':', '1', '6', 'ㄳ', 'ㅞ', '2', '5', '9', '-', '3', '4', '8', '_', 'ㄾ', '7', '\\x1a', '0', 'ㅒ', '>') \n" ] } ], @@ -128,8 +130,8 @@ "\n", "with tf.device(\"/cpu:0\"):\n", " # Select RNN Cell\n", - " unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)\n", - " cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)\n", + " unitcell = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + " cell = tf.contrib.rnn.MultiRNNCell([unitcell] * num_layers)\n", " # Set paths to the graph \n", " input_data = tf.placeholder(tf.int32, [_batch_size, _seq_length])\n", " targets = tf.placeholder(tf.int32, [_batch_size, _seq_length])\n", @@ -141,7 +143,7 @@ " softmax_b = tf.get_variable(\"softmax_b\", [vocab_size])\n", " with tf.device(\"/cpu:0\"):\n", " embedding = tf.get_variable(\"embedding\", [vocab_size, rnn_size])\n", - " inputs = tf.split(1, _seq_length, tf.nn.embedding_lookup(embedding, input_data))\n", + " inputs = tf.split(tf.nn.embedding_lookup(embedding, input_data), _seq_length, 1)\n", " inputs = [tf.squeeze(input_, [1]) for input_ in inputs]\n", " \n", " # Loop function for seq2seq\n", @@ -150,14 +152,14 @@ " prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))\n", " return tf.nn.embedding_lookup(embedding, prev_symbol)\n", " # Output of RNN \n", - " outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state\n", + " outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, initial_state\n", " , cell, loop_function=None, scope='rnnlm')\n", - " output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])\n", + " output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])\n", " logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)\n", " # Next word probability \n", " probs = tf.nn.softmax(logits)\n", " # Define LOSS\n", - " loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input\n", + " loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([logits], # Input\n", " [tf.reshape(targets, [-1])], # Target\n", " 
[tf.ones([_batch_size * _seq_length])], # Weight \n", " vocab_size)\n", @@ -175,7 +177,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -233,75 +235,91 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": { "collapsed": false, - "scrolled": true + "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "누\n", + "구\n", + " \n", "Prime Text : 누구 => ㄴㅜᴥㄱㅜᴥ \n", "data/nine_dreams/model.ckpt-99000\n", - "SAMPLED TEXT = [u'\\u3134', u'\\u315c', u'\\u1d25', u'\\u3131', u'\\u315c', u'\\u1d25', u' ', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3139', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3145', u'\\u3153', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3131', u'\\u3153', u'\\u3145', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3157', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u'\"', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3145', u'\\u3153', u'\\u1d25', u' ', u'\\u3145', u'\\u3153', u'\\u1d25', u'\\u3147', u'\\u315c', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3137', u'\\u3150', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u3142', u'\\u1d25', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3147', u'\\u3160', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3137', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3148', u'\\u3154', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', 
u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3134', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u3161', u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u315c', u'\\u3134', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', 
u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u1d25', u' ', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u3157', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\n', u'\\u3142', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3150', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u315c', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3137', u'\\u3150', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u3142', u'\\u1d25', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3147', u'\\u3160', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3148', u'\\u3154', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3148', u'\\u3154', u'\\u1d25', u' ', u'\\u3147', 
u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3147', u'\\u3160', u'\\u1d25', u'\\u3134', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3141', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u1d25', u' ', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3147', u'\\u1d25', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3155', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\n', u'\\u3142', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3150', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u315c', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', 
u'\\u3154', u'\\u1d25', u' ', u'\\u3137', u'\\u3150', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u3142', u'\\u1d25', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3147', u'\\u3160', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3148', u'\\u3154', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u1d25', u' ', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u3157', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\n', u'\\u3142', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3150', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u315c', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3147', u'\\u3154', u'\\u1d25', u' ', u'\\u3137', u'\\u3150', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u3142', u'\\u1d25', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3147', u'\\u3160', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', 
u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u3146', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u3157', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u314e', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3155', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3131', u'\\u3153', u'\\u3145', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u3144', u'\\u1d25', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3147', u'\\u3157', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3131', u'\\u3161', u'\\u1d25', u' ', u'\\u3147', u'\\u3155', u'\\u1d25', u'\\u3131', u'\\u3158', u'\\u3134', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u3144', u'\\u1d25', u'\\u3134', u'\\u3161', u'\\u3134', u'\\u1d25', u' ', u'\\u3131', u'\\u3153', u'\\u3145', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u1d25', u' ', u'\\u314e', u'\\u314f', u'\\u1d25', u'\\u3134', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\n', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u315c', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u3146', u'\\u1d25', u'\\u3137', u'\\u314f', u'\\u1d25', u'.', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3147', u'\\u3157', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u3157', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', 
u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u1d25', u'\\u3134', u'\\u3163', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u' ', u'\\u3141', u'\\u314f', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3137', u'\\u1d25', u'\\u3131', u'\\u3157', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u1d25', u'\\u3131', u'\\u3163', u'\\u1d25', u'\\u3139', u'\\u3161', u'\\u3139', u'\\u1d25', u',', u' ', u'\\n', u' ', u' ', u'\"', u'\\u3145', u'\\u3157', u'\\u1d25', u'\\u3148', u'\\u3153', u'\\u1d25', u'\\u3131', u'\\u314f', u'\\u1d25', u' ', u'\\u3147', u'\\u3153', u'\\u1d25', u'\\u3149', u'\\u3163', u'\\u1d25', u' ', u'\\u3145', u'\\u314f', u'\\u1d25', u'\\u3139', u'\\u314f', u'\\u3141', u'\\u1d25', u'\\u3147', u'\\u3161', u'\\u3139', u'\\u1d25', u' ', u'\\u3137', u'\\u3161', u'\\u3139', u'\\u1d25', u'\\u3147', u'\\u3153', u'\\u1d25', u' ', u'\\u3131', u'\\u314f', u'\\u1d25', u'\\u3145', u'\\u3163', u'\\u1d25', u'\\u3131']\n", + "SAMPLED TEXT = ㄴㅜᴥㄱㅜᴥ ㅅㅓㅇᴥㄳㅐㅅᴥㅇㅡㄹᴥ ㅇㅣᴥㅁㅣᴥ ㅁㅗㅅᴥㅎㅏㄹᴥ ㅎㅘㅇᴥㅅㅏㄴᴥㅇㅡᴥㄹㅗᴥ ㅊㅗᴥㅁㅕㄴᴥㅎㅏᴥㄴㅣᴥ, ㅇㅣᴥㅇㅔᴥ ㅅㅜㅁᴥㅇㅡㄹᴥ \n", + "ㄱㅏㅂᴥㄱㅗᴥ, ㄱㅣᴥㄷㅓᴥㄲㅔᴥ ㅇㅏᴥㄹㅚᴥㄱㅣᴥㄹㅡㄹᴥ, \n", + " \"ㅇㅝㄹᴥㅇㅘㅇᴥㅇㅣᴥ ㄷㅐᴥㄷㅏㅂᴥㅎㅏᴥㄷㅚᴥ\n", + " \"ㄴㅏㅇᴥㅈㅏᴥ ㅇㅣᴥ ㅅㅗᴥㅈㅓᴥㄱㅏᴥ ㄷㅐㄱᴥ ㄸㅓᴥㅇㅓᴥ ㅈㅏㅇᴥㅊㅓㅂᴥㅇㅡㄹᴥ ㄴㅐᴥㄹㅣᴥㅅㅕㅆᴥㅇㅡᴥㄴㅣᴥ ㅈㅔᴥㄱㅗㅇᴥㅇㅣᴥㅇㅗᴥ?\"\n", + " \"ㅇㅕㅇᴥㅇㅑㅇᴥㄱㅗㅇᴥㅈㅜᴥㄱㅏᴥ ㅅㅗᴥㅁㅐᴥㄱㅏᴥ ㄷㅏᴥㄱㅗᴥㅎㅏᴥㅇㅕᴥ ㅇㅌᴥㅇㅜᴥㄴㅏᴥ ㅇㅕㅁᴥㄹㅏᴥㄷㅏᴥㅇㅘᴥ ㅇㅗᴥㄴㅡㄹᴥㅂㅓㅂᴥㄱㅗᴥ ㄴㅏㅁᴥㄱㅗᴥ ㅇㅓㅄᴥㄴㅏᴥㅇㅣᴥㄲㅏᴥ?\"\n", + " ㅅㅓㅁᴥㅇㅝㄹᴥㅇㅣᴥ ㄷㅏᴥㅅㅣᴥ ㅎㅏᴥㅇㅕᴥㄱㅡㅁᴥㄲㅔᴥ ㄷㅜᴥㄹㅣᴥㄱㅏᴥ ㅌㅡㄹᴥㄹㅣㅁᴥㅇㅣᴥ ㅅㅣㅁᴥㅂㅏㅇᴥㅎㅏㄴᴥ ㅇㅏᴥㅂㅕㄹᴥㅍㅜㄴᴥㅇㅡㄹᴥ ㅅㅏᴥㄹㅡㄹᴥ ㄸㅏᴥㄹㅡᴥㄷㅗᴥㄹㅏᴥ. \n", + "ㄸㅗᴥㅎㅏㄴᴥ ㅅㅣㄴᴥㄱㅗㅇᴥㅁㅕㅇᴥㅇㅡㅁᴥ, ㅈㅣㄱᴥㄴㅕᴥㄱㅏᴥ ㄱㅡᴥㄷㅐᴥㅈㅘᴥㅇㅘᴥ ㅁㅓㄷᴥㄱㅗᴥ ㅈㅔᴥ ㅂㅜᴥㅁㅗᴥㅈㅣᴥ ㄴㅗㅅᴥㅎㅏᴥㅇㅕㅆᴥㄷㅏᴥ. \n", + "\n", + " ㄱㅗㄱᴥㅈㅓㄱᴥㅇㅢᴥ ㅈㅓㄴᴥㄱㅜㄱᴥ(ㅇㅏㄴᴥㅈㅔᴥ(ㅅㅏㅁᴥㅂㅗㄱᴥ, \n", + " ㅅㅣㄴᴥㅅㅓㅇᴥㄷㅗᴥ ㅎㅗㄴᴥㅇㅕㄴᴥㅇㅡㄴᴥ ㅅㅓㄱᴥㅂㅜㄴᴥㅊㅜᴥㅅㅓㅇᴥㅇㅣㄹᴥ ㄴㅏㅇᴥㅈㅏᴥㄱㅏᴥ ㅇㅓㅄᴥㄴㅡㄴᴥ ㅁㅏㄹᴥㅇㅣᴥㄹㅗᴥㄷㅏᴥ.\"\n", + " ㅇㅣᴥㅇㅔᴥ ㅊㅜㄴᴥㅇㅜㄴᴥㅇㅣᴥ ㅇㅣᴥㄹㅡᴥㄱㅣᴥㄹㅡㄹᴥ\n", + " 'ㄴㅓᴥㅎㅢᴥㄹㅡㄹᴥ ㄴㅐᴥㄱㅕᴥ ㅇㅗㄷᴥㅈㅏㄴᴥ ㄱㅗㅇᴥㅈㅜᴥㄱㅏᴥ ㄷㅡㄹᴥㅇㅓᴥㄱㅏᴥㄱㅔᴥ ㅎㅏㄹᴥ \n", + "ㄱㅓㅅᴥㅇㅣᴥㄹㅣᴥㅇㅛᴥ. ㅅㅏㅇᴥㅅㅓᴥㄲㅔᴥㅅㅓᴥ ㅈㅗᴥㅊㅓㅇᴥㅇㅡㄹᴥ ㄴㅏᴥㄱㅔᴥ ㅎㅏᴥㄷㅓᴥㄹㅏᴥ. \n", + " \"ㅅㅐᴥㅅㅗᴥ ㄱㅖᴥㄹㅏㄴᴥㅎㅣᴥ ㄷㅡㄹᴥㅇㅓᴥㄱㅏᴥ ㄱㅖᴥㄹㅏㅇᴥㅇㅔᴥㄱㅔᴥ ㄴㅐᴥㅁㅜᴥㄹㅡㄹᴥ ㅉㅔᴥㅇㅓᴥ ㅈㅗᴥㅎㅏᴥㄱㅗᴥ ㅇㅣㅆᴥㅇㅡㅁᴥㄱㅘᴥ ㅇㅓᴥㅉㅣᴥ ㅅㅓㄹᴥㅇㅣㄴᴥㅈㅡㅌᴥㄱㅔᴥ ㅁㅏㄹᴥㅆㅡㅁᴥㅎㅐㅆᴥㄷㅓᴥㄹㅏᴥ. \n", + " ㄱㅓᴥㅁㅜㄴᴥㄱㅗᴥㅇㅢᴥ ㅈㅏᴥㅈㅜᴥ ㄷㅗᴥㅂㅓㄴᴥㅇㅣᴥ ㄷㅗㄹᴥㅇㅣᴥㄹㅗᴥ ㄱㅟᴥㄹㅗㄴᴥㅎㅏᴥㄴㅡᴥㄴㅑᴥ? ㄴㅓㅀᴥㅇㅓᴥ ㄱㅜㄴᴥㅅㅏᴥㅇㅢᴥ ㅅㅗㄱᴥㅇㅣᴥㅅㅣᴥㅇㅓㅆᴥㄴㅏᴥㅇㅣᴥㄷㅏᴥ.\"\n", + " ㅎㅏㄴᴥㄹㅣㅁᴥㅇㅣᴥ ㅅㅏㄹᴥㅅㅓㄴᴥㅊㅣᴥㄹㅡㄹᴥ ㅅㅏㅁᴥㄱㅣᴥㄱㅗᴥㄴㅡㄴᴥ ㄱㅓㅅᴥㄷㅡㄹᴥㅇㅣᴥㄱㅗᴥ, ㅍㅜㄹᴥㅅㅏᴥㅂㅚᴥㄹㅗᴥ ㅎㅏㅁᴥㅇㅏᴥㄹㅗᴥㅅㅓᴥ ㅇㅛㄴᴥㅌㅣㅁᴥㅎㅏᴥㄹㅏᴥㄴㅏᴥㄴㅣᴥ ㅁㅓㄹᴥㄹㅣᴥ ㅅㅏㅇᴥㄱㅗㅇᴥㄲㅔᴥㅅㅓᴥ ㅇㅣᴥㅇㅔᴥ ㅇㅟᴥㅇㅓㅊᴥ(ㄱㅏㄴᴥㅅㅓㅇᴥㅎㅏᴥㅇㅗᴥㅁㅐᴥ ㅅㅡㄹᴥㅎㅏᴥㄹㅡㄹᴥ ㅎㅏᴥㄱㅣᴥ \n", + "ㅇㅓㅄᴥㄱㅔㅆᴥㄱㅏᴥ ㅇㅏᴥㄹㅣᴥㄷㅓㄼᴥㅇㅔᴥㄱㅔᴥ ㄲㅜㄺᴥㅇㅓᴥㄴㅏᴥㄹㅏᴥ ㅎㅏㅁᴥㅈㅜᴥㅈㅣᴥ ㅇㅏㄶᴥㅇㅡㅁᴥㅇㅣᴥ ㄷㅏᴥ ㅅㅓᴥㅂㅗᴥㄷㅐㄱᴥ \n", + "ㅈㅔᴥㅅㅏᴥㄴㅡㄴᴥ ㅇㅏㄴᴥㄷㅡㅅᴥ(ㅅㅗㄴᴥㅈㅣㄹᴥㅇㅢᴥ ㅈㅣᴥㅂㅜㄱᴥ ㅇㅛㅌᴥㄱㅘᴥ ㄴㅏㄴᴥㅇㅑㅇᴥㄱㅜㄱᴥㅈㅜᴥㄲㅔᴥㅅㅓᴥ ㅎㅚᴥㅂㅗㄱᴥ \n", + "ㅂㅜㄹᴥㅂㅏᴥㅇㅔᴥ ㅅㅔᴥㅅㅏㅇᴥㅎㅏᴥㅁㅕᴥ ㅅㅗᴥㅇㅠᴥㄴㅡㄴᴥ ㅂㅏᴥㅇㅗㅂᴥㄴㅏᴥㅇㅣᴥㄷㅏᴥ. ㄴㅐᴥ ㅇㅓᴥㅅㅣㅅᴥㅅㅏㅂᴥ ㄱㅕㅇᴥㅇㅣᴥ ㄷㅏᴥㄸㅏㄴᴥㅎㅐㅆᴥㄷㅏᴥ. \n", + " ㄱㅖᴥㅅㅓㄱᴥㅇㅢᴥ ㅅㅏᴥㅇㅠᴥㅇㅣㅁᴥㅇㅣᴥ ㅂㅜㄴᴥㅂㅗᴥㄷㅏᴥ ㅅㅐㅇᴥㄱㅟᴥㅎㅏᴥㄱㅗᴥ ㅅㅗㄱᴥㅇㅔᴥ ㄸㅏᴥㄹㅏㅆᴥㅇㅡᴥㄴㅣᴥ ㄱㅡᴥ ㄸㅡㅅᴥㅇㅡㄴᴥ ㄸㅏㄹᴥㅇㅣᴥㅇㅓㅆᴥㄷㅏᴥ. 
ㅅㅗᴥㅇㅠᴥㄴㅡㄴᴥ ㅎㅗㄱᴥㅅㅣᴥ ㅂㅜᴥㄹㅡᴥㅅㅣᴥㄴㅣᴥ ㄱㅡᴥ ㅅㅗㄱᴥㅅㅔᴥ\n", + " ㅅㅓㅇᴥㄱㅜㅇᴥㅇㅡㄹᴥ ㅁㅏㄹᴥㅎㅏᴥㄴㅡㄴᴥ ㅅㅣᴥㄴㅐᴥㄹㅡㄹᴥ ㅇㅣᴥㄴㅣᴥ ㅅㅗᴥㅈㅓᴥㅇㅢᴥ ㅈㅣㅂᴥㅇㅡㄹᴥ ㅁㅏㄹᴥㅆㅡㅁᴥㄷㅡㄹᴥㄹㅣㅁᴥㅇㅡㄹᴥ ㄱㅓᴥㅊㅜᴥㅇㅔᴥ ㅎㅏᴥㅁㅕㄴᴥ (ㅎㅘᴥㄹㅏㅇᴥㅎㅏᴥㅇㅕᴥ ㅊㅓㅂᴥㄷㅡㄹᴥㅇㅔᴥㄱㅔᴥ ㅁㅐᴥㅇㅜᴥ ㄱㅡᴥ ㅊㅗㄱᴥㅎㅣᴥ ㄷㅚㄴᴥ \n", + "ㄱㅓㅅᴥㅇㅣᴥ ㄸㅗᴥㅎㅏㄴᴥ ㄲㅜㄴᴥㅇㅣㄹᴥ ㄷㅗㅇᴥㅂㅓㄱᴥㅇㅔᴥㅅㅓᴥ ㄷㅗㄹᴥㄹㅏㅆᴥㄱㅗᴥ, ㄴㅏㅁᴥㅂㅗㄱᴥㅇㅢᴥ ㄱㅠᴥㄴㅕㄴᴥ ㄷㅓᴥㄴㅡㄴᴥ ㅅㅓㅁᴥㄱㅘᴥ \n", + "ㄱㅏㅌᴥㅅㅣᴥㅇㅗㄴᴥㅈㅓㄱᴥ ㄱㅏᴥㅊㅓᴥㅎㅏᴥㅁㅕᴥ ㄴㅐᴥ ㄴㅏㄴᴥㅈㅗㅇᴥㅈㅗᴥㄱㅣᴥ ㅈㅔᴥㅈㅗㅇᴥㅎㅜᴥㅇㅔᴥㄷㅡㄹᴥㅇㅗᴥ \n", + "ㅂㅏᴥㅇㅠᴥㅂㅜᴥㄹㅓᴥ ㅈㅣㄴᴥㅊㅓㅅᴥㅇㅡㄹᴥ ㅎㅏㄱᴥㅇㅡᴥㄹㅗᴥㅈㅣᴥ \n", + "ㅇㅏ\n", "\n", "-- RESULT --\n", - "누구 소리에 서 있을 것이로다.\"\n", - " 이어서 서울에 들어가니 어찌 이 말을 듣고 있으니 이 말을 듣고 있으니 이 말을 듣고 있었다. \n", - " 이에 대답하기를, \n", - " \"소유가 다시 이르기를, \n", - " \"이제 이에 이르기를, \n", - " \"소저가 어찌 이르기를, \n", - " \"소저는 이 말을 듣고 이르기를, \n", - " \"소저가 어찌 사람을 들어 가서 그 아름다운 일이었다. \n", - " 이 말에 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 \n", - "보내어 이름을 이루고 있었다. \n", - " 이에 대답하기를, \n", - " \"소유가 이르기를, \n", - " \"이제 사람이 이르기를, \n", - " \"소저가 이르기를, \n", - " \"이제 이에 이르기를, \n", - " \"소유는 이미 사람을 들으니 이 말을 듣고 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 아니 어찌 이를 사람을 사랑하여 이 일을 \n", - "보내어 이름을 이루고 있었다. \n", - " 이에 대답하기를, \n", - " \"소유가 이르기를, \n", - " \"이제 사람이 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들어 가시가 아니라 하오니 이 일은 이 몸이 어찌 가을을 \n", - "보내어 이름을 이루고 있었다. \n", - " 이에 대답하기를, \n", - " \"소유가 이름을 들어 가시가 있으니 이 몸이 어찌 가히 여기를 들은 것이 없사오니 어찌 그 여관이 없는 것이라 하나 이 이름을 \n", - "이루었다. \n", - " \"이 말을 들어 오르니 이 몸이 어찌 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들으니 이 말을 듣고 이르기를, \n", - " \"소저가 이르기를, \n", - " \"소저가 어찌 사람을 들어 가시\n" + "누구 성ㄳㅐㅅ을 이미 못할 황산으로 초면하니, 이에 숨을 \n", + "갑고, 기더께 아뢰기를, \n", + " \"월왕이 대답하되\n", + " \"낭자 이 소저가 댁 떠어 장첩을 내리셨으니 제공이오?\"\n", + " \"영양공주가 소매가 다고하여 ㅇㅌ우나 염라다와 오늘법고 남고 없나이까?\"\n", + " 섬월이 다시 하여금께 두리가 틀림이 심방한 아별푼을 사를 따르도라. \n", + "또한 신공명음, 직녀가 그대좌와 먿고 제 부모지 놋하였다. \n", + "\n", + " 곡적의 전국(안제(삼복, \n", + " 신성도 혼연은 석분추성일 낭자가 없는 말이로다.\"\n", + " 이에 춘운이 이르기를\n", + " '너희를 내겨 옫잔 공주가 들어가게 할 \n", + "것이리요. 상서께서 조청을 나게 하더라. \n", + " \"새소 계란히 들어가 계랑에게 내무를 쩨어 조하고 있음과 어찌 설인즡게 말씀했더라. \n", + " 거문고의 자주 도번이 돌이로 귀론하느냐? 넗어 군사의 속이시었나이다.\"\n", + " 한림이 살선치를 삼기고는 것들이고, 풀사뵈로 함아로서 욘팀하라나니 멀리 상공께서 이에 위엋(간성하오매 슬하를 하기 \n", + "없겠가 아리덟에게 꿁어나라 함주지 않음이 다 서보댁 \n", + "제사는 안듯(손질의 지북 욭과 난양국주께서 회복 \n", + "불바에 세상하며 소유는 바옵나이다. 내 어싯삽 경이 다딴했다. \n", + " 계석의 사유임이 분보다 생귀하고 속에 따랐으니 그 뜻은 딸이었다. 
소유는 혹시 부르시니 그 속세\n", + " 성궁을 말하는 시내를 이니 소저의 집을 말씀들림을 거추에 하면 (화랑하여 첩들에게 매우 그 촉히 된 \n", + "것이 또한 꾼일 동벅에서 돌랐고, 남복의 규년 더는 섬과 \n", + "같시온적 가처하며 내 난종조기 제종후에들오 \n", + "바유부러 진첫을 학으로지 \n", + "\n" ] } ], "source": [ "save_dir = 'data/nine_dreams'\n", - "prime = decompose_text(u\"누구 \")\n", + "prime = hgtk.text.decompose(u\"누구 \")\n", "\n", - "print (\"Prime Text : %s => %s\" % (automata(prime), \"\".join(prime)))\n", + "print (\"Prime Text : %s => %s\" % (hgtk.text.compose(prime), \"\".join(prime)))\n", "n = 2000\n", "\n", "sess = tf.Session()\n", - "sess.run(tf.initialize_all_variables())\n", - "saver = tf.train.Saver(tf.all_variables())\n", + "sess.run(tf.global_variables_initializer())\n", + "saver = tf.train.Saver(tf.global_variables())\n", "ckpt = tf.train.get_checkpoint_state(save_dir)\n", "\n", "# load_name = u'data/nine_dreams/model.ckpt-0'\n", - "load_name = u'data/nine_dreams/model.ckpt-99000'\n", + "# load_name = u'data/nine_dreams/model.ckpt-99000'\n", + "load_name = tf.train.latest_checkpoint(save_dir)\n", "\n", "print (load_name)\n", "\n", @@ -309,12 +327,21 @@ " saver.restore(sess, load_name)\n", " sampled_text = sample(sess, chars, vocab, probs, n, prime)\n", " #print (\"\")\n", - " print (u\"SAMPLED TEXT = %s\" % sampled_text)\n", + " print (u\"SAMPLED TEXT = %s\" % \"\".join(sampled_text))\n", " print (\"\")\n", " print (\"-- RESULT --\")\n", - " print (automata(\"\".join(sampled_text)))" + " print (hgtk.text.compose(\"\".join(sampled_text)))" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -327,21 +354,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/notebooks/char_rnn_sample_tutorial.ipynb b/notebooks/char_rnn_sample_tutorial.ipynb index 7e466a1..cd6b9b1 100755 --- a/notebooks/char_rnn_sample_tutorial.ipynb +++ b/notebooks/char_rnn_sample_tutorial.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "collapsed": false }, @@ -23,13 +23,14 @@ "import argparse\n", "import time\n", "import os\n", - "from six.moves import cPickle\n", + "#from six.moves import cPickle\n", + "import pickle\n", "print (\"Packages Imported\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "collapsed": false }, @@ -38,7 +39,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "'vocab_size' is 99\n" + "'vocab_size' is 98\n" ] } ], @@ -46,7 +47,7 @@ "# Load chars and vocab\n", "load_dir = \"data/linux_kernel\"\n", "with open(os.path.join(load_dir, 'chars_vocab.pkl'), 'rb') as f:\n", - " chars, vocab = cPickle.load(f)\n", + " chars, vocab = pickle.load(f, errors='ignore')\n", "vocab_size = len(vocab) \n", "print (\"'vocab_size' is %d\" % (vocab_size))" ] @@ -61,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -82,27 +83,27 @@ "batch_size = 1 # <= In the training phase, these were both 50\n", "seq_length = 1\n", "\n", + 
"tf.reset_default_graph()\n", + "\n", "# Construct RNN model \n", - "unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)\n", - "cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)\n", + "unitcell = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + "cell = tf.contrib.rnn.MultiRNNCell([unitcell] * num_layers)\n", "input_data = tf.placeholder(tf.int32, [batch_size, seq_length])\n", "istate = cell.zero_state(batch_size, tf.float32)\n", + "\n", "# Weigths \n", "with tf.variable_scope('rnnlm'):\n", " softmax_w = tf.get_variable(\"softmax_w\", [rnn_size, vocab_size])\n", " softmax_b = tf.get_variable(\"softmax_b\", [vocab_size])\n", + " \n", " with tf.device(\"/cpu:0\"):\n", " embedding = tf.get_variable(\"embedding\", [vocab_size, rnn_size])\n", - " inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))\n", + " inputs = tf.split( tf.nn.embedding_lookup(embedding, input_data), seq_length, 1)\n", " inputs = [tf.squeeze(_input, [1]) for _input in inputs]\n", - "# Output\n", - "def loop(prev, _):\n", - " prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)\n", - " prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))\n", - " return tf.nn.embedding_lookup(embedding, prev_symbol)\n", - "outputs, final_state = seq2seq.rnn_decoder(inputs, istate, cell\n", + " \n", + "outputs, final_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, istate, cell\n", " , loop_function=None, scope='rnnlm')\n", - "output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])\n", + "output = tf.reshape(tf.concat(outputs,1 ), [-1, rnn_size])\n", "\n", "logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)\n", "probs = tf.nn.softmax(logits)\n", @@ -112,9 +113,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { - "collapsed": false + "collapsed": false, + "scrolled": false }, "outputs": [ { @@ -128,8 +130,8 @@ "source": [ "# Restore RNN\n", "sess = tf.Session()\n", - "sess.run(tf.initialize_all_variables())\n", - "saver = tf.train.Saver(tf.all_variables())\n", + "sess.run(tf.global_variables_initializer())\n", + "saver = tf.train.Saver(tf.global_variables())\n", "ckpt = tf.train.get_checkpoint_state(load_dir)\n", "\n", "print (ckpt.model_checkpoint_path)\n", @@ -157,54 +159,42 @@ "Sampling Done. \n", "___________________________________________\n", "\n", - "/* : A C. Fruemptly etweennars must be serversed */\n", - "static int __cgroup_hash_power(struct rt_mutex_d *uaddr, int watab, long\n", - "-XIT_PYS__AUTIMER_PAT(seed_class_table_watch, v1->curr);\n", - "}\n", - "\n", - "static void down_cpusets(struct pid;\n", - "static int pid_thread(voids_mm)\n", + "/* *Here the chain\n", + " */\n", + "static void print_log_end(unsigned long interval,\n", + "\t\t\tuser[it.stats * sinfol_user, user);\n", + "cond_sig[dsubflate = this_leftimeoutcweinet].interrum = current->post;\n", + "EXPORT_SYMBOL_GPL(gf_prock, char *page)\n", "{\n", - "\tif (ps->cpumainte_to_cgroup_grp <= NULL)\n", - "\t\treturn 0;\n", - "}\n", + "\tset_cpuset = {\n", + "\t\t.spec = {\t\t\t\t\"earay_my_changed 0 -%%d\", 5);\n", + "\t/*\n", + "\t * If must mary for inode-added that arratible of (in reduse the subsystepes console, high table,\n", + "\t * the mask pos the signal expiue, do existing. 
This\n", + "\t * the systems searwards the needs memory attach to the markers.\n", + "\t */\n", + "\tif (!timer.fm)\n", + "\t\tbreak;\n", "\n", - "conset sched_VRICE_SOFTIRQ_DISU{\n", - "\tsoftirq_signal(this_css_set_bytes));\n", - "}\n", + "\tswitchreptimer *task;\n", + "\tin (statt_time[bytes_and_cpu_timer_ticks);\n", "\n", - "void private = {\n", - "\t\t.mode\t\t= CPUCLOCK_BALANCE,\n", - "\t\t.process\t\t= optime)\n", + "\tif (USTEX_TIMERS + subj->create->destion, 0);\n", + "\tsegindel_task = data;\n", "\n", - "/*\n", - " * The are\n", - " *\ten\n", - " * @buf' - for so allows the condext it of it regions)\n", - " * massessiging that Sto be stime in the expoxes\n", - " */\n", - "void __fsix;\n", - "\tstruct audit_chunk *tsk;\n", - "\n", - "\tkey_utvec_oper(struct *read_ns, struct futex_ckernel);\n", - "\tint atomic_attime = res->init_switch(void),\n", - "\t\t\t -+signal->state = 0;\n", - "\ttmr = tmp;\n", - "\tprintk(\"%s\\n\", signal, &max_huts_string, 1, look_t *)(modemask++);\n", - "\tup_sem(cft, &(max))) {\n", - "\t\tif (probes)\n", - "\t\t\tset_cpu(name == 0)\n", - "\t\t\tgoto out;\n", - "\t}\n", - "\n", - "\tpposs_unlock(*pefmask_plocks);\n", - "\taudit_log_lock_fuces(rq);\n", + "\tret = sched_cntex = 0;\n", + "\tstruct file *mm_faylsge(pi_state, struct cfs_rq, new_name);\n", "}\n", "\n", - "static void again;\n", + "void sprefcount(struct module *mutex)\n", + "{\n", + "\tint pidmad(p) >> task && klocks_set_modulest(f, 0)\n", + "\t\ts->se.simbarct audit_tructure &&\n", + " &module_module_entry(head));\n", + "}\n", "\n", - "int\n", - "con\n" + "void chain_console_sub(sysswoncode+,\n", + "\t\t.maxlen\t\t= cpu_open(struct \n" ] } ], @@ -256,21 +246,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/notebooks/char_rnn_train_hangul.ipynb b/notebooks/char_rnn_train_hangul.ipynb index eb738e9..1357ab7 100755 --- a/notebooks/char_rnn_train_hangul.ipynb +++ b/notebooks/char_rnn_train_hangul.ipynb @@ -31,9 +31,11 @@ "import argparse\n", "import time\n", "import os\n", + "import hgtk\n", + "\n", "from six.moves import cPickle\n", "from TextLoader import *\n", - "from Hangulpy import *\n", + "\n", "print (\"Packages Imported\")" ] }, @@ -87,8 +89,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "type of 'data_loader.vocab' is , length is 76\n", - "type of 'data_loader.chars' is , length is 76\n" + "type of 'data_loader.vocab' is , length is 76\n", + "type of 'data_loader.chars' is , length is 76\n" ] } ], @@ -121,7 +123,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{u'_': 69, u'6': 59, u':': 57, u'\\n': 19, u'4': 67, u'5': 63, u'>': 75, u'!': 52, u' ': 1, u'\"': 28, u'\\u1d25': 0, u\"'\": 49, u')': 46, u'(': 45, u'-': 65, u',': 27, u'.': 24, u'\\u3131': 7, u'0': 73, u'\\u3133': 60, u'\\u3132': 29, u'\\u3135': 50, u'\\u3134': 4, u'\\u3137': 13, u'\\u3136': 44, u'\\u3139': 5, u'\\u3138': 32, u'\\u313b': 55, u'\\u313a': 48, u'\\u313c': 54, u'?': 41, u'3': 66, u'\\u3141': 12, u'\\u3140': 51, u'\\u3143': 47, u'\\u3142': 17, u'\\u3145': 10, u'\\u3144': 43, u'\\u3147': 2, u'\\u3146': 22, u'\\u3149': 40, u'\\u3148': 15, u'\\u314b': 42, 
u'\\u314a': 23, u'\\u314d': 31, u'\\u314c': 30, u'\\u314f': 3, u'\\u314e': 14, u'\\u3151': 34, u'\\u3150': 21, u'\\u3153': 11, u'\\u3152': 74, u'\\u3155': 18, u'\\u3154': 20, u'\\u3157': 9, u'\\u3156': 39, u'\\u3159': 53, u'\\u3158': 26, u'\\u315b': 38, u'\\u315a': 33, u'\\u315d': 36, u'\\u315c': 16, u'\\u315f': 35, u'\\u315e': 61, u'\\u3161': 8, u'\\u3160': 37, u'\\u3163': 6, u'\\u3162': 25, u'\\x1a': 72, u'9': 64, u'7': 71, u'2': 62, u'1': 58, u'\\u313f': 56, u'\\u313e': 70, u'8': 68}\n" + "{' ': 1, 'ㅇ': 2, 'ㄱ': 7, 'ㅔ': 20, 'ㅋ': 42, 'ㄲ': 29, 'ㅢ': 25, '-': 65, 'ㄹ': 5, '\\x1a': 72, 'ㅈ': 15, 'ㅖ': 39, 'ㅐ': 21, '?': 41, ':': 57, '3': 66, '.': 24, 'ㅅ': 10, 'ㅍ': 31, '1': 58, 'ㅚ': 33, 'ㅄ': 43, '7': 71, 'ㄴ': 4, '5': 63, 'ㄿ': 56, '2': 62, 'ᴥ': 0, 'ㅏ': 3, 'ㅑ': 34, 'ㅀ': 51, 'ㅟ': 35, '6': 59, 'ㅆ': 22, 'ㄳ': 60, 'ㅂ': 17, 'ㅕ': 18, '(': 45, 'ㅞ': 61, 'ㅓ': 11, 'ㅡ': 8, '\"': 28, 'ㅊ': 23, '!': 52, 'ㅒ': 74, '>': 75, 'ㅗ': 9, 'ㅃ': 47, 'ㅘ': 26, 'ㅝ': 36, '0': 73, '\\n': 19, '4': 67, 'ㅌ': 30, 'ㅙ': 53, 'ㄻ': 55, 'ㄵ': 50, \"'\": 49, ',': 27, 'ㅠ': 37, '9': 64, 'ㅎ': 14, '_': 69, 'ㄶ': 44, 'ㅣ': 6, 'ㅁ': 12, 'ㄺ': 48, 'ㅜ': 16, 'ㅛ': 38, '8': 68, 'ㄾ': 70, ')': 46, 'ㅉ': 40, 'ㄸ': 32, 'ㄷ': 13, 'ㄼ': 54}\n" ] } ], @@ -147,7 +149,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "(u'\\u1d25', u' ', u'\\u3147', u'\\u314f', u'\\u3134', u'\\u3139', u'\\u3163', u'\\u3131', u'\\u3161', u'\\u3157', u'\\u3145', u'\\u3153', u'\\u3141', u'\\u3137', u'\\u314e', u'\\u3148', u'\\u315c', u'\\u3142', u'\\u3155', u'\\n', u'\\u3154', u'\\u3150', u'\\u3146', u'\\u314a', u'.', u'\\u3162', u'\\u3158', u',', u'\"', u'\\u3132', u'\\u314c', u'\\u314d', u'\\u3138', u'\\u315a', u'\\u3151', u'\\u315f', u'\\u315d', u'\\u3160', u'\\u315b', u'\\u3156', u'\\u3149', u'?', u'\\u314b', u'\\u3144', u'\\u3136', u'(', u')', u'\\u3143', u'\\u313a', u\"'\", u'\\u3135', u'\\u3140', u'!', u'\\u3159', u'\\u313c', u'\\u313b', u'\\u313f', u':', u'1', u'6', u'\\u3133', u'\\u315e', u'2', u'5', u'9', u'-', u'3', u'4', u'8', u'_', u'\\u313e', u'7', u'\\x1a', u'0', u'\\u3152', u'>')\n", + "('ᴥ', ' ', 'ㅇ', 'ㅏ', 'ㄴ', 'ㄹ', 'ㅣ', 'ㄱ', 'ㅡ', 'ㅗ', 'ㅅ', 'ㅓ', 'ㅁ', 'ㄷ', 'ㅎ', 'ㅈ', 'ㅜ', 'ㅂ', 'ㅕ', '\\n', 'ㅔ', 'ㅐ', 'ㅆ', 'ㅊ', '.', 'ㅢ', 'ㅘ', ',', '\"', 'ㄲ', 'ㅌ', 'ㅍ', 'ㄸ', 'ㅚ', 'ㅑ', 'ㅟ', 'ㅝ', 'ㅠ', 'ㅛ', 'ㅖ', 'ㅉ', '?', 'ㅋ', 'ㅄ', 'ㄶ', '(', ')', 'ㅃ', 'ㄺ', \"'\", 'ㄵ', 'ㅀ', '!', 'ㅙ', 'ㄼ', 'ㄻ', 'ㄿ', ':', '1', '6', 'ㄳ', 'ㅞ', '2', '5', '9', '-', '3', '4', '8', '_', 'ㄾ', '7', '\\x1a', '0', 'ㅒ', '>')\n", "ᴥ\n" ] } @@ -167,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -176,25 +178,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "Type of 'x' is . Shape is (50, 50)\n", + "Type of 'x' is . Shape is (50, 50)\n", "x looks like \n", - "[[ 3 5 0 ..., 3 4 0]\n", - " [20 0 1 ..., 13 3 0]\n", - " [10 11 2 ..., 1 7 3]\n", + "[[19 19 1 ..., 23 0 4]\n", + " [ 1 19 42 ..., 6 0 2]\n", + " [ 2 8 0 ..., 19 1 1]\n", " ..., \n", - " [ 1 17 6 ..., 0 1 7]\n", - " [ 0 14 3 ..., 12 3 4]\n", - " [ 0 7 3 ..., 1 15 3]]\n", - "\n", - "Type of 'y' is . Shape is (50, 50)\n", + " [ 1 7 8 ..., 2 6 0]\n", + " [ 6 4 0 ..., 17 9 7]\n", + " [ 1 19 17 ..., 17 16 5]]\n", + "Type of 'y' is . 
Shape is (50, 50)\n", "y looks like \n", - "[[ 5 0 1 ..., 4 0 15]\n", - " [ 0 1 7 ..., 3 0 24]\n", - " [11 2 0 ..., 7 3 0]\n", + "[[19 1 1 ..., 0 4 3]\n", + " [19 42 11 ..., 0 2 20]\n", + " [ 8 0 5 ..., 1 1 10]\n", " ..., \n", - " [17 6 0 ..., 1 7 9]\n", - " [14 3 0 ..., 3 4 0]\n", - " [ 7 3 0 ..., 15 3 2]]\n" + " [ 7 8 0 ..., 6 0 1]\n", + " [ 4 0 12 ..., 9 7 0]\n", + " [19 17 16 ..., 16 5 0]]\n" ] } ], @@ -216,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -235,9 +236,11 @@ "grad_clip = 5. # <= GRADIENT CLIPPING (PRACTICALLY IMPORTANT)\n", "vocab_size = data_loader.vocab_size\n", "\n", + "tf.reset_default_graph()\n", + "\n", "# SELECT RNN CELL (MULTI LAYER LSTM)\n", - "unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)\n", - "cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)\n", + "unitcell = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + "cell = tf.contrib.rnn.MultiRNNCell([unitcell] * num_layers)\n", "\n", "# Set paths to the graph\n", "input_data = tf.placeholder(tf.int32, [batch_size, seq_length])\n", @@ -250,9 +253,10 @@ " softmax_b = tf.get_variable(\"softmax_b\", [vocab_size])\n", " with tf.device(\"/cpu:0\"):\n", " embedding = tf.get_variable(\"embedding\", [vocab_size, rnn_size])\n", - " inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(\n", - " embedding, input_data))\n", + " inputs = tf.split(tf.nn.embedding_lookup(\n", + " embedding, input_data), seq_length, 1)\n", " inputs = [tf.squeeze(input_, [1]) for input_ in inputs]\n", + " \n", "print (\"Network ready\")" ] }, @@ -265,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -280,9 +284,9 @@ ], "source": [ "# Output of RNN\n", - "outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, initial_state\n", + "outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, initial_state\n", " , cell, loop_function=None, scope='rnnlm')\n", - "output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])\n", + "output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])\n", "logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)\n", "\n", "# Next word probability\n", @@ -299,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -313,7 +317,7 @@ } ], "source": [ - "loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input\n", + "loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([logits], # Input\n", " [tf.reshape(targets, [-1])], # Target\n", " [tf.ones([batch_size * seq_length])], # Weight\n", " vocab_size)\n", @@ -329,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "collapsed": false }, @@ -370,7 +374,17 @@ "collapsed": false, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0/99500 (epoch: 0), loss: 4.319, time/batch: 5.531\n", + "model saved to data/nine_dreams/model.ckpt\n", + "100/99500 (epoch: 0), loss: 2.949, time/batch: 3.866\n" + ] + } + ], "source": [ "num_epochs = 500\n", "save_every = 1000\n", @@ -379,10 +393,9 @@ "\n", "save_dir = 'data/nine_dreams'\n", "sess = tf.Session()\n", - "sess.run(tf.initialize_all_variables())\n", - "summary_writer = tf.train.SummaryWriter(save_dir\n", - " , graph=sess.graph)\n", - "saver = tf.train.Saver(tf.all_variables())\n", + "sess.run(tf.global_variables_initializer())\n", + "summary_writer = 
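tf.summary.FileWriter(save_dir, graph=sess.graph)\n", + "saver = tf.train.Saver(tf.global_variables())\n", "for e in range(num_epochs): # for all epochs\n", "\n", " # LEARNING RATE SCHEDULING \n", @@ -425,21 +438,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4,

That closes out char_rnn_train_hangul.ipynb; the tutorial notebook below receives the same treatment. For reference, these are the TensorFlow 1.x renames applied uniformly across all four notebooks, plus one caveat the diff leaves in place. The snippet is an illustrative sketch assuming TF 1.x, not part of the change itself:

    # TF 1.x renames applied throughout this change:
    #   tf.nn.rnn_cell.BasicLSTMCell / MultiRNNCell -> tf.contrib.rnn.*
    #   tf.nn.seq2seq.*                             -> tf.contrib.legacy_seq2seq.*
    #   tf.split(axis, num, value)                  -> tf.split(value, num, axis)
    #   tf.concat(axis, values)                     -> tf.concat(values, axis)
    #   tf.initialize_all_variables()               -> tf.global_variables_initializer()
    #   tf.all_variables()                          -> tf.global_variables()
    #   tf.train.SummaryWriter                      -> tf.summary.FileWriter
    import tensorflow as tf

    # Caveat kept as-is by this diff: `[unitcell] * num_layers` stacks the
    # *same* cell object num_layers times. Newer TF 1.x releases expect one
    # distinct cell per layer (sizes below are the ones the Hangul notebooks use):
    cells = [tf.contrib.rnn.BasicLSTMCell(512) for _ in range(3)]
    cell = tf.contrib.rnn.MultiRNNCell(cells)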
diff --git a/notebooks/char_rnn_train_tutorial.ipynb b/notebooks/char_rnn_train_tutorial.ipynb index aea4dff..072a497 100755 --- a/notebooks/char_rnn_train_tutorial.ipynb +++ b/notebooks/char_rnn_train_tutorial.ipynb @@ -2,19 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Packages Imported\n" - ] - } - ], + "outputs": [], "source": [ "# Import Packages\n", "import numpy as np\n", @@ -29,19 +21,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Text loaded from 'data/linux_kernel/input.txt'\n" - ] - } - ], + "outputs": [], "source": [ "# Load text\n", "# data_dir = \"data/tinyshakespeare\"\n", @@ -55,31 +39,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type of 'counter.items()' is and length is 99\n", - "[0/3] (' ', 171222)\n", - "[1/3] ('$', 61)\n", - "[2/3] ('(', 23412)\n", - "[3/3] (',', 17025)\n", - "[4/3] ('0', 4322)\n", - " \n", - "Type of 'count_pairs' is and length is 99\n", - "[0/3] (' ', 171222)\n", - "[1/3] ('e', 113021)\n", - "[2/3] ('t', 102154)\n", - "[3/3] ('r', 76185)\n", - "[4/3] ('i', 75486)\n" - ] - } - ], + "outputs": [], "source": [ "# Preprocess Text\n", "# First, count the number of characters\n", @@ -101,32 +65,12 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type of 'chars' is and length is 99\n", - "[0/3] chars[0] is ' '\n", - "[1/3] chars[1] is 'e'\n", - "[2/3] chars[2] is 't'\n", - "[3/3] chars[3] is 'r'\n", - "[4/3] chars[4] is 'i'\n", - "\n", - "Type of 'vocab' is and length is 99\n", - "[0/3] vocab[' '] is 0\n", - "[1/3] vocab['e'] is 1\n", - "[2/3] vocab['t'] is 2\n", - "[3/3] vocab['r'] is 3\n", - "[4/3] vocab['i'] is 4\n" - ] - } - ], + "outputs": [], "source": [ "# Let's make dictionary\n", "chars, counts = zip(*count_pairs)\n", @@ -160,32 +104,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type of 'corpus' is , shape is (1708871,), and length is 1708871\n", - "\n", - "'corpus' looks like [36 22 7 0 22 0 0 13 4 8]\n", - "[0/10] chars[36] corresponds to '/'\n", - "[1/10] chars[22] corresponds to '*'\n", - "[2/10] chars[07] corresponds to '\n", - "'\n", - "[3/10] chars[00] corresponds to ' '\n", -
"[4/10] chars[22] corresponds to '*'\n", - "[5/10] chars[00] corresponds to ' '\n", - "[6/10] chars[00] corresponds to ' '\n", - "[7/10] chars[13] corresponds to 'l'\n", - "[8/10] chars[04] corresponds to 'i'\n", - "[9/10] chars[08] corresponds to 'n'\n" - ] - } - ], + "outputs": [], "source": [ "# Now convert all text to index using vocab! \n", "corpus = np.array(list(map(vocab.get, data)))\n", @@ -202,30 +125,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "xdata is ... [36 22 7 ..., 11 25 3] and length is 1700000\n", - "ydata is ... [22 7 0 ..., 25 3 36] and length is 1700000\n", - "\n", - "Type of 'xbatches' is and length is 170\n", - "Type of 'ybatches' is and length is 170\n", - "\n", - "Type of 'temp' is and length is 5\n", - "Type of 'temp[0]' is and shape is (50, 200)\n", - "Type of 'temp[1]' is and shape is (50, 200)\n", - "Type of 'temp[2]' is and shape is (50, 200)\n", - "Type of 'temp[3]' is and shape is (50, 200)\n", - "Type of 'temp[4]' is and shape is (50, 200)\n" - ] - } - ], + "outputs": [], "source": [ "# Generate batch data \n", "batch_size = 50\n", @@ -269,19 +173,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Network Ready\n" - ] - } - ], + "outputs": [], "source": [ "# Important RNN parameters \n", "vocab_size = len(vocab)\n", @@ -290,8 +186,8 @@ "grad_clip = 5.\n", "\n", "# Construct RNN model \n", - "unitcell = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)\n", - "cell = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)\n", + "unitcell = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n", + "cell = tf.contrib.rnn.MultiRNNCell([unitcell] * num_layers)\n", "input_data = tf.placeholder(tf.int32, [batch_size, seq_length])\n", "targets = tf.placeholder(tf.int32, [batch_size, seq_length])\n", "istate = cell.zero_state(batch_size, tf.float32)\n", @@ -301,7 +197,7 @@ " softmax_b = tf.get_variable(\"softmax_b\", [vocab_size])\n", " with tf.device(\"/cpu:0\"):\n", " embedding = tf.get_variable(\"embedding\", [vocab_size, rnn_size])\n", - " inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))\n", + " inputs = tf.split(tf.nn.embedding_lookup(embedding, input_data), seq_length, 1)\n", " inputs = [tf.squeeze(_input, [1]) for _input in inputs]\n", "# Output\n", "def loop(prev, _):\n", @@ -313,13 +209,13 @@ " in order to generate the i+1-st input, and decoder_inputs will be ignored,\n", " except for the first element (\"GO\" symbol).\n", "\"\"\" \n", - "outputs, last_state = tf.nn.seq2seq.rnn_decoder(inputs, istate, cell\n", + "outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(inputs, istate, cell\n", " , loop_function=None, scope='rnnlm')\n", - "output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])\n", + "output = tf.reshape(tf.concat(outputs, 1), [-1, rnn_size])\n", "logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)\n", "probs = tf.nn.softmax(logits)\n", "# Loss\n", - "loss = tf.nn.seq2seq.sequence_loss_by_example([logits], # Input\n", + "loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example([logits], # Input\n", " [tf.reshape(targets, [-1])], # Target\n", " [tf.ones([batch_size * seq_length])], # Weight \n", " vocab_size)\n", @@ -337,121 +233,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { 
"collapsed": false, "scrolled": true }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[0/8500] cost: 5.1518 / Each batch learning took 6.2978 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[100/8500] cost: 3.0705 / Each batch learning took 0.3866 sec\n", - "[200/8500] cost: 2.5382 / Each batch learning took 0.3910 sec\n", - "[300/8500] cost: 2.3884 / Each batch learning took 0.5311 sec\n", - "[400/8500] cost: 2.2029 / Each batch learning took 0.3930 sec\n", - "[500/8500] cost: 1.9560 / Each batch learning took 0.5088 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[600/8500] cost: 1.9134 / Each batch learning took 0.3861 sec\n", - "[700/8500] cost: 1.7579 / Each batch learning took 0.5502 sec\n", - "[800/8500] cost: 1.7580 / Each batch learning took 0.4546 sec\n", - "[900/8500] cost: 1.6952 / Each batch learning took 0.3958 sec\n", - "[1000/8500] cost: 1.5991 / Each batch learning took 0.4516 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[1100/8500] cost: 1.6036 / Each batch learning took 0.3708 sec\n", - "[1200/8500] cost: 1.4374 / Each batch learning took 0.4035 sec\n", - "[1300/8500] cost: 1.5513 / Each batch learning took 0.4629 sec\n", - "[1400/8500] cost: 1.4814 / Each batch learning took 0.5162 sec\n", - "[1500/8500] cost: 1.4986 / Each batch learning took 0.4023 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[1600/8500] cost: 1.4957 / Each batch learning took 0.5584 sec\n", - "[1700/8500] cost: 1.4569 / Each batch learning took 0.5504 sec\n", - "[1800/8500] cost: 1.3966 / Each batch learning took 0.4409 sec\n", - "[1900/8500] cost: 1.3742 / Each batch learning took 0.8715 sec\n", - "[2000/8500] cost: 1.4071 / Each batch learning took 0.7707 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[2100/8500] cost: 1.4037 / Each batch learning took 0.4636 sec\n", - "[2200/8500] cost: 1.3220 / Each batch learning took 0.6967 sec\n", - "[2300/8500] cost: 1.3267 / Each batch learning took 0.7644 sec\n", - "[2400/8500] cost: 1.2870 / Each batch learning took 0.5228 sec\n", - "[2500/8500] cost: 1.3171 / Each batch learning took 0.5671 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[2600/8500] cost: 1.2876 / Each batch learning took 0.5576 sec\n", - "[2700/8500] cost: 1.2571 / Each batch learning took 0.4314 sec\n", - "[2800/8500] cost: 1.3123 / Each batch learning took 0.5939 sec\n", - "[2900/8500] cost: 1.1588 / Each batch learning took 0.6087 sec\n", - "[3000/8500] cost: 1.2834 / Each batch learning took 0.5066 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[3100/8500] cost: 1.2362 / Each batch learning took 0.4319 sec\n", - "[3200/8500] cost: 1.2768 / Each batch learning took 0.4418 sec\n", - "[3300/8500] cost: 1.2836 / Each batch learning took 0.6158 sec\n", - "[3400/8500] cost: 1.2830 / Each batch learning took 0.7412 sec\n", - "[3500/8500] cost: 1.2296 / Each batch learning took 0.7596 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[3600/8500] cost: 1.2142 / Each batch learning took 0.8046 sec\n", - "[3700/8500] cost: 1.2474 / Each batch learning took 0.8149 sec\n", - "[3800/8500] cost: 1.2455 / Each batch learning took 0.9514 sec\n", - "[3900/8500] cost: 1.1910 / Each batch learning took 1.0230 sec\n", - "[4000/8500] cost: 1.1874 / Each batch learning took 0.7037 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[4100/8500] cost: 1.1602 / Each batch learning took 0.6907 sec\n", - "[4200/8500] cost: 1.1896 / 
Each batch learning took 0.6589 sec\n", - "[4300/8500] cost: 1.1680 / Each batch learning took 0.6051 sec\n", - "[4400/8500] cost: 1.1472 / Each batch learning took 0.4314 sec\n", - "[4500/8500] cost: 1.2073 / Each batch learning took 0.7571 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[4600/8500] cost: 1.0601 / Each batch learning took 0.8487 sec\n", - "[4700/8500] cost: 1.1822 / Each batch learning took 0.5197 sec\n", - "[4800/8500] cost: 1.1427 / Each batch learning took 0.5184 sec\n", - "[4900/8500] cost: 1.1774 / Each batch learning took 0.4620 sec\n", - "[5000/8500] cost: 1.1902 / Each batch learning took 0.4941 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[5100/8500] cost: 1.1960 / Each batch learning took 0.7985 sec\n", - "[5200/8500] cost: 1.1568 / Each batch learning took 0.7381 sec\n", - "[5300/8500] cost: 1.1487 / Each batch learning took 0.5911 sec\n", - "[5400/8500] cost: 1.1710 / Each batch learning took 0.8420 sec\n", - "[5500/8500] cost: 1.1684 / Each batch learning took 0.7788 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[5600/8500] cost: 1.1337 / Each batch learning took 0.7290 sec\n", - "[5700/8500] cost: 1.1234 / Each batch learning took 1.0153 sec\n", - "[5800/8500] cost: 1.1034 / Each batch learning took 0.7469 sec\n", - "[5900/8500] cost: 1.1276 / Each batch learning took 0.7259 sec\n", - "[6000/8500] cost: 1.1073 / Each batch learning took 0.7722 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[6100/8500] cost: 1.0955 / Each batch learning took 0.7700 sec\n", - "[6200/8500] cost: 1.1489 / Each batch learning took 0.4165 sec\n", - "[6300/8500] cost: 1.0120 / Each batch learning took 0.7359 sec\n", - "[6400/8500] cost: 1.1296 / Each batch learning took 0.6871 sec\n", - "[6500/8500] cost: 1.0963 / Each batch learning took 0.6530 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[6600/8500] cost: 1.1259 / Each batch learning took 0.4506 sec\n", - "[6700/8500] cost: 1.1422 / Each batch learning took 0.3957 sec\n", - "[6800/8500] cost: 1.1431 / Each batch learning took 0.4530 sec\n", - "[6900/8500] cost: 1.1168 / Each batch learning took 0.4068 sec\n", - "[7000/8500] cost: 1.1119 / Each batch learning took 1.0343 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[7100/8500] cost: 1.1255 / Each batch learning took 0.4080 sec\n", - "[7200/8500] cost: 1.1266 / Each batch learning took 0.3840 sec\n", - "[7300/8500] cost: 1.1036 / Each batch learning took 0.8628 sec\n", - "[7400/8500] cost: 1.0860 / Each batch learning took 0.4150 sec\n", - "[7500/8500] cost: 1.0681 / Each batch learning took 0.4738 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[7600/8500] cost: 1.0921 / Each batch learning took 0.4141 sec\n", - "[7700/8500] cost: 1.0728 / Each batch learning took 0.3944 sec\n", - "[7800/8500] cost: 1.0644 / Each batch learning took 0.4473 sec\n", - "[7900/8500] cost: 1.1155 / Each batch learning took 0.4841 sec\n", - "[8000/8500] cost: 0.9819 / Each batch learning took 0.4198 sec\n", - "model saved to 'data/linux_kernel/model.ckpt'\n", - "[8100/8500] cost: 1.0945 / Each batch learning took 0.4452 sec\n", - "[8200/8500] cost: 1.0682 / Each batch learning took 0.4038 sec\n", - "[8300/8500] cost: 1.0939 / Each batch learning took 0.4889 sec\n", - "[8400/8500] cost: 1.1111 / Each batch learning took 0.3995 sec\n" - ] - } - ], + "outputs": [], "source": [ "# Train the model!\n", "num_epochs = 50\n", @@ -460,9 +247,9 @@ "decay_rate = 0.97\n", "\n", "sess = 
tf.Session()\n", - "sess.run(tf.initialize_all_variables())\n", - "summary_writer = tf.train.SummaryWriter(save_dir, graph=sess.graph)\n", - "saver = tf.train.Saver(tf.all_variables())\n", + "sess.run(tf.global_variables_initializer())\n", + "summary_writer = tf.summary.FileWriter(save_dir, graph=sess.graph)\n", + "saver = tf.train.Saver(tf.global_variables())\n", "init_time = time.time()\n", "for epoch in range(num_epochs):\n", " # Learning rate scheduling \n", @@ -505,19 +292,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Done!! It took 5238.4040 second. \n" - ] - } - ], + "outputs": [], "source": [ "print (\"Done!! It took %.4f second. \" %(time.time() - init_time))" ] @@ -543,21 +322,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.5.2" } }, "nbformat": 4, diff --git a/notebooks/data/linux_kernel/chars_vocab.pkl b/notebooks/data/linux_kernel/chars_vocab.pkl index 0bd3da3..bf5aa86 100755 Binary files a/notebooks/data/linux_kernel/chars_vocab.pkl and b/notebooks/data/linux_kernel/chars_vocab.pkl differ diff --git a/notebooks/data/linux_kernel/checkpoint b/notebooks/data/linux_kernel/checkpoint old mode 100755 new mode 100644 diff --git a/notebooks/data/linux_kernel/events.out.tfevents.1468345268.cpslab b/notebooks/data/linux_kernel/events.out.tfevents.1468345268.cpslab deleted file mode 100755 index 3076160..0000000 Binary files a/notebooks/data/linux_kernel/events.out.tfevents.1468345268.cpslab and /dev/null differ diff --git a/notebooks/data/linux_kernel/events.out.tfevents.1468345442.cpslab b/notebooks/data/linux_kernel/events.out.tfevents.1468345442.cpslab deleted file mode 100755 index 1000bbc..0000000 Binary files a/notebooks/data/linux_kernel/events.out.tfevents.1468345442.cpslab and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-6000 b/notebooks/data/linux_kernel/model.ckpt-6000 deleted file mode 100755 index d23873f..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-6000 and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-6000.meta b/notebooks/data/linux_kernel/model.ckpt-6000.meta deleted file mode 100755 index b794432..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-6000.meta and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-6500 b/notebooks/data/linux_kernel/model.ckpt-6500 deleted file mode 100755 index 04f71ae..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-6500 and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-6500.meta b/notebooks/data/linux_kernel/model.ckpt-6500.meta deleted file mode 100755 index 1573f00..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-6500.meta and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-7000 b/notebooks/data/linux_kernel/model.ckpt-7000 deleted file mode 100755 index 6e474be..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-7000 and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-7000.meta 
b/notebooks/data/linux_kernel/model.ckpt-7000.meta deleted file mode 100755 index 0df9683..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-7000.meta and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-7500 b/notebooks/data/linux_kernel/model.ckpt-7500 deleted file mode 100755 index 44b52d5..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-7500 and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-7500.meta b/notebooks/data/linux_kernel/model.ckpt-7500.meta deleted file mode 100755 index 0dd1426..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-7500.meta and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-8000 b/notebooks/data/linux_kernel/model.ckpt-8000 deleted file mode 100755 index b634bd3..0000000 Binary files a/notebooks/data/linux_kernel/model.ckpt-8000 and /dev/null differ diff --git a/notebooks/data/linux_kernel/model.ckpt-8000.data-00000-of-00001 b/notebooks/data/linux_kernel/model.ckpt-8000.data-00000-of-00001 new file mode 100644 index 0000000..beda17f Binary files /dev/null and b/notebooks/data/linux_kernel/model.ckpt-8000.data-00000-of-00001 differ diff --git a/notebooks/data/linux_kernel/model.ckpt-8000.index b/notebooks/data/linux_kernel/model.ckpt-8000.index new file mode 100644 index 0000000..d4bdf34 Binary files /dev/null and b/notebooks/data/linux_kernel/model.ckpt-8000.index differ diff --git a/notebooks/data/linux_kernel/model.ckpt-8000.meta b/notebooks/data/linux_kernel/model.ckpt-8000.meta old mode 100755 new mode 100644 index ebfe7c9..367ec5c Binary files a/notebooks/data/linux_kernel/model.ckpt-8000.meta and b/notebooks/data/linux_kernel/model.ckpt-8000.meta differ diff --git a/notebooks/data/nine_dreams/checkpoint b/notebooks/data/nine_dreams/checkpoint new file mode 100644 index 0000000..45024f2 --- /dev/null +++ b/notebooks/data/nine_dreams/checkpoint @@ -0,0 +1,6 @@ +model_checkpoint_path: "model.ckpt-99000" +all_model_checkpoint_paths: "model.ckpt-95000" +all_model_checkpoint_paths: "model.ckpt-96000" +all_model_checkpoint_paths: "model.ckpt-97000" +all_model_checkpoint_paths: "model.ckpt-98000" +all_model_checkpoint_paths: "model.ckpt-99000" diff --git a/notebooks/data/nine_dreams/model.ckpt-99000.data-00000-of-00001 b/notebooks/data/nine_dreams/model.ckpt-99000.data-00000-of-00001 new file mode 100644 index 0000000..c150225 Binary files /dev/null and b/notebooks/data/nine_dreams/model.ckpt-99000.data-00000-of-00001 differ diff --git a/notebooks/data/nine_dreams/model.ckpt-99000.index b/notebooks/data/nine_dreams/model.ckpt-99000.index new file mode 100644 index 0000000..820df86 Binary files /dev/null and b/notebooks/data/nine_dreams/model.ckpt-99000.index differ diff --git a/notebooks/data/nine_dreams/model.ckpt-99000.meta b/notebooks/data/nine_dreams/model.ckpt-99000.meta new file mode 100644 index 0000000..735ecfd Binary files /dev/null and b/notebooks/data/nine_dreams/model.ckpt-99000.meta differ diff --git "a/notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (1).jpeg" b/notebooks/images/cats/download (1).jpeg similarity index 100% rename from "notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (1).jpeg" rename to notebooks/images/cats/download (1).jpeg diff --git "a/notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 
(2).jpeg" b/notebooks/images/cats/download (2).jpeg similarity index 100% rename from "notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (2).jpeg" rename to notebooks/images/cats/download (2).jpeg diff --git "a/notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (3).jpeg" b/notebooks/images/cats/download (3).jpeg similarity index 100% rename from "notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (3).jpeg" rename to notebooks/images/cats/download (3).jpeg diff --git "a/notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263.jpeg" b/notebooks/images/cats/download.jpeg similarity index 100% rename from "notebooks/images/cats/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263.jpeg" rename to notebooks/images/cats/download.jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (1).jpeg" b/notebooks/images/dogs/download (1).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (1).jpeg" rename to notebooks/images/dogs/download (1).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (2).jpeg" b/notebooks/images/dogs/download (2).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (2).jpeg" rename to notebooks/images/dogs/download (2).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (3).jpeg" b/notebooks/images/dogs/download (3).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (3).jpeg" rename to notebooks/images/dogs/download (3).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (4).jpeg" b/notebooks/images/dogs/download (4).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (4).jpeg" rename to notebooks/images/dogs/download (4).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (5).jpeg" b/notebooks/images/dogs/download (5).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (5).jpeg" rename to notebooks/images/dogs/download (5).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (6).jpeg" b/notebooks/images/dogs/download (6).jpeg similarity index 100% rename from 
"notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (6).jpeg" rename to notebooks/images/dogs/download (6).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (7).jpeg" b/notebooks/images/dogs/download (7).jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263 (7).jpeg" rename to notebooks/images/dogs/download (7).jpeg diff --git "a/notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263.jpeg" b/notebooks/images/dogs/download.jpeg similarity index 100% rename from "notebooks/images/dogs/\341\204\203\341\205\241\341\204\213\341\205\256\341\206\253\341\204\205\341\205\251\341\204\203\341\205\263.jpeg" rename to notebooks/images/dogs/download.jpeg