In [12]:
import pickle
import numpy as np
import random
from tqdm import tqdm
import os
import os.path
from clear_texts import *

import tensorflow as tf


def loggin(log_str):
    """Print a log message to standard output."""
    print(log_str)
    
# Show the TensorFlow version string so the run records which API was used.
print(tf.VERSION)

1.1.0


In [13]:
# Functions for generating training sequences, decoding, encoding and text generation.
# All texts returned by get_textes() are joined into one string.
# NOTE(review): get_textes is not defined in the visible cells — presumably it comes
# from the `clear_texts` star-import; confirm.
text = ''.join(get_textes())

def get_encode_and_decode_dicts(text):
    """Build the character <-> index lookup tables for `text`.

    Indices are assigned in sorted character order, so the same text always
    produces the same mapping. Iterating a set of strings directly gives an
    order that changes between interpreter runs (string hash randomisation),
    which would silently permute the alphabet when a saved model is reloaded
    in a fresh kernel.

    Args:
        text: iterable of characters (normally one string).

    Returns:
        A tuple `(charter_to_inx, inx_to_charter, alphabit)`:
        char -> index dict, index -> char dict, and the set of distinct
        characters.
    """
    alphabit = set(text)

    charter_to_inx = {ch: inx for inx, ch in enumerate(sorted(alphabit))}
    inx_to_charter = {inx: ch for ch, inx in charter_to_inx.items()}
    return charter_to_inx, inx_to_charter, alphabit


# Module-level lookup tables built from the full corpus; encode_seq / decode_seq
# read them as globals.
charter_to_inx, inx_to_charter, alphabit = get_encode_and_decode_dicts(text)
# Number of distinct characters in the corpus.
alphabit_size = len(alphabit)

def encode_seq(seq):
    """Return a numpy array with the index of every character in `seq`.

    Looks each character up in the module-level `charter_to_inx` table;
    an unknown character raises KeyError.
    """
    indices = []
    for ch in seq:
        indices.append(charter_to_inx[ch])
    return np.array(indices)

def decode_seq(seq):
    """Return the string spelled by the character indices in `seq`.

    Each index is looked up in the module-level `inx_to_charter` table;
    an unknown index raises KeyError.
    """
    chars = (inx_to_charter[inx] for inx in seq)
    return "".join(chars)


def sequenses_generator(text, batch_len, seq_length):
    """Endlessly yield batches of randomly positioned, encoded text windows.

    Args:
        text: source string to sample windows from.
        batch_len: number of windows per yielded batch.
        seq_length: number of characters per window.

    Yields:
        A numpy array holding `batch_len` encoded windows (one row per
        window, `seq_length` indices each), produced with `encode_seq`.

    Raises:
        ValueError: on the first `next()` if `text` is shorter than
            `seq_length`.
    """
    # Valid window starts are 0 .. len(text) - seq_length inclusive. randint's
    # upper bound is exclusive, so add one to make the last window reachable
    # (and to support a text that is exactly seq_length long).
    starts_count = len(text) - seq_length + 1
    if starts_count < 1:
        raise ValueError(
            "text of length %d is shorter than seq_length %d" % (len(text), seq_length))

    while True:
        X = []
        while len(X) < batch_len:
            shift = np.random.randint(starts_count)
            X.append(encode_seq(text[shift:shift + seq_length]))
        yield np.array(X)

        
-

In [14]:
from os import listdir
from os.path import isfile, join

def rnn_placeholders(state):
    """Mirror an RNN state structure with placeholders defaulting to `state`.

    An LSTMStateTuple yields an LSTMStateTuple of placeholders, a single
    tensor yields one placeholder, and any other iterable is processed
    element by element and returned as a tuple. Each placeholder reuses the
    shape and op name of the tensor it wraps.
    """
    def _with_default(tensor):
        return tf.placeholder_with_default(tensor, tensor.shape, tensor.op.name)

    if isinstance(state, tf.contrib.rnn.LSTMStateTuple):
        cell_state, hidden_state = state
        return tf.contrib.rnn.LSTMStateTuple(_with_default(cell_state),
                                             _with_default(hidden_state))
    if isinstance(state, tf.Tensor):
        return _with_default(state)
    return tuple(rnn_placeholders(sub_state) for sub_state in state)

class Model(object):
    """Multi-layer LSTM next-character model built as a TensorFlow 1.x graph.

    The constructor builds two sub-graphs that share the LSTM and output-layer
    variables:
      * a training graph fed through `train_input` (loss, accuracy, Adam
        update, merged summaries);
      * a single-step prediction graph fed through `input` / `input_state`
        that returns next-character probabilities and the new LSTM state.
    """

    # Directory (relative to the working directory) holding the checkpoints.
    _SAVE_DIR = "model_waights"

    def __init__(self, sess, seq_length, alphabit_size, verbas = True,  state_sizes=[128,128]):
        """Build the training and prediction graphs in the current default graph.

        Args:
            sess: TensorFlow session used by every method that runs the graph.
            seq_length: length of the training sequences fed to `train_input`.
            alphabit_size: number of distinct characters (one-hot depth and
                output layer width).
            verbas: when true, log tensor shapes while building the graph.
            state_sizes: LSTM state size of each stacked layer.
        """
        self.sess = sess

        # Copy so neither the shared default list nor a caller's list is aliased.
        self.state_sizes = list(state_sizes)
        state_sizes = self.state_sizes

        # Recurrent state used by step(); filled in by reset_state().
        self.curent_state = None

        if verbas: loggin('Create NN')

        # data placeholder
        self.train_input = tf.placeholder(tf.int32, [None, seq_length])
        one_hot_input = tf.one_hot(self.train_input, alphabit_size)
        if verbas: loggin('rnn_cell input shape %s' % str(one_hot_input.get_shape()))

        # define weights and rnn cells: one BasicLSTMCell per entry of state_sizes
        def lstm_cell(state_size):
            return tf.contrib.rnn.BasicLSTMCell(state_size)

        cells = [lstm_cell(size) for size in state_sizes]
        self.rnn_cell = tf.contrib.rnn.MultiRNNCell(cells)

        # output layer weights
        if verbas: loggin('rnn_cell output shape %s' % str(state_sizes[-1]))

        self.output_w = tf.get_variable(shape=(state_sizes[-1], alphabit_size),
                            initializer=tf.contrib.layers.xavier_initializer(), name = "out_w")

        self.output_b = tf.get_variable(shape=(alphabit_size,),
                            initializer=tf.constant_initializer(0.0), name = "out_b")

        if verbas: loggin('w shape %s, b shape %s' % (str(self.output_w.shape), str(self.output_b.shape)) )

        # 1) define the learning graph
        # forward pass
        with tf.variable_scope("rnn_layer"):
            lstm_output, lstm_states = tf.nn.dynamic_rnn(self.rnn_cell, one_hot_input, dtype = tf.float32)

        # Only positions from seq_length//4 onwards contribute to the loss; the
        # output at position t is scored against the input character at t+1,
        # hence one position fewer than what remains.
        # NOTE(review): this keeps roughly the last 3/4 of each sequence, while
        # an earlier note said "last 1/4" — confirm which is intended.
        start_position = seq_length//4
        count = seq_length-start_position-1

        trancated_lstm_output = tf.slice(lstm_output, begin = [0,start_position,0], size = [-1,count,-1])
        if verbas: loggin('trancated_lstm_output shape %s'% str( trancated_lstm_output.get_shape() ) )

        rnn_output = tf.reshape(trancated_lstm_output, [-1,state_sizes[-1]])
        if verbas: loggin('rnn_output shape %s'% str( rnn_output.get_shape() ) )

        output_layer = tf.nn.xw_plus_b(rnn_output,self.output_w,self.output_b)
        if verbas: loggin('output_layer shape %s'% str( output_layer.get_shape() ) )

        # Targets are the one-hot inputs shifted one step to the right.
        target = tf.reshape(tf.slice(one_hot_input, begin = [0,start_position+1,0], size = [-1,count,-1]),[-1,alphabit_size])
        if verbas: loggin('target shape %s'% str( target.get_shape() ) )

        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=target, logits=output_layer))
        if verbas: loggin('self.loss shape %s'% str( self.loss.get_shape() ) )

        self.learning_rate = tf.placeholder(tf.float32)
        self.optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate).minimize(self.loss)

        # 2) define the accuracy op
        pred_classes = tf.argmax(output_layer, axis=1)
        y_classes = tf.argmax(target ,axis = 1)

        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(pred_classes, y_classes), tf.float32))

        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('accuracy', self.accuracy)
        self.merged_summary = tf.summary.merge_all()

        # 3) predict-next-character graph (batch of one, one time step)
        self.input = tf.placeholder(tf.int32)
        # Build the zero-state ops once; reset_state() re-runs them instead of
        # adding new ops to the graph on every call.
        self._zero_state = self.rnn_cell.zero_state(1,tf.float32)
        self.input_state = rnn_placeholders(self._zero_state)

        rnn_input = tf.reshape(tf.one_hot(self.input, alphabit_size), [1,1,alphabit_size])
        with tf.variable_scope("rnn_layer"):
            # Share the LSTM weights created for the training graph.
            tf.get_variable_scope().reuse_variables()
            predict_rnn_output, self.output_state = tf.nn.dynamic_rnn(self.rnn_cell,
                                                                rnn_input, dtype = tf.float32,
                                                                initial_state = self.input_state )

        predict_rnn_output = tf.reshape(predict_rnn_output, [-1,state_sizes[-1]])

        predict_output_layer = tf.nn.xw_plus_b(predict_rnn_output,self.output_w,self.output_b)

        self.predict_output = tf.nn.softmax(predict_output_layer)

    def train_on_batch(self, batch, learning_rate = 1e-3):
        """Run one Adam update on `batch` and return the loss of that batch."""
        feed = {self.train_input: batch,self.learning_rate:learning_rate}
        return self.sess.run([ self.loss, self.optimizer ], feed)[0]

    def get_loss(self, batch):
        """Return the mean cross-entropy loss on `batch` without training."""
        feed = {self.train_input: batch}
        return self.sess.run(self.loss, feed)

    def get_accuracy(self, batch):
        """Return the next-character accuracy of the training graph on `batch`."""
        feed = {self.train_input: batch}
        return self.sess.run(self.accuracy, feed)

    def get_summary(self, batch):
        """Return the merged loss/accuracy summary evaluated on `batch`."""
        feed = {self.train_input: batch}
        return self.sess.run(self.merged_summary, feed)

    def get_accuracy_for_seq(self,seq):
        """Return next-character accuracy when stepping through `seq` one char at a time.

        The recurrent state is reset first. Returns 0.0 for sequences shorter
        than two characters, since there is nothing to predict.
        """
        encoded = encode_seq(seq)
        self.reset_state()
        predicted = [self.step(ch_inx) for ch_inx in encoded]
        return self._next_char_accuracy(predicted, encoded)

    @staticmethod
    def _next_char_accuracy(predicted, encoded):
        """Fraction of positions where argmax(predicted[i]) equals encoded[i+1]."""
        # The prediction made after the final character has no target, so only
        # len - 1 pairs exist; divide by that count, not by len(predicted).
        pairs = list(zip(predicted[:-1], list(encoded)[1:]))
        if not pairs:
            return 0.0
        hits = sum(int(next_inx == probs.argmax()) for probs, next_inx in pairs)
        return hits/len(pairs)

    def step(self ,ch_inx):
        """Feed one character index, advance the stored state, return the softmax output."""
        if getattr(self, 'curent_state', None) is None:
            # Allow step() before an explicit reset_state().
            self.reset_state()
        feed = {self.input: ch_inx, self.input_state: self.curent_state}
        out, self.curent_state = self.sess.run([self.predict_output, self.output_state], feed)
        return out

    def reset_state(self):
        """Set the stored recurrent state back to the LSTM zero state."""
        self.curent_state = self.sess.run(self._zero_state)

    def __deffault_fname__(self):
        """Return the default checkpoint file name derived from the layer sizes."""
        fname = "statesize-%s-cellcount-%s-.ckpt" % (
            str(self.state_sizes[-1]),str(len(self.state_sizes)) )
        return fname

    def save(self, fname = None, loss = None):
        """Save all variables to the checkpoint directory.

        Args:
            fname: checkpoint file name; defaults to `__deffault_fname__()`.
            loss: when given, the name is prefixed with "loss-<loss>-" so that
                `load_best` can rank checkpoints.
        """
        savepath = self._SAVE_DIR

        if not fname: fname = self.__deffault_fname__()
        # `is not None` so that a loss of exactly 0 is still recorded.
        if loss is not None: fname = "loss-%s-"%(str(loss))+fname
        # The saver refuses to write into a directory that does not exist.
        os.makedirs(savepath, exist_ok=True)
        fname = join(savepath, fname)
        saver = tf.train.Saver()
        save_path = saver.save(self.sess, fname)
        print("Model saved in file: %s" % save_path)

    def load(self, fname = None):
        """Restore all variables from `fname` inside the checkpoint directory."""
        savepath = self._SAVE_DIR

        if not fname: fname = self.__deffault_fname__()
        fname = join(savepath, fname)
        saver = tf.train.Saver()
        saver.restore(self.sess, fname)

    @staticmethod
    def _parse_checkpoint_name(fname):
        """Parse a "loss-<x>-statesize-<n>-cellcount-<k>-.ckpt*" file name.

        Returns a dict with the base checkpoint name under "fname" plus
        numeric "loss", "statesize" and "cellcount", or None when the name
        does not carry all three fields in a parseable form.
        """
        # The name is a flat "-"-separated list of key/value pairs.
        parts = fname.split('-')
        fields = dict(zip(parts[0::2], parts[1::2]))
        try:
            loss = float(fields["loss"])
            statesize = int(fields["statesize"])
            cellcount = int(fields["cellcount"])
        except (KeyError, ValueError):
            return None
        if loss != loss:
            # NaN cannot be ranked against other losses.
            return None
        # Strip suffixes such as ".index" / ".meta" / ".data-..." so the name
        # is what the saver expects.
        return {"fname": fname.split('.ckpt')[0]+'.ckpt', "loss": loss,
                "statesize": statesize, "cellcount": cellcount}

    def load_best(self):
        """Restore the lowest-loss checkpoint matching this model's layer layout.

        Logs a message and returns without loading when the checkpoint
        directory is missing or holds no matching checkpoint.
        """
        savepath = self._SAVE_DIR

        # Base checkpoint name -> loss; the dict collapses the several files
        # that can share one checkpoint prefix into a single candidate.
        candidates = {}
        if os.path.isdir(savepath):
            for fname in listdir(savepath):
                if not isfile(join(savepath, fname)):
                    continue
                params = self._parse_checkpoint_name(fname)
                if params is None:
                    continue
                if (params["statesize"] != self.state_sizes[-1]
                        or params["cellcount"] != len(self.state_sizes)):
                    continue
                candidates[params["fname"]] = params["loss"]

        if not candidates:
            loggin('No checkpoints for this model')
            return

        fname = min(candidates, key=candidates.get)
        loggin('loaded from %s' % fname)
        self.load(fname)
        
    

In [22]:
def train(model, eps_count = 10, batch_len = 256, batchs_in_ep = 256, learning_rate = 1e-3):
    """Train `model` for `eps_count` epochs, checkpointing whenever test loss improves.

    The texts are shuffled; the first 50 form the test text and the rest the
    training text. A fixed batch of 512 test sequences (drawn with numpy seed
    123) is used for the loss check after every epoch. Sequence length is read
    from the module-level `seq_length`.

    Args:
        model: object providing train_on_batch / get_loss /
            get_accuracy_for_seq / save.
        eps_count: number of epochs.
        batch_len: sequences per training batch.
        batchs_in_ep: training batches per epoch.
        learning_rate: learning rate passed to every training step.
    """
    print('gen:+',decode_seq(generate(model, 100)),'+')

    corpus = get_textes()
    random.shuffle(corpus)
    test_text = ''.join(corpus[:50])
    train_text = ''.join(corpus[50:])

    # Draw the evaluation batch under a fixed seed, then re-seed numpy with a
    # value drawn beforehand so training batches are not tied to that seed.
    restore_seed = np.random.randint(100000)
    np.random.seed(123)
    x_for_loss_check = next(sequenses_generator(test_text, 512, seq_length))
    np.random.seed(restore_seed)

    batches = sequenses_generator(train_text, batch_len, seq_length)

    best_loss = model.get_loss(x_for_loss_check)

    for ep in range(eps_count):
        for _ in tqdm(range(batchs_in_ep)):
            train_loss = model.train_on_batch(next(batches), learning_rate = learning_rate)

        loss = model.get_loss(x_for_loss_check)
        acc = model.get_accuracy_for_seq(test_text)

        # Keep a checkpoint only when the held-out loss improved.
        if loss < best_loss:
            best_loss = loss
            model.save(loss = loss)

        print('ep %s acc %s, last loss %s, train_loss: %s' % ( ep,str(acc), str(loss), str(train_loss) ) )
        print('gen:+',decode_seq(generate(model, 100)),'+')

In [None]:
# Training run configuration.
# seq_length is also read as a global inside train(), so it must be defined
# before train() is called.
seq_length = 200

eps_count = 20
batch_len = 256
batchs_in_ep = 64

# State size of each stacked LSTM layer.
state_sizes = [512,512,512]

learning_rate = 1e-3

# True: start from freshly initialised weights; False: resume from the best checkpoint.
new_model = True

# Build the model in a fresh graph so re-running the cell does not collide with
# variables left over from a previous run.
with tf.Graph().as_default():
    with tf.Session() as sess:
        
        model = Model(sess, seq_length = seq_length, verbas = True,
                      alphabit_size = alphabit_size, state_sizes = state_sizes)
        
        # Initialise first; load_best() then overwrites the values when resuming.
        sess.run(tf.global_variables_initializer())
        if not new_model: model.load_best()
        
        train(model, eps_count = eps_count, batch_len = batch_len, batchs_in_ep = batchs_in_ep, learning_rate = learning_rate)
        

Create NN
rnn_cell input shape (?, 200, 41)
rnn_cell output shape 512
w shape (512, 41), b shape (41,)
trancated_lstm_output shape (?, 49, 512)
rnn_output shape (?, 512)
output_layer shape (?, 41)
target shape (?, 41)
self.loss shape ()
gen:+ !!!! +



  0%|          | 0/64 [00:00<?, ?it/s][A
100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-3.1711-statesize-512-cellcount-3-.ckpt
ep 0 acc 0.16497626000312074, last loss 3.1711, train_loss: 3.17264


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+  оо                                                                                                  +


100%|██████████| 64/64 [01:34<00:00,  1.49s/it]


Model saved in file: model_waights/loss-3.14989-statesize-512-cellcount-3-.ckpt
ep 1 acc 0.16589019415527964, last loss 3.14989, train_loss: 3.13962


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+  ее                                                                                                  +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.95717-statesize-512-cellcount-3-.ckpt
ep 2 acc 0.1973874857894385, last loss 2.95717, train_loss: 2.9366


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ о оое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое ое о +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.7486-statesize-512-cellcount-3-.ckpt
ep 3 acc 0.22052562359287578, last loss 2.7486, train_loss: 2.75367


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ поее ие ие ое ое ое ое ое о ое ое о ое ое о ое ое о ое ое о ое ое о ое ое о ое ое о ое ое о ое ое о  +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.56147-statesize-512-cellcount-3-.ckpt
ep 4 acc 0.24593745123826932, last loss 2.56147, train_loss: 2.56379


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ проос сорет серовной с серот с серова с серова с серова с серова с серова с серова с серова с серова +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.48602-statesize-512-cellcount-3-.ckpt
ep 5 acc 0.262499721361539, last loss 2.48602, train_loss: 2.48435


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ рроорет серотной с серотной с серотной с серотной с серотной с серотной с серотной с серотной с серо +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.42744-statesize-512-cellcount-3-.ckpt
ep 6 acc 0.2846793428590535, last loss 2.42744, train_loss: 2.37958


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ роортем сорет с серовной стероть с стем с стем с стем с стем с стем с стем с стем с стем с стем с ст +


100%|██████████| 64/64 [01:34<00:00,  1.48s/it]


Model saved in file: model_waights/loss-2.33489-statesize-512-cellcount-3-.ckpt
ep 7 acc 0.30170972559684356, last loss 2.33489, train_loss: 2.32403


  0%|          | 0/64 [00:00<?, ?it/s]

gen:+ роедт салной поледа не стороть и полет не стореть и полет не стореть и полет не стореть и полет не с +


  5%|▍         | 3/64 [00:04<01:32,  1.51s/it]

In [None]:
# Text generation from the best saved checkpoint.
# seq_length only sizes the training placeholder inside Model, so it may differ
# from the value used for training; state_sizes must match the checkpoint that
# load_best() is expected to find.
seq_length = 100
state_sizes = [512,512,512]
with tf.Graph().as_default():
    with tf.Session() as sess:
        
        model = Model(sess, seq_length = seq_length, verbas = True,
                      alphabit_size = alphabit_size, state_sizes = state_sizes)
        
        sess.run(tf.global_variables_initializer())
        # If no matching checkpoint exists, load_best() only logs a message and
        # generation proceeds with the freshly initialised weights.
        model.load_best()
        
        print('generate random poetry:')
        # NOTE(review): generate() is not defined in the visible cells — confirm
        # it is defined (or imported) before this cell runs on a fresh kernel.
        print('gen:+',decode_seq(generate(model, 10000, sampling = False, pattern = "о любви.")),'+')