# Model Architecture

In [1]:
from keras.layers import (Input,
Embedding, BatchNormalization, GRU, Dense,
merge, TimeDistributed)
from keras.models import Model
from keras import optimizers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [10]:
LATENT_DIM = 512
BODY_LENGTH = 100
TITLE_LENGTH = 10
NUM_ENCODER_TOKENS = 10000
NUM_DECODER_TOKENS = 1000

#### Encoder ####
encoder_inputs = Input(shape=(BODY_LENGTH,), name='Encoder-Input')

x = Embedding(NUM_ENCODER_TOKENS, LATENT_DIM, mask_zero=False, name='Body-Word-Embedding')(encoder_inputs)
x = BatchNormalization(name='Encoder-BatchNorm-1')(x)

_, state_h = GRU(LATENT_DIM, return_state=True, name='Encoder-Last-GRU')(x)
    
encoder_model = Model(encoder_inputs, state_h, name='Encoder-Model')

encoder_out = encoder_model(encoder_inputs)

#### Decoder ####
decoder_inputs = Input(shape=(None,), name='Decoder-Input')

x = Embedding(NUM_DECODER_TOKENS, LATENT_DIM, mask_zero=False, name='Title-Word-Embedding')(decoder_inputs)
x = BatchNormalization(name='Decoder-BatchNorm-1')(x)

decoder_gru = GRU(LATENT_DIM, return_state=True, return_sequences=True, name='Decoder-GRU')

# decoder_gru_state for extraction later
decoder_gru_out, decoder_gru_state = decoder_gru(x, initial_state=encoder_out)

x = TimeDistributed(BatchNormalization(), name='Decoder-BatchNorm-2')(decoder_gru_out)

decoder_out = Dense(NUM_DECODER_TOKENS, activation='softmax', name='Final-Output-Dense')(x)

#### Seq2Seq Model ####
seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_out, name='Seq2Seq-Model')

seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=1e-3), loss='sparse_categorical_crossentropy')

seq2seq_Model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Decoder-Input (InputLayer)      (None, None)         0                                            
__________________________________________________________________________________________________
Title-Word-Embedding (Embedding (None, None, 512)    512000      Decoder-Input[0][0]              
__________________________________________________________________________________________________
Encoder-Input (InputLayer)      (None, 100)          0                                            
__________________________________________________________________________________________________
Decoder-BatchNorm-1 (BatchNorma (None, None, 512)    2048        Title-Word-Embedding[0][0]       
__________________________________________________________________________________________________
Encoder-Mo

In [4]:
from keras.utils import plot_model
plot_model(seq2seq_Model, to_file='model.png')

# Train Model

In [5]:
# TODO: read by chunks
# import pandas as pd

# df = pd.read_csv('github_issues.csv').sample(n=10)
# SAMPLE_BODIES = df.body.tolist()
# SAMPLE_TITLES = df.issue_title.tolist()

In [6]:
SAMPLE_TITLES

['user can read about the product features.',
 'suggestion lapis lazuli tools, armor, etc.',
 'system.exception: fake exception',
 'update hotreloading on the fly',
 'macos: undoing a folder delete does not report folder as added',
 'drivers/periph_common : question about init.c',
 'rogue slight improvement to rogue_assassination_t19m_nh profile',
 'indicate issues referenced by now-merged prs?',
 'optimizing download procedure',
 'when bolus fails to deliver with okay to try again - bolus amount is 0.00']

In [7]:
from IPython.display import Audio, display
from Helpers import load_tokenizer
from keras.callbacks import Callback

tk_body = load_tokenizer('tk_body.dpkl')
tk_title = load_tokenizer('tk_title.dpkl')


class LossHistory(Callback):
    def on_epoch_end(self, epoch, logs):
        display(Audio(filename='notification.mp3', autoplay=True))
        print('\n')
        print('epoch: {}'.format(epoch))
        print('val_loss: {}'.format(logs.get('val_loss')))
        
        predictor = Predictor(seq2seq_Model, tk_body, tk_title, BODY_LENGTH, 12)
        
        for body in SAMPLE_BODIES:
            title = predictor.create_title(body)
            print('Generated title: {}'.format(title))

Size of vocabulary for tk_body.dpkl: 10000
Size of vocabulary for tk_title.dpkl: 1000


In [8]:
from Helpers import load_encoder_inputs, load_decoder_inputs

encoder_input_data, _ = load_encoder_inputs('train_body_vecs.npy')
decoder_input_data, decoder_target_data = load_decoder_inputs('train_title_vecs.npy')

Shape of encoder input: (477000, 100)
Shape of decoder input: (477000, 9)
Shape of decoder target: (477000, 9)


In [11]:
# Testing out class LossHistory
from predict import Predictor

predictor = Predictor(seq2seq_Model, tk_body, tk_title, BODY_LENGTH, TITLE_LENGTH)
print(predictor.create_title(SAMPLE_BODIES[0]))
print(predictor.create_title(SAMPLE_BODIES[1]))
print(predictor.create_title(SAMPLE_BODIES[2]))

reports resolve rules disable job commit well 1 analysis still
rules filter long design symbol projects focus options generate activity
unit available top top ðŸš¨ lines detect record invalid icon


In [13]:
SAMPLE_TITLES

['user can read about the product features.',
 'suggestion lapis lazuli tools, armor, etc.',
 'system.exception: fake exception',
 'update hotreloading on the fly',
 'macos: undoing a folder delete does not report folder as added',
 'drivers/periph_common : question about init.c',
 'rogue slight improvement to rogue_assassination_t19m_nh profile',
 'indicate issues referenced by now-merged prs?',
 'optimizing download procedure',
 'when bolus fails to deliver with okay to try again - bolus amount is 0.00']

In [12]:
import numpy as np
from keras.callbacks import ModelCheckpoint

batch_size = 1024
epochs = 10
history = LossHistory()
checkpointer = ModelCheckpoint(filepath='/tmp/github.{epoch:02d}-{val_loss:.2f}.h5',
                               verbose=1)

seq2seq_Model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_target_data, -1),
                   batch_size=batch_size,
                   epochs=epochs,
                   validation_split=0.10, 
                   callbacks=[history, checkpointer])

Train on 429300 samples, validate on 47700 samples
Epoch 1/10



epoch: 0
val_loss: 4.2807588551679245
Generated title: user can i to the of the in the and it to
Generated title: for the of the of the is the and it to the
Generated title: system my program test crash on 2 0 1 2 2 and
Generated title: support for for and in the of the for the and it
Generated title: issue when is not in the of the and it to the
Generated title: the of the is the the is the and it to what
Generated title: the of the is the the is the and it for 2
Generated title: add to the for the of the and it to the and
Generated title: feature request for auto for and in the and it to the
Generated title: i can i this to the in the 2 2 and 2
Epoch 00001: saving model to /tmp/github.01-4.28.h5


  str(node.arguments) + '. They will not be included '


Epoch 2/10



epoch: 1
val_loss: 2.0480287576071623
Generated title: add to user interface to add to the list of and to
Generated title: config not working for config file is not in the and 2
Generated title: exception exception exception exception while running test program test crash with 2
Generated title: support for server restart server in core settings in the server and
Generated title: folder does not delete folder when folder is closed in the project
Generated title: why do i have a device startup on startup and 1 1
Generated title: add to the id of the same time as a and i
Generated title: add to the readme md file to be used for and 2
Generated title: download media files for media files are not by id and or
Generated title: 2 0 2 failure to get the status of the and 2
Epoch 00002: saving model to /tmp/github.02-2.05.h5
Epoch 3/10



epoch: 2
val_loss: 2.051051321149622
Generated title: add a to the user section to the list of and users
Generated title: bug config doesn't appear to be changed in the server and 2
Generated title: system exception exception exception exception unknown string string string string string android
Generated title: restart server restart update server restart server restart server settings for development
Generated title: folder does not close when folder is deleted or or in 2
Generated title: why do we need the devices on startup and the server and
Generated title: add a to the of the image id in the and the
Generated title: add support for and open files in the editor and 2017 at
Generated title: auto download media files for media files are not working with and
Generated title: failed to start after 2 0 0 0 and 2 4 2
Epoch 00003: saving model to /tmp/github.03-2.05.h5
Epoch 4/10



epoch: 3
val_loss: 2.0886573040360426
Generated title: user can see a list of users to add to a list
Generated title: bug in config doesn't seem to be working properly for 5 and
Generated title: system exception exception exception thread exception not found for c project 1
Generated title: add support for server restart update server restart settings for c c
Generated title: folder folder after close event folder on windows 7 8 9 15
Generated title: why are devices in the devices startup script on windows 10 12
Generated title: add a new command to the same id as i have to
Generated title: issues with and other issues than the same as and i have
Generated title: media files should be auto download files by default and should be
Generated title: error when trying to send to an error at the of the
Epoch 00004: saving model to /tmp/github.04-2.09.h5
Epoch 5/10



epoch: 4
val_loss: 2.126324919634645
Generated title: add a user to the search bar for users and their their
Generated title: bug report doesn't appear in the config file for the and c
Generated title: system exception exception exception thread thread exception error failed to resolve issue
Generated title: add a server restart to the core script php line and module
Generated title: folder delete folder on windows when is deleted on 4 9 2
Generated title: why the device startup on startup on the device and the server
Generated title: adding a image to the same id causes the to the and
Generated title: add to the readme and remove them from the project and all
Generated title: auto download media files for media files by users should be and
Generated title: 2 0 4 6 to 2 0 0 and 2 4 Ð²
Epoch 00005: saving model to /tmp/github.05-2.13.h5
Epoch 6/10



epoch: 5
val_loss: 2.1849902292167616
Generated title: allow users to specify the type of the to and add all
Generated title: bug report doesn't appear as an option for all and users need
Generated title: system exception exception exception has no attribute to be used with your
Generated title: add server restart support for update server core js and all users
Generated title: folder delete folder after each other in test case and c backend
Generated title: why the device so must be used in the 6 but no
Generated title: id is too long the of the image is in and c
Generated title: add ability to open in the browser and in the and all
Generated title: auto download files or media files or directory open etc 2017 2
Generated title: 2 2 0 failure to get to 0 2 and 2 1
Epoch 00006: saving model to /tmp/github.06-2.18.h5
Epoch 7/10



epoch: 6
val_loss: 2.259371830042553
Generated title: add a column to the user page and add data and all
Generated title: bug config display for current section at 1 2 1 and 2
Generated title: system exception exception when is true and my ssl and i use
Generated title: allow to update server restart to server restart like or server i
Generated title: folder delete folder when is deleted or other when is as i
Generated title: why does the device why not why so 6 but can't use
Generated title: adding causes to causes to crash on load table with in c
Generated title: document the ability to open and remove them from all users need
Generated title: auto download media files or download media file from server and users
Generated title: 2 failed to get back to the new error after step 4
Epoch 00007: saving model to /tmp/github.07-2.26.h5
Epoch 8/10



epoch: 7
val_loss: 2.3493065726482145
Generated title: add a column to the theme for the and and all data
Generated title: bug report config appear as invalid in current environment variable and aws
Generated title: system exception exception exception thread undefined error symbol values to be 2
Generated title: suggestion server restart require updates of server restart like web app management
Generated title: folder when folder is deleted during tests or deleted on 4 13
Generated title: why the device will be used in the devices but do not
Generated title: adding id causes to table of image is not well etc d
Generated title: document how to use this repo to the and and idea 2017
Generated title: auto download media files or video files should be from server db
Generated title: 2 failed to get error after to the 2 4 4 13
Epoch 00008: saving model to /tmp/github.08-2.35.h5
Epoch 9/10



epoch: 8
val_loss: 2.4248258431952454
Generated title: add to the user interface and place data in the section 3
Generated title: bug for config doesn't appear to work at 1 2 9 6
Generated title: system exception exception exception when i expected but found but i should
Generated title: server restart should not require restart of server to server so it's
Generated title: folder delete folder when a folder is deleted by cli and 9
Generated title: why does your work on startup on os x but 2 12
Generated title: adding causes to the id causes a crash in editor 2 4
Generated title: document how to handle errors in the database and users need help
Generated title: auto download media files for a video service user and password and
Generated title: 2 failed to get back to the user 4 and 15 4
Epoch 00009: saving model to /tmp/github.09-2.42.h5
Epoch 10/10



epoch: 9
val_loss: 2.521794643422093
Generated title: user can see a non projects that have a and do them
Generated title: broken for 1 2 1 at their than 1 0 4 Ð²
Generated title: system exception exception when pass object has no attribute to be block
Generated title: server restart lock to the server restart of the to server io
Generated title: folder should deleted when running on a directory or another cli c
Generated title: why the device startup are by the devices in 6 4 and
Generated title: adding causes to crash on table elements when adding to c c
Generated title: add support for issues to resolve issues on the web development environment
Generated title: auto media files download media files download page only auto media audio
Generated title: 2 failed to get to after a of and is 4 2
Epoch 00010: saving model to /tmp/github.10-2.52.h5


<keras.callbacks.History at 0x7f17181baf28>

# See Results

In [12]:
del seq2seq_Model

In [31]:
import os
from keras.models import load_model

records = []

SAMPLE_BODIES = [
    """Describe the problem

Using tensorflow EagerExecution. According to the documentation :

During eager execution the lifetime of state objects is determined by the lifetime of their corresponding Python object.
I have some trouble with how tensorflow handle memory. I would like to remove tensors from my memory after each iteration on this toy example. The results are shown in the chart.

I have tried with Variables and with simple tensors. tf.assign doesn't do the job. More and more memory is used. It might be normal in order to be able to compute the gradient. But, if I apply some dummy optimizer at the end of each iteration, the memory isn't not released (more precisely, it happens sometimes but the global trend is that the memory use is growing).

I haven't found any API to deal with that yet."""
]

models_dir = '/tmp/github'
for modelFN in os.listdir(models_dir):
    print('\n%s:\n' % modelFN)
    d = {modelFN: []}
    p = Predictor(load_model(os.path.join(models_dir, modelFN)), tk_body, tk_title, BODY_LENGTH, TITLE_LENGTH)
    for body in SAMPLE_BODIES:
        t = p.create_title(body)
        print(t)
        d[modelFN].append(t)
    records.append(d)


github.03-2.05.h5:

memory usage of memory usage after using memory usage with

github.06-2.18.h5:

memory when using with the same as the solution and

github.08-2.35.h5:

memory when using with and after i restart my i

github.07-2.26.h5:

memory when using as a dependency of the project and

github.05-2.13.h5:

memory usage with the same as the solution of and

github.09-2.42.h5:

memory when using as a dependency of the project and

github.01-4.28.h5:

when is not to the of the is it to

github.02-2.05.h5:

memory in memory usage with different than the same as

github.04-2.09.h5:

memory when using memory usage with the same as and

github.10-2.52.h5:

memory after to load for testing with a function for
