# Run in Colab: please choose a GPU/TPU runtime in Colab before proceeding
----
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/machine-learning-apps/Issue-Label-Bot/blob/master/notebooks/end_to_end_issue_labeler.ipynb)

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

!pip install -q tensorflow-hub
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, GRU, Dense, Embedding, BatchNormalization, Concatenate
from tensorflow.keras import  Model
from tensorflow.keras.optimizers import Adam
from google.cloud import bigquery


# Report the library versions and accelerator status of the current runtime.
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", gpu_status)

TensorFlow 2.x selected.
Version:  2.0.0
Eager mode:  True
Hub version:  0.7.0
GPU is available


### Provide your credentials to the runtime so it can access BigQuery

In [2]:
# google.colab is presumably only importable inside a Colab runtime — TODO confirm
# before running this notebook elsewhere.
from google.colab import auth
# Authenticate the notebook user; presumably required before the BigQuery client
# can run queries on the user's behalf — verify.
auth.authenticate_user()
print('Authenticated')

Authenticated


# Define some hyper-parameters

In [0]:
# Maximum number of rows requested from BigQuery (substituted into the query's LIMIT).
sample_count = 200000

# Class names; a label's position in this list is its integer class id.
labels = [
    'unknown',
    'bug',
    'feature',
    'question',
]
num_classes = len(labels)

# Hold-out fraction and random seed handed to train_test_split.
test_size = 0.33
random_state = 42

In [0]:
# BigQuery client. The project id is hard-coded; replace it with a project you can
# run queries in.
client = bigquery.Client(project='shopify-codelab-and-demos')


# Fetch up to `sample_count` labelled issue rows from the GitHub Archive yearly tables
# (table suffixes '16' through '18', IssuesEvent rows only) into a DataFrame.
#   * title / body: lower-cased and trimmed, with escaped newlines and some markdown
#     punctuation replaced by spaces.
#   * labels: every label name extracted from the issue payload; UNNEST emits one row
#     per label, so an issue carrying several labels appears several times.
#   * y / y_name: class id and class name chosen by substring match on the label
#     (bug -> 1, feature/enhancement/improvement/request -> 2,
#     question/discussion -> 3, anything else -> 0 'unknown').
#   * Rows are kept only when the body has 6-1000 and the title 3-50
#     space-separated tokens.
# NOTE(review): LIMIT is applied without ORDER BY, so the returned sample is
# presumably not reproducible between runs — confirm if that matters.
df = client.query(r"""
 with data as (
   SELECT
       LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.title'), r"\\n|\\r|\(|\)|\[|\]|#|\*|`|\"", ' '))) as title
      , LOWER(TRIM(REGEXP_REPLACE(JSON_EXTRACT(payload, '$.issue.body'), r"\\n|\\r|\(|\)|\[|\]|#|\*|`|\"", ' '))) as body
      , REGEXP_EXTRACT_ALL(LOWER(TRIM(JSON_EXTRACT(payload, "$.issue.labels"))), ',"name\":"(.+?)","color') as labels
    FROM `githubarchive.year.20*`
    WHERE
    _TABLE_SUFFIX BETWEEN '16' and '18'
    and type="IssuesEvent"
    )
    select title, body, CASE when label like '%bug%' and label not like '%not bug%' then 1
                             when label like '%feature%' or label like '%enhancement%' or label like '%improvement%' or label like '%request%' then 2
                             when label like '%question%' or label like '%discussion%' then 3
                             else 0 end as y,
                         CASE when label like '%bug%' and label not like '%not bug%' then 'bug'
                             when label like '%feature%' or label like '%enhancement%' or label like '%improvement%' or label like '%request%' then 'feature'
                             when label like '%question%' or label like '%discussion%' then 'question'
                             else 'unknown' end as y_name
    from data, unnest(labels) label
    where
    ARRAY_LENGTH(SPLIT(body, ' ')) >= 6
    and ARRAY_LENGTH(SPLIT(title, ' ')) >= 3
    and ARRAY_LENGTH(SPLIT(title, ' ')) <= 50
    and ARRAY_LENGTH(SPLIT(body, ' ')) <= 1000
limit {sample_count}
  """.format(sample_count=sample_count)).to_dataframe() 

In [0]:
# Model inputs are the two text columns (title first, then body);
# the target is the integer class id column.
feature_columns = ['title', 'body']
X = df[feature_columns]
y = df['y']

In [0]:

# Hold out `test_size` of the rows; the fixed seed makes the split repeatable
# for a given DataFrame.
split_options = dict(test_size=test_size, random_state=random_state)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [7]:


# Pre-trained Swivel text embedding from TF-Hub. It turns a batch of strings into a
# (batch, 20) tensor and is fine-tuned along with the rest of the model
# (trainable=True).
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[],
                           dtype=tf.string, trainable=True)

# One string input holding two columns per example: column 0 is the title and
# column 1 is the body. The shape must be a tuple: `(2)` is just the integer 2,
# whereas `(2,)` is the one-element tuple Keras expects.
text = Input(shape=(2,), dtype=tf.string, name="title_and_body")

# Embed each column with the shared hub layer, then add a trailing axis so each
# embedding becomes (batch, 20, 1).
title = hub_layer(text[:, 0])[:, :, tf.newaxis]
body = hub_layer(text[:, 1])[:, :, tf.newaxis]

# Stack body and title along the last axis -> (batch, 20, 2).
data = Concatenate(axis=2, name='Concat')([body, title])

data = BatchNormalization()(data)
# The GRU walks over the 20 embedding dimensions as time steps, each carrying the
# (body, title) pair as features. The layer name is kept unchanged even though the
# layer encodes both fields.
data = GRU(75, name='Title-Encoder')(data)

x = BatchNormalization()(data)
out = Dense(num_classes, activation='softmax')(x)

# The model takes the single (title, body) string input and outputs one
# probability per class.
model = Model(inputs=text, outputs=out)
model.summary()


 

 

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
title_and_body (InputLayer)     [(None, 2)]          0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice_2 (Te [(None,)]            0           title_and_body[0][0]             
__________________________________________________________________________________________________
tf_op_layer_strided_slice (Tens [(None,)]            0           title_and_body[0][0]             
__________________________________________________________________________________________________
keras_layer (KerasLayer)        (None, 20)           400020      tf_op_layer_strided_slice[0][0]  
                                                                 tf_op_layer_strided_slice_2[0

In [0]:
# `learning_rate` is the supported keyword for the optimizer; `lr` is only a
# deprecated alias. Sparse categorical cross-entropy is used because the targets
# are integer class ids rather than one-hot vectors.
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [9]:
# Training configuration for this run.
batch_size = 900
epochs = 4

# Train on the training split and evaluate on the held-out split after every epoch.
# `validation_data` is passed as the documented `(x_val, y_val)` tuple; a list can
# be misread as a list of inputs by some Keras versions.
history = model.fit(x=X_train.values,
                    y=y_train.values,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_test.values, y_test.values)
)

Train on 134000 samples, validate on 66000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [0]:
def single_predict(data):
    """Return the predicted probability of every label for a single issue.

    Args:
        data: Two-element sequence of strings, ``[title, body]``.

    Returns:
        dict mapping each name in ``labels`` to the probability the model
        assigns to that class.
    """
    # Wrap the example in a list so the model receives a batch of one,
    # then take the only row of the prediction.
    probabilities = model.predict([data])[0]
    return dict(zip(labels, probabilities))

In [11]:
# Score one [title, body] pair.
# NOTE(review): both strings end in '...' plus a tab, so they look like truncated
# DataFrame preview text rather than the full issue — confirm this is intended.
single_predict(['allow layers to be added to the spiderfier aft...	', 'currently the spiderfier works with multiple l...	'])

{'bug': 0.04751177,
 'feature': 0.3074346,
 'question': 0.023045829,
 'unknown': 0.62200785}

In [12]:
# Score a hand-written [title, body] pair that is phrased as a feature request.
single_predict(['requesting a button', 'It would be great to add a new button'])

{'bug': 0.016338741,
 'feature': 0.40941617,
 'question': 0.014603957,
 'unknown': 0.5596412}

In [0]:
# Save only the model weights to a hard-coded Cloud Storage path; change the
# bucket to one you can write to before running this cell.
model.save_weights('gs://pengyu-ml-test/issue_labler_e2d/weights')


# Known bug: Keras fails when saving a model that contains a TF-Hub layer to the SavedModel format

In [14]:
# Try to save the full model to the path 'local'.
# NOTE(review): the recorded output of this cell is an AttributeError, so this call
# presumably fails with the TensorFlow / TF-Hub versions used here — confirm on
# current versions.
model.save('local')

AttributeError: ignored