In [1]:
import json
import re
import sentencepiece as spm

In [2]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [3]:
from prepro_utils import preprocess_text, encode_ids, encode_pieces

sp_model = spm.SentencePieceProcessor()
sp_model.Load('tiny-bert-v1/sp10m.cased.bert.model')

with open('tiny-bert-v1/sp10m.cased.bert.vocab') as fopen:
    v = fopen.read().split('\n')[:-1]
v = [i.split('\t') for i in v]
v = {i[0]: i[1] for i in v}

class Tokenizer:
    def __init__(self, v):
        self.vocab = v
        pass
    
    def tokenize(self, string):
        return encode_pieces(sp_model, string, return_unicode=False, sample=False)
    
    def convert_tokens_to_ids(self, tokens):
        return [sp_model.PieceToId(piece) for piece in tokens]
    
    def convert_ids_to_tokens(self, ids):
        return [sp_model.IdToPiece(i) for i in ids]
    
tokenizer = Tokenizer(v)

In [4]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
from bert import modeling
import numpy as np
import json
import tensorflow as tf
import itertools
from unidecode import unidecode
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import re




In [5]:
BERT_INIT_CHKPNT = 'tiny-bert-v1/model.ckpt'
BERT_CONFIG = 'tiny-bert-v1/config.json'

In [6]:
from rules import normalized_chars
import random

laughing = {
    'huhu',
    'haha',
    'gagaga',
    'hihi',
    'wkawka',
    'wkwk',
    'kiki',
    'keke',
    'huehue',
    'hshs',
    'hoho',
    'hewhew',
    'uwu',
    'sksk',
    'ksks',
    'gituu',
    'gitu',
    'mmeeooww',
    'meow',
    'alhamdulillah',
    'muah',
    'mmuahh',
    'hehe',
    'salamramadhan',
    'happywomensday',
    'jahagaha',
    'ahakss',
    'ahksk'
}

def make_cleaning(s, c_dict):
    s = s.translate(c_dict)
    return s

def cleaning(string):
    """
    use by any transformer model before tokenization
    """
    string = unidecode(string)
    
    string = ' '.join(
        [make_cleaning(w, normalized_chars) for w in string.split()]
    )
    string = re.sub('\(dot\)', '.', string)
    string = (
        re.sub(re.findall(r'\<a(.*?)\>', string)[0], '', string)
        if (len(re.findall(r'\<a (.*?)\>', string)) > 0)
        and ('href' in re.findall(r'\<a (.*?)\>', string)[0])
        else string
    )
    string = re.sub(
        r'\w+:\/{2}[\d\w-]+(\.[\d\w-]+)*(?:(?:\/[^\s/]*))*', ' ', string
    )
    
    chars = '.,/'
    for c in chars:
        string = string.replace(c, f' {c} ')
        
    string = re.sub(r'[ ]+', ' ', string).strip().split()
    string = [w for w in string if w[0] != '@']
    x = []
    for word in string:
        word = word.lower()
        if any([laugh in word for laugh in laughing]):
            if random.random() >= 0.5:
                x.append(word)
        else:
            x.append(word)
    string = [w.title() if w[0].isupper() else w for w in x]
    return ' '.join(string)

In [7]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [8]:
labels = """
1. severe toxic
2. obscene
3. identity attack
4. insult
5. threat
6. asian
7. atheist
8. bisexual
9. black
10. buddhist
11. christian
12. female
13. heterosexual
14. indian
15. homosexual, gay or lesbian
16. intellectual or learning disability
17. jewish
18. latino
19. male
20. muslim
21. other disability
22. other gender
23. other race or ethnicity
24. other religion
25. other sexual orientation
26. physical disability
27. psychiatric or mental illness
28. transgender
29. white
30. malay
31. chinese
"""
labels = [l.split('. ')[1].strip() for l in labels.split('\n') if len(l)]
labels

['severe toxic',
 'obscene',
 'identity attack',
 'insult',
 'threat',
 'asian',
 'atheist',
 'bisexual',
 'black',
 'buddhist',
 'christian',
 'female',
 'heterosexual',
 'indian',
 'homosexual, gay or lesbian',
 'intellectual or learning disability',
 'jewish',
 'latino',
 'male',
 'muslim',
 'other disability',
 'other gender',
 'other race or ethnicity',
 'other religion',
 'other sexual orientation',
 'physical disability',
 'psychiatric or mental illness',
 'transgender',
 'white',
 'malay',
 'chinese']

In [9]:
import glob

files = glob.glob('../toxicity/translated*')
files

['../toxicity/translated-1750000.json',
 '../toxicity/translated-1450000.json',
 '../toxicity/translated-700000.json',
 '../toxicity/translated-350000.json',
 '../toxicity/translated-600000.json',
 '../toxicity/translated-900000.json',
 '../toxicity/translated-1000000.json',
 '../toxicity/translated-1100000.json',
 '../toxicity/translated-550000.json',
 '../toxicity/translated-150000.json',
 '../toxicity/translated-500000.json',
 '../toxicity/translated-1500000.json',
 '../toxicity/translated-1150000.json',
 '../toxicity/translated-750000.json',
 '../toxicity/translated-850000.json',
 '../toxicity/translated-1650000.json',
 '../toxicity/translated-300000.json',
 '../toxicity/translated-650000.json',
 '../toxicity/translated-950000.json',
 '../toxicity/translated-250000.json',
 '../toxicity/translated-1600000.json',
 '../toxicity/translated-0.json',
 '../toxicity/translated-1550000.json',
 '../toxicity/translated-1800000.json',
 '../toxicity/translated-450000.json',
 '../toxicity/transl

In [10]:
X, Y = [], []

for file in files:
    print(file)
    with open(file) as fopen:
        f = json.load(fopen)
        for row in f:
            if len(row[1]) == 29:
                X.append(row[0])
                Y.append(row[1] + [0, 0])
        
    
len(X)

../toxicity/translated-1750000.json
../toxicity/translated-1450000.json
../toxicity/translated-700000.json
../toxicity/translated-350000.json
../toxicity/translated-600000.json
../toxicity/translated-900000.json
../toxicity/translated-1000000.json
../toxicity/translated-1100000.json
../toxicity/translated-550000.json
../toxicity/translated-150000.json
../toxicity/translated-500000.json
../toxicity/translated-1500000.json
../toxicity/translated-1150000.json
../toxicity/translated-750000.json
../toxicity/translated-850000.json
../toxicity/translated-1650000.json
../toxicity/translated-300000.json
../toxicity/translated-650000.json
../toxicity/translated-950000.json
../toxicity/translated-250000.json
../toxicity/translated-1600000.json
../toxicity/translated-0.json
../toxicity/translated-1550000.json
../toxicity/translated-1800000.json
../toxicity/translated-450000.json
../toxicity/translated-50000.json
../toxicity/translated-1050000.json
../toxicity/translated-1200000.json
../toxicity/tr

1401054

In [11]:
rejected_labels = ['black', 'white', 'jewish', 'latino']
[labels.index(l) for l in rejected_labels]
labels = [l for l in labels if l not in rejected_labels]

In [12]:
ydf = pd.DataFrame(np.array(Y))
ydf = ydf.loc[(ydf[8] == 0) & (ydf[28] == 0) & (ydf[16] == 0) & (ydf[17] == 0)]
ydf = ydf.drop([8, 28, 16, 17], axis = 1)
ix = ydf.index.tolist()
Y = ydf.values.tolist()

In [13]:
X = [X[i] for i in ix]
len(X), len(Y)

(1361040, 1361040)

In [14]:
mapping = {'severe_toxic': 'severe toxic', 'identity_hate': 'identity attack',
          'toxic': 'severe toxic', 'melayu': 'malay', 'cina': 'chinese', 'india': 'indian'}

In [15]:
def generate_onehot(tags, depth = len(labels)):
    onehot = [0] * depth
    for tag in tags:
        onehot[labels.index(tag)] = 1
    return onehot

In [16]:
with open('../toxicity/kaum.json') as fopen:
    kaum = json.load(fopen)
    
for k, v in kaum.items():
    print(k, len(v))

melayu 84851
cina 43956
india 20208


In [17]:
with open('../toxicity/weak-learning-toxicity.json') as fopen:
    scores = json.load(fopen)
    
for k, v in scores.items():
    for no in range(len(v)):
        tags = []
        for l, v_ in v[no].items():
            if round(v_) == 1:
                tags.append(mapping.get(l, l))
        tags.append(mapping[k])
        Y.append(generate_onehot(tags))
        X.append(kaum[k][no])

In [18]:
len(X), len(Y)

(1510055, 1510055)

In [19]:
from tqdm import tqdm

for i in tqdm(range(len(X))):
    X[i] = cleaning(X[i])

100%|██████████| 1510055/1510055 [04:43<00:00, 5317.97it/s] 


In [20]:
actual_t, actual_l = [], []

for i in tqdm(range(len(X))):
    if len(X[i]) > 2:
        actual_t.append(X[i])
        actual_l.append(Y[i])

100%|██████████| 1510055/1510055 [00:01<00:00, 1354246.28it/s]


In [21]:
from tqdm import tqdm

MAX_SEQ_LENGTH = 150
input_ids, input_masks, segment_ids = [], [], []

for text in tqdm(actual_t):
    tokens_a = tokenizer.tokenize(text)
    if len(tokens_a) > MAX_SEQ_LENGTH - 2:
        tokens_a = tokens_a[:(MAX_SEQ_LENGTH - 2)]
    tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
    segment_id = [0] * len(tokens)
    input_id = tokenizer.convert_tokens_to_ids(tokens)
    input_mask = [1] * len(input_id)
    padding = [0] * (MAX_SEQ_LENGTH - len(input_id))
    input_id += padding
    input_mask += padding
    segment_id += padding
    
    input_ids.append(input_id)
    input_masks.append(input_mask)
    segment_ids.append(segment_id)

100%|██████████| 1502314/1502314 [08:28<00:00, 2953.18it/s] 


In [22]:
bert_config = modeling.BertConfig.from_json_file(BERT_CONFIG)

epoch = 10
batch_size = 60
warmup_proportion = 0.1
num_train_steps = int(len(actual_t) / batch_size * epoch)
num_warmup_steps = int(num_train_steps * warmup_proportion)




In [23]:
def create_initializer(initializer_range=0.02):
    return tf.truncated_normal_initializer(stddev=initializer_range)

class Model:
    def __init__(
        self,
        dimension_output,
        learning_rate = 2e-5,
        training = True
    ):
        self.X = tf.placeholder(tf.int32, [None, None])
        self.MASK = tf.placeholder(tf.int32, [None, None])
        self.Y = tf.placeholder(tf.float32, [None, dimension_output])
        
        model = modeling.BertModel(
            config=bert_config,
            is_training=training,
            input_ids=self.X,
            input_mask=self.MASK,
            use_one_hot_embeddings=False)
        
        output_layer = model.get_sequence_output()
        output_layer = tf.layers.dense(
            output_layer,
            bert_config.hidden_size,
            activation=tf.tanh,
            kernel_initializer=create_initializer())
        self.logits_seq = tf.layers.dense(output_layer, dimension_output,
                                         kernel_initializer=create_initializer())
        self.logits_seq = tf.identity(self.logits_seq, name = 'logits_seq')
        self.logits = self.logits_seq[:, 0]
        self.logits = tf.identity(self.logits, name = 'logits')
        
        self.cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits = self.logits, labels = self.Y
            )
        )
        
        self.optimizer = optimization.create_optimizer(self.cost, learning_rate, 
                                                       num_train_steps, num_warmup_steps, False)
        
        correct_prediction = tf.equal(tf.round(tf.nn.sigmoid(self.logits)), tf.round(self.Y))
        all_labels_true = tf.reduce_min(tf.cast(correct_prediction, tf.float32), 1)
        self.accuracy = tf.reduce_mean(all_labels_true)

In [24]:
dimension_output = len(labels)
learning_rate = 2e-5

tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(
    dimension_output,
    learning_rate
)

sess.run(tf.global_variables_initializer())
var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope = 'bert')
saver = tf.train.Saver(var_list = var_lists)
saver.restore(sess, BERT_INIT_CHKPNT)




The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



INFO:tensorflow:Restoring parameters from tiny-bert-v1/model.ckpt


In [25]:
from sklearn.model_selection import train_test_split

train_input_ids, test_input_ids, train_Y, test_Y, train_mask, test_mask = train_test_split(
    input_ids, actual_l, input_masks, test_size = 0.2
)

In [26]:
from tqdm import tqdm
import time

EARLY_STOPPING, CURRENT_CHECKPOINT, CURRENT_ACC, EPOCH = 1, 0, 0, 0

while True:
    lasttime = time.time()
    if CURRENT_CHECKPOINT == EARLY_STOPPING:
        print('break epoch:%d\n' % (EPOCH))
        break

    train_acc, train_loss, test_acc, test_loss = [], [], [], []
    pbar = tqdm(
        range(0, len(train_input_ids), batch_size), desc = 'train minibatch loop'
    )
    for i in pbar:
        index = min(i + batch_size, len(train_input_ids))
        batch_x = train_input_ids[i: index]
        batch_y = train_Y[i: index]
        batch_mask = train_mask[i: index]
        acc, cost, _ = sess.run(
            [model.accuracy, model.cost, model.optimizer],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x,
                model.MASK: batch_mask
            },
        )
        train_loss.append(cost)
        train_acc.append(acc)
        pbar.set_postfix(cost = cost, accuracy = acc)
        
    pbar = tqdm(range(0, len(test_input_ids), batch_size), desc = 'test minibatch loop')
    for i in pbar:
        index = min(i + batch_size, len(test_input_ids))
        batch_x = test_input_ids[i: index]
        batch_y = test_Y[i: index]
        batch_mask = test_mask[i: index]
        acc, cost = sess.run(
            [model.accuracy, model.cost],
            feed_dict = {
                model.Y: batch_y,
                model.X: batch_x,
                model.MASK: batch_mask
            },
        )
        test_loss.append(cost)
        test_acc.append(acc)
        pbar.set_postfix(cost = cost, accuracy = acc)
        
    train_loss = np.mean(train_loss)
    train_acc = np.mean(train_acc)
    test_loss = np.mean(test_loss)
    test_acc = np.mean(test_acc)
        
    if test_acc > CURRENT_ACC:
        print(
            'epoch: %d, pass acc: %f, current acc: %f'
            % (EPOCH, CURRENT_ACC, test_acc)
        )
        CURRENT_ACC = test_acc
        CURRENT_CHECKPOINT = 0
    else:
        CURRENT_CHECKPOINT += 1
        
    print('time taken:', time.time() - lasttime)
    print(
        'epoch: %d, training loss: %f, training acc: %f, valid loss: %f, valid acc: %f\n'
        % (EPOCH, train_loss, train_acc, test_loss, test_acc)
    )
    EPOCH += 1

train minibatch loop: 100%|██████████| 20031/20031 [30:02<00:00, 11.11it/s, accuracy=0.882, cost=0.0347] 
test minibatch loop: 100%|██████████| 5008/5008 [02:59<00:00, 27.85it/s, accuracy=0.884, cost=0.0219]
train minibatch loop:   0%|          | 2/20031 [00:00<28:56, 11.53it/s, accuracy=0.883, cost=0.0249]

epoch: 0, pass acc: 0.000000, current acc: 0.906599
time taken: 1982.249727487564
epoch: 0, training loss: 0.103340, training acc: 0.826560, valid loss: 0.023942, valid acc: 0.906599



train minibatch loop: 100%|██████████| 20031/20031 [29:29<00:00, 11.32it/s, accuracy=0.902, cost=0.0296] 
test minibatch loop: 100%|██████████| 5008/5008 [03:20<00:00, 24.92it/s, accuracy=0.884, cost=0.0184] 
train minibatch loop:   0%|          | 2/20031 [00:00<29:12, 11.43it/s, accuracy=0.917, cost=0.0234]

epoch: 1, pass acc: 0.906599, current acc: 0.915179
time taken: 1969.9870555400848
epoch: 1, training loss: 0.022376, training acc: 0.912714, valid loss: 0.021530, valid acc: 0.915179



train minibatch loop: 100%|██████████| 20031/20031 [29:34<00:00, 11.29it/s, accuracy=0.902, cost=0.0264] 
test minibatch loop: 100%|██████████| 5008/5008 [03:00<00:00, 27.72it/s, accuracy=0.884, cost=0.0208] 
train minibatch loop:   0%|          | 2/20031 [00:00<29:05, 11.47it/s, accuracy=0.9, cost=0.0224]

epoch: 2, pass acc: 0.915179, current acc: 0.917465
time taken: 1954.9896001815796
epoch: 2, training loss: 0.021035, training acc: 0.919165, valid loss: 0.021043, valid acc: 0.917465



train minibatch loop: 100%|██████████| 20031/20031 [29:46<00:00, 11.21it/s, accuracy=0.922, cost=0.0253] 
test minibatch loop: 100%|██████████| 5008/5008 [03:12<00:00, 25.97it/s, accuracy=0.86, cost=0.019]   
train minibatch loop:   0%|          | 1/20031 [00:00<50:25,  6.62it/s, accuracy=0.9, cost=0.0237]

epoch: 3, pass acc: 0.917465, current acc: 0.917780
time taken: 1979.6347844600677
epoch: 3, training loss: 0.020449, training acc: 0.922044, valid loss: 0.020814, valid acc: 0.917780



train minibatch loop: 100%|██████████| 20031/20031 [29:38<00:00, 11.27it/s, accuracy=0.922, cost=0.0254] 
test minibatch loop: 100%|██████████| 5008/5008 [03:00<00:00, 27.76it/s, accuracy=0.884, cost=0.0194] 
train minibatch loop:   0%|          | 2/20031 [00:00<29:12, 11.43it/s, accuracy=0.917, cost=0.0217]

epoch: 4, pass acc: 0.917780, current acc: 0.918104
time taken: 1958.528612613678
epoch: 4, training loss: 0.020027, training acc: 0.924586, valid loss: 0.020772, valid acc: 0.918104



train minibatch loop: 100%|██████████| 20031/20031 [29:32<00:00, 11.30it/s, accuracy=0.941, cost=0.0248] 
test minibatch loop: 100%|██████████| 5008/5008 [03:00<00:00, 27.76it/s, accuracy=0.884, cost=0.0212] 
train minibatch loop:   0%|          | 2/20031 [00:00<28:53, 11.55it/s, accuracy=0.9, cost=0.0222]

epoch: 5, pass acc: 0.918104, current acc: 0.918211
time taken: 1952.6419751644135
epoch: 5, training loss: 0.019691, training acc: 0.926600, valid loss: 0.020754, valid acc: 0.918211



train minibatch loop: 100%|██████████| 20031/20031 [30:06<00:00, 11.09it/s, accuracy=0.922, cost=0.0256] 
test minibatch loop: 100%|██████████| 5008/5008 [03:01<00:00, 27.62it/s, accuracy=0.884, cost=0.0187] 

time taken: 1987.691041469574
epoch: 6, training loss: 0.019411, training acc: 0.928529, valid loss: 0.020810, valid acc: 0.918111

break epoch:7






In [27]:
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, 'tiny-bert-toxic/model.ckpt')

'tiny-bert-toxic/model.ckpt'

In [28]:
dimension_output = len(labels)
learning_rate = 2e-5

tf.reset_default_graph()
sess = tf.InteractiveSession()
model = Model(
    dimension_output,
    learning_rate,
    training=False
    
)

sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(tf.trainable_variables())
saver.restore(sess, 'tiny-bert-toxic/model.ckpt')



INFO:tensorflow:Restoring parameters from tiny-bert-toxic/model.ckpt


In [31]:
stack = []

pbar = tqdm(
    range(0, len(test_input_ids), batch_size), desc = 'validation minibatch loop'
)
for i in pbar:
    index = min(i + batch_size, len(test_input_ids))
    batch_x = test_input_ids[i: index]
    batch_mask = test_mask[i: index]
    stack.append(sess.run(tf.nn.sigmoid(model.logits),
            feed_dict = {
                model.X: batch_x,
                model.MASK: batch_mask
            },
    ))

validation minibatch loop: 100%|██████████| 5008/5008 [14:05<00:00,  5.92it/s]


In [41]:
from sklearn import metrics

print(metrics.classification_report(np.around(np.array(test_Y)),
                                    np.around(np.concatenate(stack,axis=0)),
                                    target_names=labels,
                                    digits=5))

                                     precision    recall  f1-score   support

                       severe toxic    0.77495   0.77346   0.77421      9857
                            obscene    0.62343   0.41033   0.49492      2788
                    identity attack    0.55057   0.34761   0.42616      1378
                             insult    0.69412   0.56324   0.62187     12659
                             threat    0.60825   0.13170   0.21651       448
                              asian    0.66667   0.47478   0.55459       337
                            atheist    0.85784   0.92593   0.89059       189
                           bisexual    1.00000   0.05263   0.10000        19
                           buddhist    0.63043   0.67442   0.65169        43
                          christian    0.79541   0.89441   0.84201      4612
                             female    0.85257   0.92515   0.88738      6907
                       heterosexual    0.67785   0.78295   0.72662       12

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [42]:
strings = ','.join(
    [
        n.name
        for n in tf.get_default_graph().as_graph_def().node
        if ('Variable' in n.op
        or 'Placeholder' in n.name
        or 'logits' in n.name
        or 'alphas' in n.name
        or 'self/Softmax' in n.name)
        and 'adam' not in n.name
        and 'beta' not in n.name
        and 'global_step' not in n.name
    ]
)
strings.split(',')

['Placeholder',
 'Placeholder_1',
 'Placeholder_2',
 'bert/embeddings/word_embeddings',
 'bert/embeddings/token_type_embeddings',
 'bert/embeddings/position_embeddings',
 'bert/embeddings/LayerNorm/gamma',
 'bert/encoder/layer_0/attention/self/query/kernel',
 'bert/encoder/layer_0/attention/self/query/bias',
 'bert/encoder/layer_0/attention/self/key/kernel',
 'bert/encoder/layer_0/attention/self/key/bias',
 'bert/encoder/layer_0/attention/self/value/kernel',
 'bert/encoder/layer_0/attention/self/value/bias',
 'bert/encoder/layer_0/attention/self/Softmax',
 'bert/encoder/layer_0/attention/output/dense/kernel',
 'bert/encoder/layer_0/attention/output/dense/bias',
 'bert/encoder/layer_0/attention/output/LayerNorm/gamma',
 'bert/encoder/layer_0/intermediate/dense/kernel',
 'bert/encoder/layer_0/intermediate/dense/bias',
 'bert/encoder/layer_0/output/dense/kernel',
 'bert/encoder/layer_0/output/dense/bias',
 'bert/encoder/layer_0/output/LayerNorm/gamma',
 'bert/encoder/layer_1/attention/sel

In [43]:
def freeze_graph(model_dir, output_node_names):

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    input_checkpoint = checkpoint.model_checkpoint_path

    absolute_model_dir = '/'.join(input_checkpoint.split('/')[:-1])
    output_graph = absolute_model_dir + '/frozen_model.pb'
    clear_devices = True
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = clear_devices
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [44]:
freeze_graph('tiny-bert-toxic', strings)

INFO:tensorflow:Restoring parameters from tiny-bert-toxic/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 75 variables.
INFO:tensorflow:Converted 75 variables to const ops.
2494 ops in the final graph.


In [45]:
import boto3

bucketName = 'huseinhouse-storage'
Key = 'tiny-bert-toxic/frozen_model.pb'
outPutname = "v34/toxicity/tiny-bert-toxicity.pb"

s3 = boto3.client('s3')
s3.upload_file(Key,bucketName,outPutname)