In [12]:
"""
Google cloud variables.
"""
# NOTE: the angle-bracket values are template placeholders, not valid Python.
# Replace each one with a quoted string before running this cell.

# Project id handed to `gcloud config set project`.
GCLOUD_PROJECT=<GOOGLE_CLOUD_PROJECT>
# Cloud Storage bucket name (without the `gs://` prefix) that holds the
# validation dataset and the fine-tuned model.
GCS_BUCKET=<CLOUD_STORAGE_BUCKET>
# Path inside the bucket from which config.json and tf_model.h5 are copied.
GCS_MODEL_DIR='so-quality/t5-model/01'

# environment setup

In [2]:
from google.colab import auth
# Authenticate the current Colab user.
# NOTE(review): presumably this is what authorizes the gs:// reads and gsutil
# copies further down -- confirm.
auth.authenticate_user()

# Point the gcloud CLI at the project configured in GCLOUD_PROJECT.
!gcloud config set project $GCLOUD_PROJECT

Updated property [core/project].


In [3]:
# transformers is pinned; sentencepiece and tf-models-official are not.
# NOTE(review): pin the remaining two packages as well so that a fresh runtime
# reproduces the same environment (`-U` pulls whatever release is latest).
%pip install transformers==4.11.3
%pip install sentencepiece
# Provides the `official.nlp` package imported in the next cell.
%pip install -q -U tf-models-official



In [4]:
import os
import numpy as np
import tensorflow as tf
from official.nlp import optimization

# Choose a tf.distribute strategy based on the accelerator the runtime exposes.
# `strategy` is used by the later cells via `strategy.scope()`.
if os.environ.get('COLAB_TPU_ADDR'):
  # COLAB_TPU_ADDR is set (and non-empty): connect to the TPU cluster and
  # initialise the TPU system before building the strategy on top of it.
  cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
  tf.config.experimental_connect_to_cluster(cluster_resolver)
  tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
  strategy = tf.distribute.TPUStrategy(cluster_resolver)
  print('Using TPU')
elif tf.config.list_physical_devices('GPU'):
  # At least one GPU is visible to TensorFlow.
  strategy = tf.distribute.MirroredStrategy()
  print('Using GPU')
else:
  # No accelerator detected: fall back to the current default strategy.
  strategy = tf.distribute.get_strategy()
  print('Running on CPU is not recommended.')

INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Initializing the TPU system: grpc://10.60.245.2:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.60.245.2:8470


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Using TPU


## dataset

In [5]:
import tensorflow as tf

def tf_record_decoder(encoded):
    """Parse one serialized example into a dict of fixed-length int64 features.

    Args:
        encoded: a single serialized record, as yielded by the TFRecord dataset.

    Returns:
        The result of ``tf.io.parse_single_example`` with the keys
        ``input_ids`` and ``attention_mask`` (length 512), ``labels`` and
        ``decoder_attention_mask`` (length 2) and ``class`` (length 1).
    """
    # (feature name, fixed length) pairs; every feature is stored as int64.
    feature_lengths = (
        ("input_ids", 512),
        ("attention_mask", 512),
        ("labels", 2),
        ("decoder_attention_mask", 2),
        ("class", 1),
    )
    schema = {
        name: tf.io.FixedLenFeature([length], tf.int64)
        for name, length in feature_lengths
    }
    return tf.io.parse_single_example(encoded, schema)

def dataset_prepare(dataset, batch_size=32, training=False):
    """Decode, cache, optionally shuffle, batch and prefetch a record dataset.

    Args:
        dataset: dataset of serialized records; every element is passed to
            ``tf_record_decoder``.
        batch_size: number of examples per batch.
        training: when true, examples are shuffled (buffer of 1024 examples)
            before batching. When false the original record order is kept.

    Returns:
        A dataset of batched feature dicts.
    """
    dataset = dataset.map(tf_record_decoder)
    # Cache the decoded examples so parsing is not repeated on every pass.
    # Caching happens before shuffling so the shuffle itself is not cached.
    dataset = dataset.cache()

    if training:
        dataset = dataset.shuffle(1 * 1024)

    dataset = dataset.batch(batch_size)
    # Prefetch is the last transformation so that whole batches (rather than
    # single unbatched examples) are prepared while the consumer is busy.
    return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

def eval_mapper(batch):
    """Build the inputs for a single-step ``model.predict`` call from a batch.

    The encoder inputs (``input_ids`` and ``attention_mask``) are passed
    through unchanged. The decoder receives exactly one position per example,
    filled with token id 0 and an attention mask of ones.

    Since the expected classes are expressed as a single token, one forward
    pass is enough to read the predicted class; this avoids the more
    expensive text generation strategy that T5 uses to predict sentences.

    NOTE(review): id 0 is presumably T5's decoder start (pad) token -- confirm
    against the model config.

    Args:
        batch: dict of batched features with at least ``input_ids`` and
            ``attention_mask``.

    Returns:
        dict with the keys ``input_ids``, ``attention_mask``,
        ``decoder_input_ids`` (int32) and ``decoder_attention_mask``
        (default dtype of ``tf.ones``), the last two of shape (batch, 1).
    """
    n_examples = tf.shape(batch['input_ids'])[0]
    # One decoder position per example.
    decoder_shape = (n_examples, 1)
    return {
        'input_ids': batch['input_ids'],
        'attention_mask': batch['attention_mask'],
        'decoder_input_ids': tf.zeros(decoder_shape, dtype=tf.int32),
        'decoder_attention_mask': tf.ones(decoder_shape),
    }




In [6]:
# Raw validation records, read straight from the configured bucket.
ds_tr_valid = tf.data.TFRecordDataset(f'gs://{GCS_BUCKET}/so-quality/dataset_t5_valid.tfrecord')
with strategy.scope():
    # ds_valid: decoded examples in batches of 32 (the default), not shuffled,
    # so iterating it again later yields the examples in the same order.
    ds_valid = dataset_prepare(ds_tr_valid)
    # ds_eval: the same batches converted into the model inputs for predict().
    ds_eval = ds_valid.map(eval_mapper)

## Load the model

In [14]:
import os
# Local directory the fine-tuned model is later loaded from.
os.makedirs('t5-tuned', exist_ok=True)

# Copy the model configuration and weights from Cloud Storage.
!gsutil cp gs://$GCS_BUCKET/$GCS_MODEL_DIR/config.json t5-tuned/
!gsutil cp gs://$GCS_BUCKET/$GCS_MODEL_DIR/tf_model.h5 t5-tuned/

Copying gs://ardent-strength-306418-data/so-quality/t5-model/01/config.json...
/ [0 files][    0.0 B/  1.3 KiB]                                                / [1 files][  1.3 KiB/  1.3 KiB]                                                
Operation completed over 1 objects/1.3 KiB.                                      
Copying gs://ardent-strength-306418-data/so-quality/t5-model/01/tf_model.h5...
| [1 files][850.8 MiB/850.8 MiB]   51.4 MiB/s                                   
Operation completed over 1 objects/850.8 MiB.                                    


In [15]:
import tensorflow as tf

class PredictionModel(tf.keras.Model):
    """Keras wrapper that returns predicted token ids instead of logits.

        The ``call`` function is executed on the TPU, so wrapping the model
        makes the argmax over the logits happen there as well. Only the token
        indices are then transferred between the TPU and the Colab CPU.
        Without the wrapper Colab runs out of memory, or the predict calls
        have to be issued one batch at a time, which incurs graph
        setup/tear down costs.
    """
    def __init__(self, model):
        super().__init__()
        # Wrapped model; its output must expose a 'logits' entry.
        self._model = model

    def call(self, inputs):
        """Run the wrapped model and return the argmax over the last logits axis."""
        logits = self._model(inputs)['logits']
        return tf.argmax(logits, axis=-1)


In [17]:
import numpy as np

class ClassDecoder(object):
    """ Convert predicted class-token ids into class indices.

        The class index of a token is its position in ``TOKENS``. An id that
        does not correspond to any class token is reported as -1.

        NOTE(review): the lookup catches ``ValueError``, which also covers the
        error numpy raises when a row holding several token ids is compared
        against the class ids. Such rows are therefore reported as -1 rather
        than failing.
    """
    TOKENS = ['none', 'low', 'high']

    def __init__(self, tokenizer):
        # Only the first id of each encoding is kept: every class name is
        # expected to map to a single token.
        self.tok_ids = [tokenizer.encode(name)[0] for name in self.TOKENS]

    def _index(self, x):
        """Return the position of ``x`` in ``tok_ids``, or -1 when absent."""
        try:
            position = self.tok_ids.index(x)
        except ValueError:
            position = -1
        return position

    def __call__(self, y_pred_ids):
        """Map every entry of ``y_pred_ids`` to its class index (list of ints)."""
        return list(map(self._index, y_pred_ids))


In [18]:
from transformers import T5Tokenizer, TFT5ForConditionalGeneration

with strategy.scope():
    # The tokenizer comes from the stock 't5-base' checkpoint; the model is
    # loaded from the local ./t5-tuned/ directory.
    tokenizer = T5Tokenizer.from_pretrained('t5-base')
    model = TFT5ForConditionalGeneration.from_pretrained('./t5-tuned/')
    # Wrap the model so that predict() returns token ids rather than logits.
    xmodel = PredictionModel(model)

    # Translates predicted token ids into class indices.
    decoder = ClassDecoder(tokenizer)

## Execute model predictions

Because each class is represented by a single token, the model only needs to be run once per example: with only the `<bos>` token in the decoder input, the next token it predicts is the class.

We compute the confusion matrix and accuracy score on the validation set.

In [19]:
with strategy.scope():
    # xmodel applies the argmax itself, so this yields predicted token ids
    # (not logits) for every example in ds_eval.
    y_pred_ids = xmodel.predict(ds_eval, verbose=1)

INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int64>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None, 1) dtype=int32>, <tf.Tensor 'cond_8/Identity_3:0' shape=(None, 512) dtype=int64>]




In [20]:
# Translate the predicted token ids into class indices; an id that is not one
# of the class tokens becomes -1.
decoder = ClassDecoder(tokenizer)
y_pred = np.array(decoder(y_pred_ids))

In [22]:
import tqdm

# Collect the ground-truth class ids by iterating ds_valid, the dataset that
# ds_eval was derived from, so that y_true[i] lines up with y_pred[i].
y_true_list = []
for batch in tqdm.tqdm(ds_valid):
    # Flatten the per-batch 'class' feature to one id per example.
    y_true_list.append(batch['class'].numpy().reshape(-1))
y_true = np.concatenate(y_true_list)

469it [00:01, 356.95it/s]


In [23]:
import sklearn.metrics
# Confusion matrix of true vs. predicted class ids. A prediction of -1
# (unknown token) would show up as an additional label.
cm = sklearn.metrics.confusion_matrix(y_true, y_pred)
cm

array([[4336,    0,  664],
       [   6, 4994,    0],
       [ 284,    0, 4716]])

In [24]:
# Accuracy over the complete validation set.
sklearn.metrics.accuracy_score(y_true, y_pred)

0.9364

In [28]:
# Compute the accuracy metric on the validation examples that were not used to determine the best checkpoint.
# NOTE(review): 200 * 32 presumably means that the first 200 batches of 32
# examples were used for checkpoint selection -- confirm against the training
# notebook and consider naming this constant.

sklearn.metrics.accuracy_score(y_true[200 * 32:], y_pred[200 * 32:])

0.9361627906976744

# Debug samples

Examine below a random sample of the errors the model makes on the hold-out set.



In [31]:
# Positions where label and prediction differ, relative to the start of the
# slice (i.e. offset by 200 * 32 from the start of the validation set).
misclassified = np.where(y_true[200 * 32:] != y_pred[200 * 32:])[0]

In [36]:
from IPython.display import display, HTML

def _get_index(dataset, y_true, y_pred, index, batch_size=32):
    """Fetch the label, prediction and decoded input text of one example.

    Relies on the module-level ``tokenizer`` to decode the token ids.

    Args:
        dataset: batched dataset whose elements are dicts providing
            ``input_ids``, ``attention_mask`` and ``class``.
        y_true: unused; kept so the signature stays unchanged.
        y_pred: predictions, indexable by the absolute example position.
        index: absolute position of the example in the unbatched data.
        batch_size: batch size ``dataset`` was batched with; used to turn
            ``index`` into a (batch number, position in batch) pair.

    Returns:
        Tuple ``(tx, px, s)``: the true class id read from the batch, the
        predicted class ``y_pred[index]`` and the decoded input text.

    Raises:
        IndexError: if ``dataset`` has no batch containing ``index``.
    """
    n_bucket, bi = divmod(index, batch_size)
    if n_bucket > 0:
        dataset = dataset.skip(n_bucket)
    for batch in dataset.take(1):
        token_ids = batch['input_ids'][bi]
        mask = batch['attention_mask'][bi]
        # Cut the ids at the first masked-out position so that padding is not
        # decoded.
        zi = np.where(mask == 0)[0]
        if zi.size:
            token_ids = token_ids[:zi[0]]
        s = tokenizer.decode(token_ids)
        tx = batch['class'].numpy().reshape(-1)[bi]
        px = y_pred[index]
        return tx, px, s

    # take(1) yielded nothing: the requested batch lies past the end of the
    # dataset. Fail with a clear message instead of an UnboundLocalError.
    raise IndexError(
        'index {0} is beyond the end of the dataset (batch {1} does not exist)'
        .format(index, n_bucket))

def show_debug_samples(dataset, y_true, y_pred, indices):
    """Display an HTML table with one row per requested example.

    Each row shows the example index followed by the values returned by
    ``_get_index`` (true class, predicted class, decoded input text).

    All cell values are HTML-escaped before being inserted. The decoded text
    originates from user-written posts and can contain markup (for example
    ``<select>`` or ``<option>`` tags), which would otherwise be rendered as
    live HTML inside the table instead of being shown as text.

    Args:
        dataset: batched dataset forwarded to ``_get_index``.
        y_true: forwarded to ``_get_index``.
        y_pred: forwarded to ``_get_index``.
        indices: iterable of absolute example positions to display.
    """
    # Imported locally: the notebook does not import the `html` module.
    from html import escape

    header = """
    <table>
        <thead>
            <tr>
                <th>index</th>
                <th>y_true</th>
                <th>y_pred</th>
                <th>tokens</th>
            </tr>
        </thead>
        <tbody>
    """
    footer = """
        </tbody>
    </table>
    """

    rows = []
    for ix in indices:
        values = _get_index(dataset, y_true, y_pred, ix)
        cells = ['<td>{0}</td>'.format(escape(str(ix)))]
        cells.extend('<td>{0}</td>'.format(escape(str(v))) for v in values)
        rows.append('<tr>' + ''.join(cells) + '</tr>')

    display(HTML(header + ''.join(rows) + footer))



In [37]:
import random
# Fixed seed so that the same examples are picked on every run.
random.seed(42)

# `misclassified` is relative to the hold-out slice; add the offset back to
# obtain absolute positions in the validation set.
indices = misclassified + (200 * 32)
index_list = indices.tolist()
random.shuffle(index_list)
# Show 16 randomly chosen misclassified examples.
show_debug_samples(ds_valid, y_true, y_pred, index_list[:16])

index,y_true,y_pred,tokens
12076,0,2,quality: Reverse Integer Leetcode - Explain overflow issue p>I am new to coding and practicing leetcode problems. Integer reverse problem talks about overflow. /p> p>I have searched and most of the discussion about how to handle the overflow. Could someone explain what is this overflow and why is this caused? /p>
14326,0,2,quality: Conditional return with no else p>I'm try to do something like that in Python: /p> pre> code>return None if a is None /code> /pre> p>Instead of having: /p> pre> code>if a is None: return None /code> /pre> p>But I get an invalid syntax error. /p> p>I tried to use code>pass /code>: /p> pre> code>return None if a is None else pass /code> /pre> p>But it doesn't work as well. Is there a pythonian way to do it? /p>
13361,0,2,"quality: Add font-awesome icon to option in select p>Is it possible to add font-awesome icon to code><option> /code> in code><select> /code>? /p> pre> code> <body ng-app> <i class=""fa fa-camera-retro""></i> fa-camera-retro<br> <select ng-model=""choice"" class=""fa""> <option value="""">Choose</option> <option value=""icon camera"">icon camera</option> <option value=""icon bell"">icon bell</option> <option value=""icon bicycle"">icon bicycle</option> </select> /code> /pre>"
12390,0,2,"quality: How does one force a C++ move operator without std:whatever p>I have a very large code base that has been around a while. I've been updating it with selective use of new language features. I was going to play around with move constructors but I can't even come up with a scenario convoluted to make one happen. I'm not going to throw code into my code base that I can't even unit test. /p> p>And, I cannot use std:move, because I don't use the standard libraries at all. I have my own standard libraries, along with my own everything else (about a million lines of code.) Everything is hidden within a virtual kernel that doesn't expose any language or platform headers to the outside world. /p> p>So I can't use std::move. Hopefully it doesn't do anything magical that I can't do? And I guess something similar would also be required to handle move constructors in the face of base classes (almost always the case.) If it does do something magical, that's the library leaking into the language and wouldn't be good. /p> p>Apparently there's no compile settings on Visual C++ (2017 in my case) to disable it from eliding constructors, which is presumably why I can't manage to do anything to even cause it. /p> p>Of course that also raises the question of whether it's useless if it requires this much effort (even in non-optimized mode) to make it even happen. /p>"
8385,2,0,"quality: Java spread operator p>I am not sure of the vocabulary I am using here, please correct me if I'm wrong. /p> p>In Javascript, I had the following code: /p> pre> code>let args = [1,2,3]; function doSomething (a, b, c) return a + b + c; doSomething(...args); /code> /pre> p>As you can see, when calling code>doSomething /code>, I am able to use the code>... /code> spread operator in order to ""transform"" my arguments into code>1, 2, 3 /code>. /p> p>Now, I'm trying to do the same thing with Java. /p> p>Let's say I have a code>Foo /code> class: /p> pre> code>public class Foo public int doSomething (int a, int b, int c) return a + b + c; /code> /pre> p>And now I want to call the code>doSomething /code>: /p> pre> code>int[] args = 1, 2, 3 ; /code> /pre> p>I'd like to use something like code>doSomething (...args) /code> instead of calling code>doSomething(args[0], args[1], args[2]) /code>. /p> p>I saw that this is possible in the declaration of functions, but I'd like not to change the implementation of such a function. /p>"
10219,0,2,"quality: why recv accept mort than expected number of bytes? p>When i call recv to receive exactly 7 bytes i recv more than bytes expected? /p> p>I don't know how bufsize parameter work. /p> pre> code>for i in range(data2[1]): try: tuple = envois.recv(7) time.sleep(0.1) except Exception as e: print(e) if (tuple[-2:]!= myport): tuples.append(tuple) print(""tuples : x "".format(x = tuples)) last_msg = envois.recv(3) envois.close() return (last_msg,tuples) /code> /pre>"
13641,0,2,"quality: Is there a way to get all keys that match a string in a json file's values and output them to a text file? p>I'm trying to get all keys' values that equal ""url"" ignoring nesting from a JSON file and then output them to a text file. How would I go about doing this? /p> p>I'm running Python 3.7 and cannot seem to find a solution. /p> pre> code>r = requests.get('https://launchermeta.mojang.com/mc/game/version_manifest.json') j = r.json() /code> /pre> p>The result expected from this would be a text file filled with links from this json file. /p> pre> code>https://launchermeta.mojang.com/v1/packages/31fa028661857f2e3d3732d07a6d36ec21d6dbdc/a1.2.3_02.json https://launchermeta.mojang.com/v1/packages/2dbccc4579a4481dc8d72a962d396de044648522/a1.2.3_01.json https://launchermeta.mojang.com/v1/packages/48f077bf27e0a01a0bb2051e0ac17a96693cb730/a1.2.3.json etc. /code> /pre>"
13417,0,2,"quality: How to create a UIButton similar to type.system? p>How to create a code>UIButton /code> with tap effect similar to code>UIButton(type:.system) /code>, but without specifying code>.system /code> type? /p> pre> code>let btn = UIButton(frame: CGRect.zero) // How to set the tap effect? /code> /pre> p>How to set the tap effect which blinks the text when tapped? /p>"
7999,0,2,"quality: show uidatepicker, when I tap on view p>I have a view with UITapGestureRecognizer /p> p>I want to show uidatepicker, when I tap on view /p> p>How can I do it? /p> p>thanks in advance /p>"
11714,2,0,"quality: Back and forth loop Python p>I want to create an infinite loop that counts up and down from 0 to 100 to 0 (and so on) and only stops when some convergence criterion inside the loop is met, so basically something like this: /p> pre> code>for i in range(0, infinity): for j in range(0, 100, 1): print(j) # (in my case 100 lines of code) for j in range(100, 0, -1): print(j) # (same 100 lines of code as above) /code> /pre> p>Is there any way to merge the two for loops over j into one so that I don't have write out the same code inside the loops twice? /p>"
