In [1]:
%load_ext autoreload
%autoreload 2

This notebook is based on:



1.   https://towardsdatascience.com/hugging-face-transformers-fine-tuning-distilbert-for-binary-classification-tasks-490f1d192379
2.   https://www.analyticsvidhya.com/blog/2020/07/transfer-learning-for-nlp-fine-tuning-bert-for-text-classification/
3.   https://huggingface.co/transformers/training.html#fine-tuning-with-keras




**problem statement:**


*   a developer has to inspect an **artifact X**
*   Within the artifact, only a portion of the text is relevant to **input task Y**
*   We ought to build a model that establishes relationships between **Y** and **sentences x ∈ X** 
*  The model must determine: **is x relevant to task Y?**




<br>

___

*Example of a task and an annotated artifact:*

<br>

[<img src="https://i.imgur.com/Zj1317H.jpg">](https://i.imgur.com/Zj1317H.jpg)




* The coloured sentences are sentences annotated as relevant to the input task. 
* The warmer the color, the more annotators selected that portion of the text. 
* For simplicity, we processed the data at the sentence level: each artifact sentence is one data point.

<br>

___

*Ultimately, our data is a tuple representing:*


*   **text** = artifact sentence

*   **question** = task description

*   **source** = URL of the artifact

*   **category_index** = whether the sentence is relevant [or not] to the input task

*   **weights** = number of participants who annotated the sentence as relevant


<br>

___



In [2]:
# @title Import data as JSON
import itertools
import json
import logging
import os
import sys
import random
from pathlib import Path

from Levenshtein import ratio
from colorama import Fore, Style

# Root logger: everything at DEBUG and above is echoed to the notebook's stdout.
logger = logging.getLogger()
# Use setLevel() rather than assigning `.level` directly, so the logging
# module validates the value and invalidates its internal level cache.
logger.setLevel(logging.DEBUG)

# Re-running this cell must not stack a second stdout handler (every message
# would then be printed twice), so reuse an existing one when present.
stream_handler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.StreamHandler)
        and getattr(handler, 'stream', None) is sys.stdout
    ),
    None,
)
if stream_handler is None:
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

from ds_android import get_input_for_BERT

# Load the annotated records used throughout the notebook.
raw_data = get_input_for_BERT()

# Show the first record, pretty-printed with sorted keys, as a schema example.
sample_entry = json.dumps(raw_data[0], indent=4, sort_keys=True)
print('Sample entry from data:', sample_entry, sep='\n')

[31m39 [33m129 [0m https://developer.android.com/training/permissions/requesting
[31m14 [33m21 [0m https://stackoverflow.com/questions/5233543
[31m4 [33m34 [0m https://github.com/morenoh149/react-native-contacts/issues/516
[31m27 [33m63 [0m https://guides.codepath.com/android/Understanding-App-Permissions
[31m9 [33m161 [0m https://www.avg.com/en/signal/guide-to-android-app-permissions-how-to-use-them-smartly
[31m9 [33m15 [0m https://developer.android.com/training/volley/request
[31m14 [33m65 [0m https://stackoverflow.com/questions/28504524
[31m20 [33m59 [0m https://medium.com/@JasonCromer/android-asynctask-http-request-tutorial-6b429d833e28
[31m5 [33m97 [0m https://www.twilio.com/blog/5-ways-to-make-http-requests-in-java
[31m4 [33m12 [0m https://stackoverflow.com/questions/33241952
[31m6 [33m33 [0m https://github.com/realm/realm-java/issues/776
[31m3 [33m17 [0m https://stackoverflow.com/questions/8712652
[31m8 [33m59 [0m https://dzone.com/articles

[31m4 [33m54 [0m https://developer.android.com/training/gestures/scroll
[31m4 [33m16 [0m https://stackoverflow.com/questions/39588322
[31m20 [33m196 [0m https://developer.android.com/training/dependency-injection/dagger-android
[31m6 [33m44 [0m https://stackoverflow.com/questions/57235136
[31m24 [33m121 [0m https://guides.codepath.com/android/dependency-injection-with-dagger-2
Sample entry from data:
{
    "category_index": 1,
    "question": "Permission Denial when trying to access contacts in Android",
    "source": "https://developer.android.com/training/permissions/requesting",
    "text": "Every Android app runs in a limited-access sandbox.",
    "weights": 1
}


In [3]:
# @title DICT that will store fold results
# Results of a previous execution for the same configuration can be re-loaded
# from `config_output`. Loading is disabled by default (LOAD_CACHED_RESULTS),
# so every run starts from an empty result set.

config_output = 'bert_ds_synthetic_best_config.json'
LOAD_CACHED_RESULTS = False

fold_results = dict()
if LOAD_CACHED_RESULTS and os.path.isfile(config_output):
    logger.info(Fore.YELLOW + "Loading data from cache" + Style.RESET_ALL)
    with open(config_output) as input_file:
        fold_results = json.load(input_file)

# Initialise the key *after* any cache load, so it exists even if the cached
# file was written without it.
fold_results.setdefault('venn_diagram_set', [])

In [4]:
# @title Set environment variables

import os
import contextlib
import tensorflow as tf
import os
import codecs
import numpy as np
import math
import json

import numpy as np
import pandas as pd

from collections import defaultdict, Counter
from tqdm import tqdm

# Set to True only on a runtime that exposes a TPU through COLAB_TPU_ADDR.
USE_TPU = False
os.environ['TF_KERAS'] = '1'

# @title Initialize TPU Strategy
if USE_TPU:
    # `tf.contrib` does not exist in TensorFlow 2.x, so use the TF2 API:
    # resolve the TPU worker, connect to it, initialise the TPU system and
    # build a distribution strategy.
    TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_WORKER)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    # `tf.distribute.TPUStrategy` is the stable name; older 2.x releases only
    # expose it under `tf.distribute.experimental`.
    _tpu_strategy_cls = (
        getattr(tf.distribute, 'TPUStrategy', None)
        or tf.distribute.experimental.TPUStrategy
    )
    # NOTE(review): `strategy` is not passed to anything in the visible cells;
    # confirm whether the model build is expected to run under its scope.
    strategy = _tpu_strategy_cls(resolver)
    
from TFBertForTaskTextClassification import TFBertForTaskTextClassification
from TFBertForTaskTextClassification import TFBertForAndroidTaskTextClassification
from TFBertForTaskTextClassification import TFBertForSyntheticTaskTextClassification 

from metrics import MetricsAggregator

Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.


# BERT

In [5]:
# Directory holding the locally cached pretrained files. Override it with the
# SCRATCH_DIR environment variable; the default keeps the original location.
SCRATCH_DIR = os.environ.get('SCRATCH_DIR', '/home/msarthur/scratch')

# Init the model
model = TFBertForSyntheticTaskTextClassification(model_id = 'bert-base-uncased')

# Configure filters. All other values are as default
model.target_output = 10
model.use_frame_filtering = True
model.match_frame_from_task = False
model.n_undersampling = 3 # this dataset is smaller, so we don't do as much undersampling

# Load tokenizer (local_files_only=True: nothing is downloaded, the files must
# already be present in the cache directory)
model.tokenizer(cache_dir=SCRATCH_DIR, local_files_only=True)

In [6]:
# model = TFBertForSequenceClassification.from_pretrained(model_id, cache_dir='/home/msarthur/scratch', local_files_only=True)

In [7]:
# @title 10-fold cross validation WIP
from sklearn.model_selection import KFold

# Scratch location for the log file, the model checkpoint and the pretrained
# model cache. Override it with the SCRATCH_DIR environment variable; the
# default keeps the original location.
SCRATCH_DIR = os.environ.get('SCRATCH_DIR', '/home/msarthur/scratch')

# A fold whose stored `run_cnt` has reached this value is not evaluated again.
MAX_RUNS_PER_FOLD = 5

CORPUS = raw_data

# Shuffle the (sorted, de-duplicated) task list once with a fixed seed so that
# fold membership is reproducible across runs.
all_tasks = sorted(set(d['question'] for d in raw_data))
rseed = 20210343
random.seed(rseed)
random.shuffle(all_tasks)

# Mirror the log into a file. Reuse an already-attached handler for the same
# file so that re-running this cell does not duplicate every log line.
log_path = os.path.join(SCRATCH_DIR, 'LOG-bert_ds_synthetic.ans')
file_handler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == os.path.abspath(log_path)
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)


n_splits = 10
# The task list is already shuffled above, so KFold splits it in order.
# `random_state` is deliberately omitted: without `shuffle=True` it has no
# effect on the splits, and recent scikit-learn versions raise a ValueError
# for that combination.
kf = KFold(n_splits=n_splits)
np_tasks_arr = np.array(all_tasks)


# <------------------------------------------------------------------------- TRAIN
# NOTE(review): the model is fitted once, before the fold loop, on the split
# returned for a single dummy test task. Confirm that get_train_val_test()
# keeps the tasks evaluated in each fold out of df_train; otherwise the fold
# metrics below are measured on data seen during training.
df_train, df_val, _, weights = model.get_train_val_test(
    CORPUS, [all_tasks[0]] # dummy test data
)

logger.info('-' * 10)
logger.info(Fore.RED + 'train'+ Style.RESET_ALL)
logger.info(str(df_train.category_index.value_counts()))
logger.info("")

logger.info(Fore.RED + 'weights'+ Style.RESET_ALL)
logger.info(str(weights))
logger.info('-' * 10)


# Encode X_train
train_encodings = model.encode(df_train)
train_labels = df_train['category_index'].tolist()

# Encode X_valid
val_encodings = model.encode(df_val)
val_labels = df_val['category_index'].tolist()


# https://huggingface.co/transformers/custom_datasets.html
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_labels
))

val_dataset = tf.data.Dataset.from_tensor_slices((
    dict(val_encodings),
    val_labels
))


fine_tunned_keras_model = model.build(
    train_dataset, val_dataset, weights,
    checkpoint_filepath=os.path.join(SCRATCH_DIR, 'best_model'),
    cache_dir=SCRATCH_DIR,
    local_files_only=True
)


# <-------------------------------------------------------------------------------- FOLDS
for fold_idx, (_, test_index) in enumerate(kf.split(np_tasks_arr)):

    # Fold results are stored under the fold index as a string key.
    fold_key = str(fold_idx)

    # Cap the number of runs recorded per fold (several runs per fold avoid
    # reporting a peak result from a single run).
    fold_fully_tested = (
        fold_key in fold_results
        and fold_results[fold_key]['run_cnt'] >= MAX_RUNS_PER_FOLD
    )

    if fold_fully_tested:
        logger.info(Fore.RED + f"Fold {fold_key} FULLY TESTED" + Style.RESET_ALL)
    else:
        model.metrics.reset_aggregators()

        test_tasks_lst = np_tasks_arr[test_index].tolist()

        logger.info("")
        logger.info(Fore.RED + f"Fold {fold_key}" + Style.RESET_ALL)
        logger.info('\n'.join(test_tasks_lst))

        # <------------------------------------------------------------------------- INPUT
        # Only the test split is needed here; the class weights returned with
        # it are discarded so the training `weights` are not overwritten.
        _, _, df_test, _ = model.get_train_val_test(
            CORPUS, test_tasks_lst
        )

        logger.info(Fore.RED + 'test'+ Style.RESET_ALL)
        logger.info(str(df_test.category_index.value_counts()))
        logger.info("")

        if model.match_frame_from_task:
            __frame_pairs = model.fn_frame_pairs.get_most_common_frame_relationships(df_train)
            model.sentence_task_frame_pairs = __frame_pairs

        # <------------------------------------------------------------------------- TEST
        logger.info("")
        logger.info(Fore.RED + "Testing model" + Style.RESET_ALL)
        for source in df_test["source"].unique():
            df_source = df_test[df_test["source"] == source]
            logger.info(source)

            model.test(source, df_source, fine_tunned_keras_model)


        # <------------------------------------------------------------------------- METRICS

        prediction_metrics, api_metrics, so_metrics, git_metrics, misc_metrics = model.get_evaluation_metrics()

        MetricsAggregator.add_idx_fold_results(
            fold_key, fold_results, prediction_metrics,
            api_metrics, so_metrics, git_metrics, misc_metrics
        )

        # De-duplicate while keeping first-seen order, so the stored list is
        # deterministic (a plain set() would reorder it from run to run).
        fold_results['venn_diagram_set'] += model.metrics.venn_diagram_set
        fold_results['venn_diagram_set'] = list(dict.fromkeys(fold_results['venn_diagram_set']))


        _precision, _recall, _f1score = MetricsAggregator.avg_macro_metric_for(prediction_metrics)

        logger.info("")
        logger.info(Fore.YELLOW + "Model metrics" + Style.RESET_ALL)
        logger.info("precision: " + Fore.RED + "{:.3f}".format(_precision) + Style.RESET_ALL)
        logger.info("recall:    " + Fore.RED + "{:.3f}".format(_recall) + Style.RESET_ALL)
        logger.info("f1-score:  " + Fore.RED + "{:.3f}".format(_f1score) + Style.RESET_ALL)


    # Kept as a module-level name: index of the next fold to be processed.
    idx_split = fold_idx + 1
    logger.info(f"next {idx_split}")


100%|██████████| 7903/7903 [00:00<00:00, 2781070.94it/s]

----------
[31mtrain[0m
0    491
1    164
Name: category_index, dtype: int64

[31mweights[0m
{0: 1.0, 1: 3.0}
----------



All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
AutoGraph could not transform <bound method Socket.send of <zmq.sugar.socket.Socket object at 0x2b2c434c63d0>> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
The parameter `return_dict` cannot be set in graph mode an

100%|██████████| 7903/7903 [00:00<00:00, 1846353.51it/s]

[31mtest[0m
0    706
1     29
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/codelabs/basic-android-kotlin-training-recyclerview-scrollable-list





https://guides.codepath.com/android/using-the-recyclerview
https://guides.codepath.com/android/converting-json-to-models
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
--------------------
Y
[0s] 29 [1s] 2
predicted
[0s] 21 [1s] 10
--------------------
Accuracy: 0.7419
macro_f1: 0.5867
Precision: 0.6000
Recall: 0.8621
F1: 0.5867
[31m2[0m entries logged
https://stackoverflow.com/questions/37096547
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will alwa

100%|██████████| 7903/7903 [00:00<00:00, 2099808.98it/s]

[31mtest[0m
0    666
1     38
Name: category_index, dtype: int64


[31mTesting model[0m
https://github.com/FasterXML/jackson-databind/issues/1538





--------------------
Y
[0s] 34 [1s] 2
predicted
[0s] 29 [1s] 7
--------------------
Accuracy: 0.7500
macro_f1: 0.4286
Precision: 0.4655
Recall: 0.3971
F1: 0.4286
[31m0[0m entries logged
https://medium.com/@chahat.jain0/rendering-a-pdf-document-in-android-activity-fragment-using-pdfrenderer-442462cb8f9a
--------------------
Y
[0s] 22 [1s] 2
predicted
[0s] 16 [1s] 8
--------------------
Accuracy: 0.6667
macro_f1: 0.4947
Precision: 0.5312
Recall: 0.5909
F1: 0.4947
[31m1[0m entries logged
https://developer.android.com/guide/topics/providers/content-provider-creating
https://developer.android.com/training/basics/firstapp/starting-activity
https://guides.codepath.com/android/creating-and-using-fragments
--------------------
Y
[0s] 153 [1s] 10
predicted
[0s] 140 [1s] 23
--------------------
Accuracy: 0.7975
macro_f1: 0.4437
Precision: 0.4643
Recall: 0.4248
F1: 0.4437
[31m0[0m entries logged
https://stackoverflow.com/questions/30362446
--------------------
Y
[0s] 39 [1s] 3
predicted
[0s]

100%|██████████| 7903/7903 [00:00<00:00, 1436888.66it/s]

[31mtest[0m
0    1304
1      54
Name: category_index, dtype: int64


[31mTesting model[0m
https://dzone.com/articles/rxjava-idiomatic-concurrency-flatmap-vs-parallel





--------------------
Y
[0s] 116 [1s] 1
predicted
[0s] 110 [1s] 7
--------------------
Accuracy: 0.9316
macro_f1: 0.4823
Precision: 0.4955
Recall: 0.4698
F1: 0.4823
[31m0[0m entries logged
https://www.avg.com/en/signal/guide-to-android-app-permissions-how-to-use-them-smartly
--------------------
Y
[0s] 158 [1s] 3
predicted
[0s] 144 [1s] 17
--------------------
Accuracy: 0.8758
macro_f1: 0.4669
Precision: 0.4896
Recall: 0.4462
F1: 0.4669
[31m0[0m entries logged
https://stackoverflow.com/questions/5233543
--------------------
Y
[0s] 13 [1s] 8
predicted
[0s] 14 [1s] 7
--------------------
Accuracy: 0.5714
macro_f1: 0.5333
Precision: 0.5357
Recall: 0.5337
F1: 0.5333
[31m3[0m entries logged
https://developer.android.com/codelabs/advanced-kotlin-coroutines#7
https://medium.com/mindorks/instrumentation-testing-with-mockwebserver-and-dagger2-56778477f0cf
--------------------
Y
[0s] 70 [1s] 2
predicted
[0s] 66 [1s] 6
--------------------
Accuracy: 0.8889
macro_f1: 0.4706
Precision: 0.4848


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 1610200.36it/s]

[31mtest[0m
0    781
1     37
Name: category_index, dtype: int64


[31mTesting model[0m
https://stackoverflow.com/questions/28504524





--------------------
Y
[0s] 61 [1s] 4
predicted
[0s] 57 [1s] 8
--------------------
Accuracy: 0.8154
macro_f1: 0.4492
Precision: 0.4649
Recall: 0.4344
F1: 0.4492
[31m0[0m entries logged
https://developer.android.com/guide/topics/media/camera
--------------------
Y
[0s] 239 [1s] 11
predicted
[0s] 231 [1s] 19
--------------------
Accuracy: 0.8880
macro_f1: 0.5035
Precision: 0.5047
Recall: 0.5078
F1: 0.5035
[31m1[0m entries logged
https://stackoverflow.com/questions/26838730
--------------------
Y
[0s] 18 [1s] 7
predicted
[0s] 16 [1s] 9
--------------------
Accuracy: 0.7600
macro_f1: 0.7243
Precision: 0.7153
Recall: 0.7460
F1: 0.7243
[31m5[0m entries logged
https://medium.com/@JasonCromer/android-asynctask-http-request-tutorial-6b429d833e28
--------------------
Y
[0s] 52 [1s] 7
predicted
[0s] 50 [1s] 9
--------------------
Accuracy: 0.7627
macro_f1: 0.4939
Precision: 0.4956
Recall: 0.4945
F1: 0.4939
[31m1[0m entries logged
https://docs.oracle.com/javase/8/javafx/layout-tutorial/si

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 1839080.37it/s]

[31mtest[0m
0    265
1     11
Name: category_index, dtype: int64


[31mTesting model[0m
https://github.com/realm/realm-java/issues/776





--------------------
Y
[0s] 31 [1s] 2
predicted
[0s] 28 [1s] 5
--------------------
Accuracy: 0.7879
macro_f1: 0.4407
Precision: 0.4643
Recall: 0.4194
F1: 0.4407
[31m0[0m entries logged
https://developer.android.com/work/dpc/dedicated-devices/lock-task-mode
https://developer.android.com/guide/topics/media-apps/volume-and-earphones
https://dzone.com/articles/android-rotate-and-scale
--------------------
Y
[0s] 58 [1s] 1
predicted
[0s] 55 [1s] 4
--------------------
Accuracy: 0.9153
macro_f1: 0.4779
Precision: 0.4909
Recall: 0.4655
F1: 0.4779
[31m0[0m entries logged
https://stackoverflow.com/questions/8712652
--------------------
Y
[0s] 15 [1s] 2
predicted
[0s] 12 [1s] 5
--------------------
Accuracy: 0.7059
macro_f1: 0.5503
Precision: 0.5583
Recall: 0.6167
F1: 0.5503
[31m1[0m entries logged
https://github.com/signalapp/Signal-Android/issues/3376
--------------------
Y
[0s] 54 [1s] 3
predicted
[0s] 52 [1s] 5
--------------------
Accuracy: 0.8596
macro_f1: 0.4623
Precision: 0.4712
R

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 2073927.58it/s]

[31mtest[0m
0    829
1     38
Name: category_index, dtype: int64


[31mTesting model[0m
https://docs.oracle.com/javase/8/docs/api/java/util/stream/Stream.html





--------------------
Y
[0s] 59 [1s] 5
predicted
[0s] 54 [1s] 10
--------------------
Accuracy: 0.7656
macro_f1: 0.4336
Precision: 0.4537
Recall: 0.4153
F1: 0.4336
[31m0[0m entries logged
https://developer.android.com/guide/topics/ui/notifiers/notifications
--------------------
Y
[0s] 144 [1s] 2
predicted
[0s] 125 [1s] 21
--------------------
Accuracy: 0.8425
macro_f1: 0.4572
Precision: 0.4920
Recall: 0.4271
F1: 0.4572
[31m0[0m entries logged
https://stackoverflow.com/questions/122105
--------------------
Y
[0s] 130 [1s] 1
predicted
[0s] 116 [1s] 15
--------------------
Accuracy: 0.8931
macro_f1: 0.5340
Precision: 0.5333
Recall: 0.9462
F1: 0.5340
[31m1[0m entries logged
https://www.raywenderlich.com/324-viewpager-tutorial-getting-started-in-kotlin
--------------------
Y
[0s] 165 [1s] 12
predicted
[0s] 151 [1s] 26
--------------------
Accuracy: 0.8079
macro_f1: 0.4988
Precision: 0.5053
Recall: 0.5106
F1: 0.4988
[31m2[0m entries logged
https://stackoverflow.com/questions/36275986


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 1617902.41it/s]

[31mtest[0m
0    1237
1      44
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/guide/topics/providers/content-provider-creating





https://www.raywenderlich.com/155-android-listview-tutorial-with-kotlin
--------------------
Y
[0s] 203 [1s] 8
predicted
[0s] 180 [1s] 31
--------------------
Accuracy: 0.8246
macro_f1: 0.4773
Precision: 0.4967
Recall: 0.4886
F1: 0.4773
[31m1[0m entries logged
https://stackoverflow.com/questions/11064244
--------------------
Y
[0s] 47 [1s] 4
predicted
[0s] 45 [1s] 6
--------------------
Accuracy: 0.8039
macro_f1: 0.4457
Precision: 0.4556
Recall: 0.4362
F1: 0.4457
[31m0[0m entries logged
https://github.com/quarkusio/quarkus/issues/3954
https://stackoverflow.com/questions/29738510
--------------------
Y
[0s] 21 [1s] 2
predicted
[0s] 19 [1s] 4
--------------------
Accuracy: 0.7391
macro_f1: 0.4250
Precision: 0.4474
Recall: 0.4048
F1: 0.4250
[31m0[0m entries logged
https://guides.codepath.com/android/Using-an-ArrayAdapter-with-ListView
--------------------
Y
[0s] 47 [1s] 12
predicted
[0s] 49 [1s] 10
--------------------
Accuracy: 0.7288
macro_f1: 0.5530
Precision: 0.5582
Recall: 0.55

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 1662366.32it/s]

[31mtest[0m
0    871
1     27
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/reference/android/widget/TextView





--------------------
Y
[0s] 468 [1s] 2
predicted
[0s] 420 [1s] 50
--------------------
Accuracy: 0.8936
macro_f1: 0.4911
Precision: 0.5088
Recall: 0.6976
F1: 0.4911
[31m1[0m entries logged
https://javapapers.com/android/android-location-fused-provider
--------------------
Y
[0s] 97 [1s] 2
predicted
[0s] 85 [1s] 14
--------------------
Accuracy: 0.8384
macro_f1: 0.4560
Precision: 0.4882
Recall: 0.4278
F1: 0.4560
[31m0[0m entries logged
https://developer.android.com/guide/topics/media/mediarecorder
--------------------
Y
[0s] 45 [1s] 4
predicted
[0s] 39 [1s] 10
--------------------
Accuracy: 0.7143
macro_f1: 0.4167
Precision: 0.4487
Recall: 0.3889
F1: 0.4167
[31m0[0m entries logged
https://stackoverflow.com/questions/6688444
--------------------
Y
[0s] 5 [1s] 4
predicted
[0s] 1 [1s] 8
--------------------
Accuracy: 0.5556
macro_f1: 0.5000
Precision: 0.7500
Recall: 0.6000
F1: 0.5000
[31m4[0m entries logged
https://www.toptal.com/android/android-developers-guide-to-google-location-

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 2027003.27it/s]

[31mtest[0m
0    381
1      7
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/guide/background/threading
https://github.com/google/ExoPlayer/issues/8387





--------------------
Y
[0s] 31 [1s] 1
predicted
[0s] 22 [1s] 10
--------------------
Accuracy: 0.7188
macro_f1: 0.5060
Precision: 0.5500
Recall: 0.8548
F1: 0.5060
[31m1[0m entries logged
https://stackoverflow.com/questions/2993085
https://developer.android.com/training/gestures/scale
https://stackoverflow.com/questions/2661536
--------------------
Y
[0s] 99 [1s] 1
predicted
[0s] 96 [1s] 4
--------------------
Accuracy: 0.9500
macro_f1: 0.4872
Precision: 0.4948
Recall: 0.4798
F1: 0.4872
[31m0[0m entries logged
https://stackoverflow.com/questions/10630373
--------------------
Y
[0s] 29 [1s] 3
predicted
[0s] 26 [1s] 6
--------------------
Accuracy: 0.8438
macro_f1: 0.6768
Precision: 0.6474
Recall: 0.7644
F1: 0.6768
[31m2[0m entries logged
https://www.twilio.com/blog/asynchronous-api-requests-java-completablefutures
--------------------
Y
[0s] 48 [1s] 2
predicted
[0s] 40 [1s] 10
--------------------
Accuracy: 0.8000
macro_f1: 0.5265
Precision: 0.5375
Recall: 0.6562
F1: 0.5265
[31m1

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7903/7903 [00:00<00:00, 2223178.04it/s]

[31mtest[0m
0    553
1     25
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/training/data-storage/sqlite





--------------------
Y
[0s] 67 [1s] 2
predicted
[0s] 59 [1s] 10
--------------------
Accuracy: 0.8261
macro_f1: 0.4524
Precision: 0.4831
Recall: 0.4254
F1: 0.4524
[31m0[0m entries logged
https://stackoverflow.com/questions/4015026
--------------------
Y
[0s] 26 [1s] 9
predicted
[0s] 27 [1s] 8
--------------------
Accuracy: 0.6857
macro_f1: 0.5727
Precision: 0.5764
Recall: 0.5705
F1: 0.5727
[31m3[0m entries logged
https://developer.android.com/training/dependency-injection/hilt-android
--------------------
Y
[0s] 141 [1s] 4
predicted
[0s] 124 [1s] 21
--------------------
Accuracy: 0.8552
macro_f1: 0.5404
Precision: 0.5396
Recall: 0.6826
F1: 0.5404
[31m2[0m entries logged
https://prog.world/a-practical-guide-to-using-hilt-with-kotlin
--------------------
Y
[0s] 45 [1s] 3
predicted
[0s] 38 [1s] 10
--------------------
Accuracy: 0.7708
macro_f1: 0.5107
Precision: 0.5237
Recall: 0.5667
F1: 0.5107
[31m1[0m entries logged
https://stackoverflow.com/questions/8184492
-------------------

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [8]:
# for source in df_test["source"].unique():
#     df_source = df_test[df_test["source"] == source]   
#     logger.info(source)
#     test_model(source, df_source, model, tokenizer, pos_filter=True)
    

In [9]:
# Collect the precision / recall / f-score values over all evaluated folds
# and report the mean of each.
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results)

logger.info("\n")
logger.info(Fore.RED + "AGGREGATED METRICS" + Style.RESET_ALL)
for _label, _values in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + "{:.3f}".format(np.mean(_values)) + Style.RESET_ALL)



[31mAGGREGATED METRICS[0m

precision: [31m0.520[0m
recall:    [31m0.548[0m
f1-score:  [31m0.504[0m


In [10]:
## Stack Overflow results

In [11]:
# Same report as the aggregated one, restricted to result_type="so".
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(
    fold_results, result_type="so"
)

logger.info("\n")
logger.info(Fore.RED + "Stack Overflow metrics" + Style.RESET_ALL)
for _label, _values in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + "{:.3f}".format(np.mean(_values)) + Style.RESET_ALL)



[31mStack Overflow metrics[0m

precision: [31m0.565[0m
recall:    [31m0.611[0m
f1-score:  [31m0.554[0m


In [12]:
## Github issues results

In [13]:
# Same report as the aggregated one, restricted to result_type="git".
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(
    fold_results, result_type="git"
)

logger.info("\n")
logger.info(Fore.RED + "Github issues metrics" + Style.RESET_ALL)
for _label, _values in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + "{:.3f}".format(np.mean(_values)) + Style.RESET_ALL)



[31mGithub issues metrics[0m

precision: [31m0.525[0m
recall:    [31m0.581[0m
f1-score:  [31m0.503[0m


In [14]:
#@title Sample prediction outputs for API sources

# Header line in red, then up to 8 sample predictions for 'api' sources.
logger.info(f"{Fore.RED}API{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='api')

[31mAPI[0m

[31mThe gravity is not working on the TextView in some situation.[0m
https://developer.android.com/reference/android/widget/TextView


[31mAndroid PDF Rendering[0m
https://developer.android.com/reference/android/graphics/pdf/PdfRenderer

[w=0][31m[y=1][33m[p=0.9049][0m It is guaranteed, however, that the thread that invokes finalize will not be holding any user-visible synchronization locks when finalize is invoked.

[w=0][31m[y=1][33m[p=0.8953][0m Subclasses of Object may override this definition.

[w=0][31m[y=1][33m[p=0.8949][0m For example, the finalize method for an object that represents an input/output connection might perform explicit I/O transactions to break the connection before the object is permanently discarded.

[w=0][31m[y=1][33m[p=0.4253][0m Gets the number of pages in the document.

[w=3][31m[y=1][33m[p=0.3711][0m If document page size is greater than the printed media size the content should be anchored to the upper left corner of the 


[w=0][31m[y=1][33m[p=0.2290][0m Note: In the above example, the prepareVideoRecorder ( ) method refers to the example code shown in Configuring MediaRecorder.

[w=0][31m[y=1][33m[p=0.2167][0m Location Permission - If your application tags images with GPS location information, you must request the ACCESS_FINE_LOCATION permission.

[w=0][31m[y=1][33m[p=0.2041][0m This method is available in Android 2.2 ( API Level 8 ), for equivalent calls in earlier API versions, see Saving Shared Files.

--------------------

[31mExplanation of the getView() method of an ArrayAdapter[0m
https://developer.android.com/reference/android/widget/ArrayAdapter



In [15]:
#@title Sample prediction outputs for GIT sources

# Header line in red, then up to 8 sample predictions for 'git' sources.
logger.info(f"{Fore.RED}GIT{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='git')

[31mGIT[0m

[31mSeekTo Position of cutted song not working[0m
https://github.com/google/ExoPlayer/issues/8387

[w=1][31m[y=1][33m[p=0.8937][0m If you have a custom DataSource implementation then you'll need to share it so that we can take a look at what might be wrong.

[w=2][31m[y=1][33m[p=0.8818][0m The dataSpec argument is not defining the entire media.

[w=1][31m[y=1][33m[p=0.8563][0m It's not clear how you are actually applying the offset and length in or before FileDataSource.

[w=0][31m[y=1][33m[p=0.4815][0m One thing to note about this issue is that media can safely play without calling seek

[w=1][31m[y=1][33m[p=0.2875][0m It's defining the part of the media that the caller wants to read for this particular call to open.

[w=0][31m[y=1][33m[p=0.2149][0m Android device: Pixel 4 XL

[w=0][31m[y=1][33m[p=0.1437][0m The reason why dataSpec.position needs to be included is already explained as clearly as I'm able to explain it in my response above.

[w=0][

In [22]:
#@title Sample prediction outputs for SO sources

# Header line in red, then up to 8 sample predictions for 'so' sources.
logger.info(f"{Fore.RED}SO{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='so')

[31mSO[0m

[31mHow to Integrate reCAPTCHA 2.0 in Android[0m
https://stackoverflow.com/questions/27297067

[w=1][31m[y=1][33m[p=0.8142][0m So you have to validate this token.

[w=2][31m[y=1][33m[p=0.7743][0m Validating the response is really easy.

[w=3][31m[y=1][33m[p=0.3210][0m That means, that the widget will take care of asking questions, validating responses all the way till it determines that a user is actually a human, only then you get a g-recaptcha-response value.

[w=1][31m[y=1][33m[p=0.2119][0m Edit: It's actually a POST, as per LINK.

[w=1][31m[y=1][33m[p=0.1904][0m You will get a JSON Response with a success field.

[w=3][31m[y=1][33m[p=0.1721][0m Just make a GET Request to

[w=0][31m[y=1][33m[p=0.1526][0m Returns the API response in a JsonObject.

[w=0][31m[y=1][33m[p=0.1460][0m This code put at head section on call get_action -LRB- this -RRB- method form button:

--------------------

[31mHow to record phone calls in Android[0m
https://stacko

In [21]:
#@title Sample prediction outputs for MISC sources

# Header line in red, then up to 8 sample predictions for 'misc' sources.
logger.info(f"{Fore.RED}MISC{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='misc')

[31mMISC[0m


In [18]:
# Log how many entries were collected for the Venn diagram, then each entry
# on its own line.
_venn_entries = fold_results['venn_diagram_set']
logger.info(f"{Fore.RED}{len(_venn_entries)} entries VENN SET{Style.RESET_ALL}")
for _entry in _venn_entries:
    logger.info(_entry)

[31m77 entries VENN SET[0m
then, in your code you detect the ENABLE_CRASHLYTICS flag as follows:

it is used for the Android Music Remote control even if the App is in Lock mode.
It sounds like you may be using the support library ?
I have no experience with Gradle, but it seems like you need to do something like this.
There is a WAKE_LOCK permission in your Manifest.xml.
Whether you need the authentication headers or not will decide which instance to inject.
If the device is running Android 6.0 or higher, and your app's target SDK is 23 or higher: The app has to list the permissions in the manifest, and it must request each dangerous permission it needs while the app is running.
However, there are a few cases where your app might need to implement its own Back behavior in order to provide the best possible user experience.
The TextView being in wrap_content this does nothing, as the TextView is exactly the size of the text.
An example of polymorphic deserialization is if you want yo

In [19]:
# Display the accumulated results dictionary, including the full
# 'venn_diagram_set' list, as the cell output.
fold_results

{'venn_diagram_set': ['then, in your code you detect the ENABLE_CRASHLYTICS flag as follows:',
  '',
  'it is used for the Android Music Remote control even if the App is in Lock mode.',
  'It sounds like you may be using the support library ?',
  'I have no experience with Gradle, but it seems like you need to do something like this.',
  'There is a WAKE_LOCK permission in your Manifest.xml.',
  'Whether you need the authentication headers or not will decide which instance to inject.',
  "If the device is running Android 6.0 or higher, and your app's target SDK is 23 or higher: The app has to list the permissions in the manifest, and it must request each dangerous permission it needs while the app is running.",
  'However, there are a few cases where your app might need to implement its own Back behavior in order to provide the best possible user experience.',
  'The TextView being in wrap_content this does nothing, as the TextView is exactly the size of the text.',
  'An example of p