In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the project-local modules imported below are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

Based on 



1.   https://towardsdatascience.com/hugging-face-transformers-fine-tuning-distilbert-for-binary-classification-tasks-490f1d192379
2.   https://www.analyticsvidhya.com/blog/2020/07/transfer-learning-for-nlp-fine-tuning-bert-for-text-classification/
3.   https://huggingface.co/transformers/training.html#fine-tuning-with-keras




**problem statement:**


*   a developer has to inspect an **artifact X**
*   Within the artifact, only a portion of the text is relevant to **input task Y**
*   We ought to build a model that establishes relationships between **Y** and **sentences x ∈ X** 
*  The model must determine: **is x relevant to task Y**




<br>

___

*Example of a task and an annotated artifact:*

<br>

[<img src="https://i.imgur.com/Zj1317H.jpg">](https://i.imgur.com/Zj1317H.jpg)




* The coloured sentences are sentences annotated as relevant to the input task. 
* The warmer the color, the more annotators selected that portion of the text. 
* For simplicity, we process the data at the sentence level: each sentence of an artifact is treated as one data point. 

<br>

___

*Ultimately, our data is a tuple representing:*


*   **text** = artifact sentence

*   **question** = task description

*   **source** = URL of the artifact

*   **category_index** = whether the sentence is relevant (or not) to the input task

*   **weights** = number of participants who annotated the sentence as relevant


<br>

___



In [2]:
# @title Import data as JSON
import itertools
import json
import logging
import os
import sys
import random
from pathlib import Path

from Levenshtein import ratio
from colorama import Fore, Style

# Send log records of every level from the root logger to the notebook output.
logger = logging.getLogger()
# Use the public API: assigning `logger.level` directly skips the bookkeeping
# that `setLevel` performs (level validation, cache invalidation).
logger.setLevel(logging.DEBUG)

# Re-running this cell must not stack another stdout handler on the root
# logger, otherwise every message is printed once per execution of the cell.
stream_handler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.StreamHandler)
        and getattr(handler, 'stream', None) is sys.stdout
    ),
    None,
)
if stream_handler is None:
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

from ds_android import get_input_for_BERT

# Load the annotated corpus through the project helper. Judging by the cell
# output, the call also prints a per-source summary line for each artifact URL.
raw_data = get_input_for_BERT()

# Pretty-print the first record so the schema of one entry is visible.
print('Sample entry from data:')
sample_entry = raw_data[0]
print(json.dumps(sample_entry, sort_keys=True, indent=4))

[31m39 [33m129 [0m https://developer.android.com/training/permissions/requesting
[31m14 [33m21 [0m https://stackoverflow.com/questions/5233543
[31m4 [33m34 [0m https://github.com/morenoh149/react-native-contacts/issues/516
[31m27 [33m63 [0m https://guides.codepath.com/android/Understanding-App-Permissions
[31m9 [33m161 [0m https://www.avg.com/en/signal/guide-to-android-app-permissions-how-to-use-them-smartly
[31m9 [33m15 [0m https://developer.android.com/training/volley/request
[31m14 [33m65 [0m https://stackoverflow.com/questions/28504524
[31m20 [33m59 [0m https://medium.com/@JasonCromer/android-asynctask-http-request-tutorial-6b429d833e28
[31m5 [33m97 [0m https://www.twilio.com/blog/5-ways-to-make-http-requests-in-java
[31m4 [33m12 [0m https://stackoverflow.com/questions/33241952
[31m6 [33m33 [0m https://github.com/realm/realm-java/issues/776
[31m3 [33m17 [0m https://stackoverflow.com/questions/8712652
[31m8 [33m59 [0m https://dzone.com/articles

[31m6 [33m32 [0m https://stackoverflow.com/questions/10630373
[31m4 [33m54 [0m https://developer.android.com/training/gestures/scroll
[31m4 [33m16 [0m https://stackoverflow.com/questions/39588322
[31m20 [33m196 [0m https://developer.android.com/training/dependency-injection/dagger-android
[31m6 [33m44 [0m https://stackoverflow.com/questions/57235136
[31m24 [33m121 [0m https://guides.codepath.com/android/dependency-injection-with-dagger-2
Sample entry from data:
{
    "category_index": 1,
    "question": "Permission Denial when trying to access contacts in Android",
    "source": "https://developer.android.com/training/permissions/requesting",
    "text": "Every Android app runs in a limited-access sandbox.",
    "weights": 1
}


In [3]:
# @title DICT that will store fold results
# Results of a previous execution with the same configuration, if present on
# disk, are loaded so that the evaluation loop can pick up where it stopped.

# Final results are the average of several distinct runs of this script, so
# that a single exceptionally good BERT training run is not cherry-picked.
NUMBER_OF_RUNS = 3
config_output = 'output/bert_ds_synthetic_base.json'

fold_results = {}

if Path(config_output).is_file():
    logger.info(Fore.YELLOW + "Loading data from cache" + Style.RESET_ALL)
    with open(config_output) as input_file:
        fold_results = json.load(input_file)

# The evaluation loop appends to this list, so it must always exist.
fold_results.setdefault('venn_diagram_set', [])

[33mLoading data from cache[0m


In [4]:
# @title Set environment variables

import os
import contextlib
import tensorflow as tf
import os
import codecs
import numpy as np
import math
import json

import numpy as np
import pandas as pd

from collections import defaultdict, Counter
from tqdm import tqdm

# Toggle for running on a Colab TPU; left off for GPU/CPU execution.
USE_TPU = False
# NOTE(review): presumably read by a Keras-based dependency to select
# tf.keras; nothing in this notebook reads it directly — confirm it is needed.
os.environ['TF_KERAS'] = '1'

# @title Initialize TPU Strategy
if USE_TPU:
    # `tf.contrib` does not exist in TensorFlow 2.x, so the former
    # tf.contrib.* calls would raise AttributeError as soon as USE_TPU is
    # enabled. The equivalents live under tf.distribute / tf.tpu.
    TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TPU_WORKER)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    
from TFBertForTaskTextClassification import TFBertForTaskTextClassification
from TFBertForTaskTextClassification import TFBertForAndroidTaskTextClassification
from TFBertForTaskTextClassification import TFBertForSyntheticTaskTextClassification 

from metrics import MetricsAggregator

Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.


# BERT

In [5]:
# Init the model
# The wrapper class is imported from the project-local
# TFBertForTaskTextClassification module; 'bert-base-uncased' is the checkpoint
# named in the training output of the cross-validation cell.
model = TFBertForSyntheticTaskTextClassification(model_id = 'bert-base-uncased')

# Configure filters. All other values are as default
# NOTE(review): presumably the number of sentences predicted as relevant per
# artifact (the logged predictions show 10 positives for most sources) — confirm.
model.target_output = 10
# Both frame-based filters are disabled here (the "base" configuration); the
# commented-out cells below list the alternative combinations.
model.use_frame_filtering = False
model.match_frame_from_task = False
model.n_undersampling = 3 # this dataset is smaller, so we don't do as much undersampling
        
# Load tokenizer
# local_files_only=True: the tokenizer files are expected to already be in
# cache_dir (presumably no network access on the execution host — confirm).
model.tokenizer(cache_dir='/home/msarthur/scratch', local_files_only=True)

In [6]:
# # base + frame-elements
# model.use_frame_filtering = True
# model.match_frame_from_task = False

In [7]:
# # base + frame-associations
# model.use_frame_filtering = False
# model.match_frame_from_task = True

In [8]:
# # base + both filters
# model.use_frame_filtering = True
# model.match_frame_from_task = True

In [None]:
# @title 10-fold cross validation WIP
# Fine-tunes the model once, then evaluates it on each of the 10 task folds and
# accumulates the per-fold metrics in `fold_results`.
from sklearn.model_selection import KFold

CORPUS = raw_data

# Deterministic task order: sort the unique task descriptions, then shuffle
# them with a fixed seed. The folds below are contiguous slices of this order.
all_tasks = sorted({d['question'] for d in raw_data})
rseed = 20210343
random.seed(rseed)
random.shuffle(all_tasks)


# Mirror the log to a file. Reuse an already attached handler for the same file
# so that re-running this cell does not write every message multiple times.
log_file = '/home/msarthur/scratch/LOG-bert_ds_synthetic.ans'
file_handler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == os.path.abspath(log_file)
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)


n_splits = 10
# No `random_state` here: KFold only uses it when shuffle=True, and current
# scikit-learn raises a ValueError when it is set while shuffle is False.
# The task order is already randomized (and seeded) above, so the folds are
# the same ones the previous call produced.
kf = KFold(n_splits=n_splits)
np_tasks_arr = np.array(all_tasks)


# <------------------------------------------------------------------------- TRAIN
# NOTE(review): the model is fine-tuned a single time, with only all_tasks[0]
# passed as held-out tasks, and then reused for every fold below. Whether the
# tasks of each test fold are excluded from the training data depends on
# get_train_val_test, which is not visible here — confirm there is no
# train/test leakage.
df_train, df_val, _, weights = model.get_train_val_test(
    CORPUS, [all_tasks[0]]  # dummy test data
)

logger.info('-' * 10)
logger.info(Fore.RED + 'train' + Style.RESET_ALL)
logger.info(str(df_train.category_index.value_counts()))
logger.info("")

logger.info(Fore.RED + 'weights' + Style.RESET_ALL)
logger.info(str(weights))
logger.info('-' * 10)


# Encode X_train
train_encodings = model.encode(df_train)
train_labels = df_train['category_index'].tolist()

# Encode X_valid
val_encodings = model.encode(df_val)
val_labels = df_val['category_index'].tolist()


# https://huggingface.co/transformers/custom_datasets.html
train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(train_encodings),
    train_labels
))

val_dataset = tf.data.Dataset.from_tensor_slices((
    dict(val_encodings),
    val_labels
))


fine_tunned_keras_model = model.build(
    train_dataset, val_dataset, weights,
    checkpoint_filepath='/home/msarthur/scratch/best_model',
    cache_dir='/home/msarthur/scratch',
    local_files_only=True
)


# <-------------------------------------------------------------------------------- FOLDS
for fold_number, (train_index, test_index) in enumerate(kf.split(np_tasks_arr)):
    # `fold_results` is loaded from JSON, so folds are keyed by strings.
    idx_split = str(fold_number)

    # Each fold is evaluated at most NUMBER_OF_RUNS times (across executions of
    # this script) to avoid reporting the peak result of a single run.
    if idx_split in fold_results and fold_results[idx_split]['run_cnt'] >= NUMBER_OF_RUNS:
        logger.info(Fore.RED + f"Fold {idx_split} FULLY TESTED" + Style.RESET_ALL)
    else:
        model.metrics.reset_aggregators()

        test_tasks_lst = np_tasks_arr[test_index].tolist()

        logger.info("")
        logger.info(Fore.RED + f"Fold {idx_split}" + Style.RESET_ALL)
        logger.info('\n'.join(test_tasks_lst))

        # <------------------------------------------------------------------------- INPUT
        # Only the test split is needed here; the class weights returned for
        # this split are discarded so the training `weights` stay intact.
        _, _, df_test, _ = model.get_train_val_test(
            CORPUS, test_tasks_lst
        )

        logger.info(Fore.RED + 'test' + Style.RESET_ALL)
        logger.info(str(df_test.category_index.value_counts()))
        logger.info("")

        if model.match_frame_from_task:
            __frame_pairs = model.fn_frame_pairs.get_most_common_frame_relationships(df_train)
            model.sentence_task_frame_pairs = __frame_pairs

        # <------------------------------------------------------------------------- TEST
        logger.info("")
        logger.info(Fore.RED + "Testing model" + Style.RESET_ALL)
        # The model is evaluated one artifact (source URL) at a time.
        for source in df_test["source"].unique():
            df_source = df_test[df_test["source"] == source]
            logger.info(source)

            model.test(source, df_source, fine_tunned_keras_model)

        # <------------------------------------------------------------------------- METRICS
        prediction_metrics, api_metrics, so_metrics, git_metrics, misc_metrics = model.get_evaluation_metrics()

        MetricsAggregator.add_idx_fold_results(
            idx_split, fold_results, prediction_metrics,
            api_metrics, so_metrics, git_metrics, misc_metrics
        )

        # De-duplicate the accumulated entries.
        # NOTE(review): set() requires hashable entries; entries re-loaded from
        # the JSON cache may come back as lists — confirm the entry type.
        fold_results['venn_diagram_set'] += model.metrics.venn_diagram_set
        fold_results['venn_diagram_set'] = list(set(fold_results['venn_diagram_set']))

        _precision, _recall, _f1score = MetricsAggregator.avg_macro_metric_for(prediction_metrics)

        logger.info("")
        logger.info(Fore.YELLOW + "Model metrics" + Style.RESET_ALL)
        logger.info("precision: " + Fore.RED + "{:.3f}".format(_precision) + Style.RESET_ALL)
        logger.info("recall:    " + Fore.RED + "{:.3f}".format(_recall) + Style.RESET_ALL)
        logger.info("f1-score:  " + Fore.RED + "{:.3f}".format(_f1score) + Style.RESET_ALL)

    logger.info(f"next {fold_number + 1}")


100%|██████████| 7918/7918 [00:00<00:00, 2507398.95it/s]

----------
[31mtrain[0m
0    491
1    164
Name: category_index, dtype: int64

[31mweights[0m
{0: 1.0, 1: 3.0}
----------



All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
AutoGraph could not transform <bound method Socket.send of <zmq.sugar.socket.Socket object at 0x2ba1adb5a3d0>> and will run it as-is.
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module, class, method, function, traceback, frame, or code object was expected, got cython_function_or_method
The parameter `return_dict` cannot be set in graph mode an

100%|██████████| 7918/7918 [00:00<00:00, 1827061.62it/s]

[31mtest[0m
0    706
1     29
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/codelabs/basic-android-kotlin-training-recyclerview-scrollable-list





https://stackoverflow.com/questions/33241952
https://guides.codepath.com/android/using-the-recyclerview
https://developer.android.com/reference/org/json/JSONObject
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
--------------------
Y
[0s] 87 [1s] 17
predicted
[0s] 94 [1s] 10
--------------------
Accuracy: 0.7788
macro_f1: 0.5105
Precision: 0.5202
Recall: 0.5128
F1: 0.5105
[31m2[0m entries logged
https://github.com/SundeepK/CompactCalendarView/issues/181
The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
The paramet

100%|██████████| 7918/7918 [00:00<00:00, 1774918.45it/s]

[31mtest[0m
0    722
1     38
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/guide/topics/providers/content-provider-creating





https://stackoverflow.com/questions/30362446
--------------------
Y
[0s] 39 [1s] 3
predicted
[0s] 32 [1s] 10
--------------------
Accuracy: 0.7381
macro_f1: 0.4995
Precision: 0.5188
Recall: 0.5513
F1: 0.4995
[31m1[0m entries logged
https://docs.oracle.com/javase/7/docs/api/java/awt/Rectangle.html
https://stackoverflow.com/questions/2883355
--------------------
Y
[0s] 22 [1s] 2
predicted
[0s] 14 [1s] 10
--------------------
Accuracy: 0.6667
macro_f1: 0.5556
Precision: 0.6000
Recall: 0.8182
F1: 0.5556
[31m2[0m entries logged
https://guides.codepath.com/android/creating-and-using-fragments
--------------------
Y
[0s] 153 [1s] 10
predicted
[0s] 153 [1s] 10
--------------------
Accuracy: 0.8773
macro_f1: 0.4673
Precision: 0.4673
Recall: 0.4673
F1: 0.4673
[31m0[0m entries logged
https://developer.android.com/training/basics/firstapp/starting-activity
https://developer.android.com/reference/android/graphics/pdf/PdfRenderer
--------------------
Y
[0s] 36 [1s] 8
predicted
[0s] 34 [1s] 10


100%|██████████| 7918/7918 [00:00<00:00, 1480958.71it/s]

[31mtest[0m
0    1304
1      54
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/codelabs/advanced-kotlin-coroutines#7





https://www.avg.com/en/signal/guide-to-android-app-permissions-how-to-use-them-smartly
--------------------
Y
[0s] 158 [1s] 3
predicted
[0s] 151 [1s] 10
--------------------
Accuracy: 0.9565
macro_f1: 0.7194
Precision: 0.6500
Recall: 0.9778
F1: 0.7194
[31m3[0m entries logged
https://www.raywenderlich.com/10091980-testing-rest-apis-using-mockwebserver
https://dzone.com/articles/rxjava-idiomatic-concurrency-flatmap-vs-parallel
--------------------
Y
[0s] 116 [1s] 1
predicted
[0s] 107 [1s] 10
--------------------
Accuracy: 0.9060
macro_f1: 0.4753
Precision: 0.4953
Recall: 0.4569
F1: 0.4753
[31m0[0m entries logged
https://developer.android.com/training/permissions/requesting
--------------------
Y
[0s] 114 [1s] 15
predicted
[0s] 119 [1s] 10
--------------------
Accuracy: 0.8217
macro_f1: 0.4906
Precision: 0.4912
Recall: 0.4939
F1: 0.4906
[31m1[0m entries logged
https://developer.android.com/training/safetynet/recaptcha
--------------------
Y
[0s] 48 [1s] 6
predicted
[0s] 44 [1s] 10
-

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 1835036.97it/s]

[31mtest[0m
0    781
1     37
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/guide/topics/media/camera





--------------------
Y
[0s] 239 [1s] 11
predicted
[0s] 240 [1s] 10
--------------------
Accuracy: 0.9240
macro_f1: 0.5278
Precision: 0.5292
Recall: 0.5266
F1: 0.5278
[31m1[0m entries logged
https://docs.oracle.com/javase/8/javafx/layout-tutorial/size_align.htm
https://developer.android.com/training/notify-user/build-notification
--------------------
Y
[0s] 145 [1s] 2
predicted
[0s] 137 [1s] 10
--------------------
Accuracy: 0.9320
macro_f1: 0.5656
Precision: 0.5464
Recall: 0.7190
F1: 0.5656
[31m1[0m entries logged
https://medium.com/@JasonCromer/android-asynctask-http-request-tutorial-6b429d833e28
--------------------
Y
[0s] 52 [1s] 7
predicted
[0s] 49 [1s] 10
--------------------
Accuracy: 0.7458
macro_f1: 0.4846
Precision: 0.4888
Recall: 0.4849
F1: 0.4846
[31m1[0m entries logged
https://stackoverflow.com/questions/26838730
--------------------
Y
[0s] 18 [1s] 7
predicted
[0s] 15 [1s] 10
--------------------
Accuracy: 0.7200
macro_f1: 0.6881
Precision: 0.6833
Recall: 0.7183
F1: 0

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 2418475.03it/s]

[31mtest[0m
0    265
1     11
Name: category_index, dtype: int64


[31mTesting model[0m
https://stackoverflow.com/questions/8712652





--------------------
Y
[0s] 15 [1s] 2
predicted
[0s] 7 [1s] 10
--------------------
Accuracy: 0.5294
macro_f1: 0.4848
Precision: 0.6000
Recall: 0.7333
F1: 0.4848
[31m2[0m entries logged
https://dzone.com/articles/android-rotate-and-scale
--------------------
Y
[0s] 58 [1s] 1
predicted
[0s] 49 [1s] 10
--------------------
Accuracy: 0.8475
macro_f1: 0.5489
Precision: 0.5500
Recall: 0.9224
F1: 0.5489
[31m1[0m entries logged
https://github.com/signalapp/Signal-Android/issues/3376
--------------------
Y
[0s] 54 [1s] 3
predicted
[0s] 47 [1s] 10
--------------------
Accuracy: 0.7719
macro_f1: 0.4356
Precision: 0.4681
Recall: 0.4074
F1: 0.4356
[31m0[0m entries logged
https://developer.android.com/work/dpc/dedicated-devices/lock-task-mode
https://github.com/realm/realm-java/issues/776
--------------------
Y
[0s] 31 [1s] 2
predicted
[0s] 23 [1s] 10
--------------------
Accuracy: 0.6364
macro_f1: 0.3889
Precision: 0.4565
Recall: 0.3387
F1: 0.3889
[31m0[0m entries logged
https://stackoverf

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 1615218.09it/s]

[31mtest[0m
0    770
1     33
Name: category_index, dtype: int64


[31mTesting model[0m
https://stackoverflow.com/questions/10108774
https://www.raywenderlich.com/324-viewpager-tutorial-getting-started-in-kotlin





--------------------
Y
[0s] 165 [1s] 12
predicted
[0s] 167 [1s] 10
--------------------
Accuracy: 0.8870
macro_f1: 0.5153
Precision: 0.5171
Recall: 0.5144
F1: 0.5153
[31m1[0m entries logged
https://developer.android.com/guide/topics/ui/notifiers/notifications
--------------------
Y
[0s] 144 [1s] 2
predicted
[0s] 136 [1s] 10
--------------------
Accuracy: 0.9178
macro_f1: 0.4786
Precision: 0.4926
Recall: 0.4653
F1: 0.4786
[31m0[0m entries logged
https://stackoverflow.com/questions/122105
--------------------
Y
[0s] 130 [1s] 1
predicted
[0s] 121 [1s] 10
--------------------
Accuracy: 0.9313
macro_f1: 0.5730
Precision: 0.5500
Recall: 0.9654
F1: 0.5730
[31m1[0m entries logged
https://developer.android.com/guide/navigation/navigation-custom-back
--------------------
Y
[0s] 25 [1s] 8
predicted
[0s] 26 [1s] 7
--------------------
Accuracy: 0.6667
macro_f1: 0.5255
Precision: 0.5275
Recall: 0.5250
F1: 0.5255
[31m2[0m entries logged
https://stackoverflow.com/questions/24313539
----------

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 1755590.16it/s]

[31mtest[0m
0    1237
1      44
Name: category_index, dtype: int64


[31mTesting model[0m
https://stackoverflow.com/questions/29923376





--------------------
Y
[0s] 28 [1s] 4
predicted
[0s] 22 [1s] 10
--------------------
Accuracy: 0.8125
macro_f1: 0.7257
Precision: 0.7000
Recall: 0.8929
F1: 0.7257
[31m4[0m entries logged
https://stackoverflow.com/questions/6442054
--------------------
Y
[0s] 14 [1s] 7
predicted
[0s] 11 [1s] 10
--------------------
Accuracy: 0.4762
macro_f1: 0.4565
Precision: 0.4682
Recall: 0.4643
F1: 0.4565
[31m3[0m entries logged
https://developer.android.com/training/dependency-injection/dagger-android
--------------------
Y
[0s] 195 [1s] 1
predicted
[0s] 186 [1s] 10
--------------------
Accuracy: 0.9439
macro_f1: 0.4856
Precision: 0.4973
Recall: 0.4744
F1: 0.4856
[31m0[0m entries logged
https://guides.codepath.com/android/dependency-injection-with-dagger-2
https://stackoverflow.com/questions/57235136
--------------------
Y
[0s] 41 [1s] 3
predicted
[0s] 34 [1s] 10
--------------------
Accuracy: 0.7955
macro_f1: 0.5938
Precision: 0.5853
Recall: 0.7358
F1: 0.5938
[31m2[0m entries logged
https:/

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 1688812.56it/s]

[31mtest[0m
0    892
1     29
Name: category_index, dtype: int64


[31mTesting model[0m
https://javapapers.com/android/android-location-fused-provider





--------------------
Y
[0s] 97 [1s] 2
predicted
[0s] 89 [1s] 10
--------------------
Accuracy: 0.8990
macro_f1: 0.5565
Precision: 0.5444
Recall: 0.7036
F1: 0.5565
[31m1[0m entries logged
https://developer.android.com/reference/android/widget/TextView
--------------------
Y
[0s] 468 [1s] 2
predicted
[0s] 460 [1s] 10
--------------------
Accuracy: 0.9745
macro_f1: 0.4935
Precision: 0.4978
Recall: 0.4893
F1: 0.4935
[31m0[0m entries logged
https://www.toptal.com/android/android-developers-guide-to-google-location-services-api
--------------------
Y
[0s] 113 [1s] 6
predicted
[0s] 109 [1s] 10
--------------------
Accuracy: 0.8824
macro_f1: 0.5310
Precision: 0.5271
Recall: 0.5435
F1: 0.5310
[31m1[0m entries logged
https://developer.android.com/guide/topics/media/mediarecorder
--------------------
Y
[0s] 45 [1s] 4
predicted
[0s] 39 [1s] 10
--------------------
Accuracy: 0.7551
macro_f1: 0.5000
Precision: 0.5115
Recall: 0.5250
F1: 0.5000
[31m1[0m entries logged
https://github.com/google

100%|██████████| 7918/7918 [00:00<00:00, 1940659.09it/s]

[31mtest[0m
0    377
1     11
Name: category_index, dtype: int64


[31mTesting model[0m
https://github.com/google/ExoPlayer/issues/8387





--------------------
Y
[0s] 27 [1s] 5
predicted
[0s] 22 [1s] 10
--------------------
Accuracy: 0.6562
macro_f1: 0.5211
Precision: 0.5318
Recall: 0.5519
F1: 0.5211
[31m2[0m entries logged
https://developer.android.com/training/gestures/scale
https://stackoverflow.com/questions/10630373
--------------------
Y
[0s] 29 [1s] 3
predicted
[0s] 22 [1s] 10
--------------------
Accuracy: 0.7188
macro_f1: 0.5656
Precision: 0.5773
Recall: 0.6954
F1: 0.5656
[31m2[0m entries logged
https://developer.android.com/guide/background/threading
https://www.twilio.com/blog/asynchronous-api-requests-java-completablefutures
--------------------
Y
[0s] 48 [1s] 2
predicted
[0s] 40 [1s] 10
--------------------
Accuracy: 0.8000
macro_f1: 0.5265
Precision: 0.5375
Recall: 0.6562
F1: 0.5265
[31m1[0m entries logged
https://stackoverflow.com/questions/2661536
--------------------
Y
[0s] 99 [1s] 1
predicted
[0s] 90 [1s] 10
--------------------
Accuracy: 0.8900
macro_f1: 0.4709
Precision: 0.4944
Recall: 0.4495
F1:

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
100%|██████████| 7918/7918 [00:00<00:00, 2130653.69it/s]

[31mtest[0m
0    553
1     25
Name: category_index, dtype: int64


[31mTesting model[0m
https://developer.android.com/training/dependency-injection/hilt-android





--------------------
Y
[0s] 141 [1s] 4
predicted
[0s] 135 [1s] 10
--------------------
Accuracy: 0.9034
macro_f1: 0.4746
Precision: 0.4852
Recall: 0.4645
F1: 0.4746
[31m0[0m entries logged
https://developer.android.com/training/data-storage/sqlite
--------------------
Y
[0s] 67 [1s] 2
predicted
[0s] 59 [1s] 10
--------------------
Accuracy: 0.8261
macro_f1: 0.4524
Precision: 0.4831
Recall: 0.4254
F1: 0.4524
[31m0[0m entries logged
https://github.com/google/dagger/issues/1991
https://medium.com/mindorks/how-to-pass-large-data-between-server-and-client-android-securely-345fed551651
https://stackoverflow.com/questions/4015026


In [None]:
# with open(config_output, 'w') as outfile:
#     json.dump(fold_results, outfile, sort_keys=True, indent=4)
#     logger.info(Fore.RED + "Output successfully saved to: {}".format(config_output) + Style.RESET_ALL)

## Results

In [None]:
# Collect the precision / recall / f-score values stored in fold_results and
# report the mean of each one.
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results)

logger.info("\n")
logger.info(Fore.RED + "AGGREGATED METRICS" + Style.RESET_ALL)
for _label, _scores in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + f"{np.mean(_scores):.3f}" + Style.RESET_ALL)

## Stack Overflow results

In [None]:
# Same report as the aggregated one, restricted to result_type="so".
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results, result_type="so")

logger.info("\n")
logger.info(Fore.RED + "Stack Overflow metrics" + Style.RESET_ALL)
for _label, _scores in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + f"{np.mean(_scores):.3f}" + Style.RESET_ALL)

## Github issues results

In [None]:
# Same report as the aggregated one, restricted to result_type="git".
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results, result_type="git")

logger.info("\n")
logger.info(Fore.RED + "Github issues metrics" + Style.RESET_ALL)
for _label, _scores in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(_label + Fore.RED + f"{np.mean(_scores):.3f}" + Style.RESET_ALL)

## Examples of text retrieved

In [None]:
#@title Sample prediction outputs for API sources

# Log a red header, then request 8 examples for the 'api' source type from the
# metrics helper (kept as the last expression so any return value is displayed).
logger.info(f"{Fore.RED}API{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='api')

In [None]:
#@title Sample prediction outputs for GIT sources

# Log a red header, then request 8 examples for the 'git' source type from the
# metrics helper (kept as the last expression so any return value is displayed).
logger.info(f"{Fore.RED}GIT{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='git')

In [None]:
#@title Sample prediction outputs for SO sources

# Log a red header, then request 8 examples for the 'so' source type from the
# metrics helper (kept as the last expression so any return value is displayed).
logger.info(f"{Fore.RED}SO{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='so')

In [None]:
#@title Sample prediction outputs for MISC sources

# Log a red header, then request 8 examples for the 'misc' source type from the
# metrics helper (kept as the last expression so any return value is displayed).
logger.info(f"{Fore.RED}MISC{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='misc')

In [None]:
# Report how many entries were accumulated in the venn-diagram set, then log
# each entry on its own line.
venn_entries = fold_results['venn_diagram_set']
logger.info(Fore.RED + f"{len(venn_entries)} entries VENN SET" + Style.RESET_ALL)
for _t in venn_entries:
    logger.info(_t)