In [1]:
%load_ext autoreload
%autoreload 2

Based on 



1.   https://towardsdatascience.com/hugging-face-transformers-fine-tuning-distilbert-for-binary-classification-tasks-490f1d192379
2.   https://www.analyticsvidhya.com/blog/2020/07/transfer-learning-for-nlp-fine-tuning-bert-for-text-classification/
3.   https://huggingface.co/transformers/training.html#fine-tuning-with-keras




**problem statement:**


*   a developer has to inspect an **artifact X**
*   Within the artifact, only a portion of the text is relevant to **input task Y**
*   We ought to build a model that establishes relationships between **Y** and **sentences x ∈ X** 
*  The model must determine: **is x relevant to task Y**




<br>

___

*Example of a task and an annotated artifact:*

<br>

[<img src="https://i.imgur.com/Zj1317H.jpg">](https://i.imgur.com/Zj1317H.jpg)




* The coloured sentences are sentences annotated as relevant to the input task. 
* The warmer the color, the more annotators selected that portion of the text. 
* For simplicity, we process the data and use whole sentences as the unit of analysis.

<br>

___

*Ultimately, our data is a tuple representing:*


*   **text** = artifact sentence

*   **question** = task description

*   **source** = URL of the artifact

*   **category_index** = whether the sentence is relevant (or not) to the input task

*   **weights** = number of participants who annotated the sentence as relevant


<br>

___



In [2]:
# @title Import data as JSON
import itertools
import json
import logging
import os
import sys
import random
from pathlib import Path

from Levenshtein import ratio
from colorama import Fore, Style

# Configure the root logger. This cell may be re-run in the same kernel, so it
# must not attach a second stdout handler (that would print every message twice).
logger = logging.getLogger()
# Use setLevel() rather than assigning `.level` directly: setLevel() validates
# the value and invalidates the logger's cached isEnabledFor() results.
logger.setLevel(logging.DEBUG)

# Reuse a plain stdout StreamHandler if one is already attached; FileHandler is
# a StreamHandler subclass, hence the exact type check.
stream_handler = next(
    (
        handler for handler in logger.handlers
        if type(handler) is logging.StreamHandler and handler.stream is sys.stdout
    ),
    None,
)
if stream_handler is None:
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

from ds_android import get_input_for_BERT
from data import greedy_stack_overflow_selection

# Load the input records through the project helper.
raw_data = get_input_for_BERT()

# Pretty-print the first record as a quick look at the record layout.
print(
    'Sample entry from data:',
    json.dumps(raw_data[0], sort_keys=True, indent=4),
    sep='\n',
)

[31m3 [33m17 [0m https://stackoverflow.com/questions/8712652
[31m8 [33m59 [0m https://dzone.com/articles/android-rotate-and-scale
[31m20 [33m145 [0m https://developer.android.com/training/dependency-injection/hilt-android
[31m4 [33m8 [0m https://stackoverflow.com/questions/30648172
[31m4 [33m81 [0m https://github.com/google/dagger/issues/1991
[31m9 [33m48 [0m https://prog.world/a-practical-guide-to-using-hilt-with-kotlin
[31m6 [33m33 [0m https://github.com/realm/realm-java/issues/776
[31m39 [33m129 [0m https://developer.android.com/training/permissions/requesting
[31m14 [33m21 [0m https://stackoverflow.com/questions/5233543
[31m4 [33m34 [0m https://github.com/morenoh149/react-native-contacts/issues/516
[31m27 [33m63 [0m https://guides.codepath.com/android/Understanding-App-Permissions
[31m9 [33m161 [0m https://www.avg.com/en/signal/guide-to-android-app-permissions-how-to-use-them-smartly
[31m5 [33m470 [0m https://developer.android.com/reference/an

[31m12 [33m77 [0m https://www.hongkiat.com/blog/solve-android-delayed-notifications
[31m4 [33m40 [0m https://developer.android.com/training/gestures/scale
[31m6 [33m32 [0m https://stackoverflow.com/questions/10630373
[31m20 [33m196 [0m https://developer.android.com/training/dependency-injection/dagger-android
[31m6 [33m44 [0m https://stackoverflow.com/questions/57235136
[31m24 [33m121 [0m https://guides.codepath.com/android/dependency-injection-with-dagger-2
Sample entry from data:
{
    "category_index": 1,
    "question": "Android: rotate canvas around the center of the screen",
    "source": "https://stackoverflow.com/questions/8712652",
    "text": "You have to rotate the canvas first and then draw whatever you want.",
    "weights": 1
}


In [3]:
# @title DICT that will store fold results
# If there is a previous execution for the same configuration, we load it from disk

# Number of times the train/evaluate procedure is repeated.
# NOTE(review): the original note here said the final results are the average
# of 3 distinct runs (so that one exceptionally good BERT training run is not
# cherry-picked), yet the value below is 1 -- confirm which is intended.
NUMBER_OF_RUNS = 1

# File used both as the cache that is read below and as the place results are
# saved; swap in one of the alternatives for the other filter configurations.
config_output = 'output/bert_ds_android_base.json'
# config_output = 'output/bert_ds_android_fe.json' # for frame-elements filter
# config_output = 'output/bert_ds_android_fa.json' # for frame-association filters

fold_results = {}

# Resume from a previous execution of the same configuration when one exists.
if os.path.isfile(config_output):
    logger.info(f"{Fore.YELLOW}Loading data from cache{Style.RESET_ALL}")
    with open(config_output) as input_file:
        fold_results = json.load(input_file)

# Make sure the list that later accumulates venn-diagram entries is present.
fold_results.setdefault('venn_diagram_set', [])

In [4]:
# @title Set environment variables

import os
import contextlib
import tensorflow as tf
import os
import codecs
import numpy as np
import math
import json

import numpy as np
import pandas as pd

from collections import defaultdict, Counter
from tqdm import tqdm

# Toggle for running on a Colab TPU; the TPU branch below is skipped when False.
USE_TPU = False
# NOTE(review): presumably read by a downstream library to select tf.keras -- confirm.
os.environ['TF_KERAS'] = '1'

# @title Initialize TPU Strategy
if USE_TPU:
    # `tf.contrib` no longer exists in TensorFlow 2.x, so the TPU is set up with
    # the TF2 equivalents: resolve the worker, connect to it, initialise the TPU
    # system, then build the distribution strategy.
    TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=TPU_WORKER)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
    
from TFBertForTaskTextClassification import TFBertForTaskTextClassification
from TFBertForTaskTextClassification import TFBertForAndroidTaskTextClassification
from TFBertForTaskTextClassification import TFBertForSyntheticTaskTextClassification 

from metrics import MetricsAggregator

Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.


  _dtype_to_storage = {data_type(0).dtype: data_type for data_type in _storages}


# BERT

In [5]:
# Build the classifier wrapper from the locally stored BERT checkpoint.
BERT_CHECKPOINT_DIR = '/home/msarthur/scratch/bert-base-uncased'
model = TFBertForAndroidTaskTextClassification(model_id=BERT_CHECKPOINT_DIR)

# Filter / sampling options for the "base" configuration, applied in this order.
# Every option not listed here keeps the wrapper's default.
for _option, _value in (
    ('target_output', 10),
    ('use_frame_filtering', False),
    ('match_frame_from_task', False),
    ('n_undersampling', 4),
):
    setattr(model, _option, _value)

# Load the tokenizer from local files only (no download).
model.tokenizer(local_files_only=True)

In [6]:
# # base + frame-elements
# model.use_frame_filtering = True
# model.match_frame_from_task = False

In [7]:
# # base + frame-associations
# model.use_frame_filtering = False
# model.match_frame_from_task = True

In [8]:
# @title 10-fold cross validation WIP
CORPUS = raw_data

# Unique task descriptions, sorted first so that the seeded shuffle below
# produces the same order on every execution.
all_tasks = sorted({d['question'] for d in raw_data})
rseed = 20210343
random.seed(rseed)
random.shuffle(all_tasks)

from sklearn.model_selection import KFold


# Mirror log records to a file. Reuse the handler when this cell has already
# attached one, so re-running the cell does not duplicate every log line or
# leave extra open file handles behind.
LOG_FILE = '/home/msarthur/scratch/LOG-bert_ds_android.ans'
file_handler = next(
    (
        handler for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == os.path.abspath(LOG_FILE)
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler(LOG_FILE)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)


n_splits = 10
# `random_state` only takes effect together with `shuffle=True`; passing it on
# its own raises ValueError in recent scikit-learn releases. The task list is
# already shuffled with `rseed` above, so unshuffled folds stay deterministic.
kf = KFold(n_splits=n_splits)
np_tasks_arr = np.array(all_tasks)


# Run loop: each iteration selects the held-out tasks, builds the
# train/validation/test frames, fine-tunes the model, tests it source by source
# and merges the resulting metrics into `fold_results`.
idx_split = 0
# FIXME: folds are actually the total number of runs
for __idx in range(NUMBER_OF_RUNS):

    # `idx_split` is used as a string key into `fold_results` inside the body
    # and turned back into an int at the bottom of the loop to be incremented.
    idx_split = str(idx_split)
    eval_fold = True
    # Skip this index when `fold_results` already records at least
    # NUMBER_OF_RUNS runs for it (original note: repeated runs avoid reporting
    # peak results in a given fold).
    if idx_split in fold_results and fold_results[idx_split]['run_cnt'] >= NUMBER_OF_RUNS:
        logger.info(Fore.RED + f"Fold {idx_split} FULLY TESTED" + Style.RESET_ALL)
        eval_fold = False


    if eval_fold:
        # Start from empty metric aggregators for this run.
        model.metrics.reset_aggregators()

#         test_tasks_lst = np_tasks_arr[test_index].tolist()
        # NOTE(review): this call receives neither the run index nor a seed, so
        # every iteration presumably selects the same test tasks -- confirm.
        test_tasks_lst = greedy_stack_overflow_selection(raw_data, target_count=0.3)

        logger.info("")
        logger.info(Fore.RED + f"Fold {idx_split}" + Style.RESET_ALL)
        logger.info('\n'.join(test_tasks_lst))

        # <------------------------------------------------------------------------- INPUT
        # Split the corpus into train / validation / test frames, holding out the
        # selected test tasks; `weights` is logged below and passed to `model.build`.
        df_train, df_val, df_test, weights = model.get_train_val_test(
            CORPUS, test_tasks_lst
        )
        

        # Log the label distribution of the train and test frames and the weights.
        logger.info('-' * 10)
        logger.info(Fore.RED + 'train'+ Style.RESET_ALL)
        logger.info(str(df_train.category_index.value_counts()))
        logger.info("")

        logger.info(Fore.RED + 'test'+ Style.RESET_ALL)
        logger.info(str(df_test.category_index.value_counts()))
        logger.info("")

        logger.info(Fore.RED + 'weights'+ Style.RESET_ALL)
        logger.info(str(weights))
        logger.info('-' * 10)
        
        # <------------------------------------------------------------------------- TRAIN

        # Encode X_train
        train_encodings = model.encode(df_train)
        train_labels = df_train['category_index'].tolist()

        # Encode X_valid
        val_encodings = model.encode(df_val)
        val_labels = df_val['category_index'].tolist()


        # Wrap (encodings, labels) pairs as tf.data datasets, following
        # https://huggingface.co/transformers/custom_datasets.html
        train_dataset = tf.data.Dataset.from_tensor_slices((
            dict(train_encodings),
            train_labels
        ))

        val_dataset = tf.data.Dataset.from_tensor_slices((
            dict(val_encodings),
            val_labels
        ))


        # Build the fine-tuned Keras model from the two datasets and the weights;
        # checkpoint and cache locations point at the scratch directory.
        fine_tunned_keras_model = model.build(
            train_dataset, val_dataset, weights, 
            checkpoint_filepath='/home/msarthur/scratch/best_model', 
            cache_dir='/home/msarthur/scratch', 
            local_files_only=True
        )
        
        
        # Only when `match_frame_from_task` is enabled: compute the frame pairs
        # from the training frame and hand them to the model before testing.
        if model.match_frame_from_task:
            __frame_pairs = model.fn_frame_pairs.get_most_common_frame_relationships(df_train)
            model.sentence_task_frame_pairs = __frame_pairs

        # <------------------------------------------------------------------------- TEST
        logger.info("")
        logger.info(Fore.RED + f"Testing model" + Style.RESET_ALL)
        # Test one source (artifact URL) at a time, on that source's rows only.
        for source in df_test["source"].unique():
            df_source = df_test[df_test["source"] == source]   
            logger.info(source)
            
            model.test(source, df_source, fine_tunned_keras_model)
                        

        # <------------------------------------------------------------------------- METRICS   
        
        # Overall metrics plus one set per source type (API, Stack Overflow, GitHub, misc).
        prediction_metrics, api_metrics, so_metrics, git_metrics, misc_metrics = model.get_evaluation_metrics()
        
        # Record this run's metrics in `fold_results` under the key `idx_split`.
        MetricsAggregator.add_idx_fold_results(
            idx_split, fold_results, prediction_metrics,
            api_metrics, so_metrics, git_metrics, misc_metrics
        )

        # Merge this run's venn-diagram entries and drop duplicates. Going
        # through set() requires hashable entries and does not preserve order.
        fold_results['venn_diagram_set'] += model.metrics.venn_diagram_set
        fold_results['venn_diagram_set'] = list(set(fold_results['venn_diagram_set']))


        # Macro-averaged precision / recall / f1 for this run, for logging only.
        _precision, _recall, _f1score = MetricsAggregator.avg_macro_metric_for(prediction_metrics)

        logger.info("")
        logger.info(Fore.YELLOW + "Model metrics" + Style.RESET_ALL)
        logger.info("precision: " + Fore.RED + "{:.3f}".format(_precision) + Style.RESET_ALL)
        logger.info("recall:    " + Fore.RED + "{:.3f}".format(_recall) + Style.RESET_ALL)
        logger.info("f1-score:  " + Fore.RED + "{:.3f}".format(_f1score) + Style.RESET_ALL)


    # Back to an int so the index can be advanced; this runs whether or not the
    # index was evaluated above.
    idx_split = int(idx_split)
    idx_split += 1
    logger.info(f"next {idx_split}")

    


[31mFold 0[0m
Android: rotate canvas around the center of the screen
Support for GoogleApiClient and new FusedLocationProviderApi
 height must be > 0
Java: Efficient ArrayList filtering?
how can i get the value of text view in recyclerview item?
Android App Retrieve Data from Server but in a Secure way
No lock screen controls ever
How to record phone calls in Android
Android Gallery with pinch zoom
Why settings.xml layout is overlapping the ActionBar/Toolbar?
Hilt: How to prevent Hilt from picking dependency from a library?
Don’t leak MockWebServer ports across tests
Doesn't scroll properly inside ViewPager
polymorphic deserialization of JSON with jackson, property type becomes &quot;null&quot;


100%|██████████| 7926/7926 [00:00<00:00, 917254.46it/s]

----------
[31mtrain[0m
0    896
1    224
Name: category_index, dtype: int64

[31mtest[0m
0    2260
1      63
Name: category_index, dtype: int64

[31mweights[0m
{0: 1.0, 1: 4.0}
----------



All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at /home/msarthur/scratch/bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.62276, saving model to /home/msarthur/scratch/best_model
Epoch 2/10

Epoch 00002: val_loss improved from 0.62276 to 0.59276, saving model to /home/msarthur/scratch/best_model
Epoch 3/10

Epoch 00003: val_loss improved from 0.59276 to 0.57057, saving model to /home/msarthur/scratch/best_model
Epoch 4/10

Epoch 00004: val_loss improved from 0.57057 to 0.53044, saving model to /home/msarthur/scratch/best_model
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.53044
Epoch 6/10

Epoch 00006: val_loss improved from 0.53044 to 0.50388, saving model to /home/msarthur/scratch/best_model
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.50388
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.50388
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.50388
Epoch 10/10
Restoring model weights from the end of the best epoch.

Epoch 00010: val_loss did not improve from 0.50388
Epoch 00010: early stopping

[31mTesting 

Accuracy: 0.7708
macro_f1: 0.5107
Precision: 0.5237
Recall: 0.5667
F1: 0.5107
[31m1[0m entries logged
https://developer.android.com/training/location/retrieve-current
https://stackoverflow.com/questions/24652078
--------------------
Y
[0s] 8 [1s] 3
predicted
[0s] 7 [1s] 4
--------------------
Accuracy: 0.7273
macro_f1: 0.6857
Precision: 0.6786
Recall: 0.7083
F1: 0.6857
[31m2[0m entries logged
https://stackoverflow.com/questions/6688444
--------------------
Y
[0s] 5 [1s] 4
predicted
[0s] 1 [1s] 8
--------------------
Accuracy: 0.5556
macro_f1: 0.5000
Precision: 0.7500
Recall: 0.6000
F1: 0.5000
[31m4[0m entries logged
https://medium.com/@david.truong510/jackson-polymorphic-deserialization-91426e39b96a
--------------------
Y
[0s] 13 [1s] 3
predicted
[0s] 6 [1s] 10
--------------------
Accuracy: 0.3125
macro_f1: 0.2874
Precision: 0.3833
Recall: 0.3205
F1: 0.2874
[31m1[0m entries logged
https://stackoverflow.com/questions/38980595
--------------------
Y
[0s] 3 [1s] 2
predicted
[0s] 

In [9]:
# with open(config_output, 'w') as outfile:
#     json.dump(fold_results, outfile, sort_keys=True, indent=4)
#     logger.info(Fore.RED + "Output successfully saved to: {}".format(config_output) + Style.RESET_ALL)

## Results

In [10]:
# Fetch the precision / recall / f-score series for `fold_results`, then log
# the mean of each under a coloured heading.
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results)

logger.info("\n")
logger.info(f"{Fore.RED}AGGREGATED METRICS{Style.RESET_ALL}")
for _label, _values in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(f"{_label}{Fore.RED}{np.mean(_values):.3f}{Style.RESET_ALL}")



[31mAGGREGATED METRICS[0m

precision: [31m0.553[0m
recall:    [31m0.585[0m
f1-score:  [31m0.517[0m


In [11]:
# Minimum mean precision required before the results are written to disk.
# NOTE(review): persisting results only above a metric threshold can bias the
# reported numbers towards favourable runs -- confirm this is intended.
MIN_PRECISION_TO_SAVE = 0.53

# NOTE: `__precision` is whatever the most recently executed metrics cell left
# behind, so run this right after the aggregated-metrics cell.
if np.mean(__precision) >= MIN_PRECISION_TO_SAVE:
    # Create the output directory when it is missing, so the results of a long
    # training run are not lost to a FileNotFoundError.
    _output_dir = os.path.dirname(config_output)
    if _output_dir:
        os.makedirs(_output_dir, exist_ok=True)

    with open(config_output, 'w') as outfile:
        json.dump(fold_results, outfile, sort_keys=True, indent=4)

    # Report success only after the file has been written and closed.
    logger.info(Fore.RED + "Output successfully saved to: {}".format(config_output) + Style.RESET_ALL)

[31mOutput successfully saved to: output/bert_ds_android_base.json[0m


In [12]:
# with open(config_output, 'w') as outfile:
#     json.dump(fold_results, outfile, sort_keys=True, indent=4)
#     logger.info(Fore.RED + "Output successfully saved to: {}".format(config_output) + Style.RESET_ALL)

## Stack Overflow results

In [13]:
# Same report as the aggregated one, restricted to result_type="so".
# Note that this rebinds `__precision`, `__recall` and `__fscore`.
__precision, __recall, __fscore = MetricsAggregator.get_full_exec_results(fold_results, result_type="so")

logger.info("\n")
logger.info(f"{Fore.RED}Stack Overflow metrics{Style.RESET_ALL}")
for _label, _values in (
    ("\nprecision: ", __precision),
    ("recall:    ", __recall),
    ("f1-score:  ", __fscore),
):
    logger.info(f"{_label}{Fore.RED}{np.mean(_values):.3f}{Style.RESET_ALL}")




[31mStack Overflow metrics[0m

precision: [31m0.612[0m
recall:    [31m0.644[0m
f1-score:  [31m0.565[0m


## Examples of text retrieved

In [14]:
#@title Sample prediction outputs for API sources

# Coloured heading, then up to 8 example outputs for the 'api' source type.
logger.info(f"{Fore.RED}API{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='api')

[31mAPI[0m


In [15]:
#@title Sample prediction outputs for SO sources

# Coloured heading, then up to 8 example outputs for the 'so' source type.
logger.info(f"{Fore.RED}SO{Style.RESET_ALL}")
model.metrics.examples_per_source_type(n_samples=8, source_type='so')

[31mSO[0m

[31mHow to record phone calls in Android[0m
https://stackoverflow.com/questions/6688444

[w=0][31m[y=1][33m[p=0.8558][0m to record just hit the menu button while in call in android phone it will store conversation in amr format and in root directory of sd card max 20 min conversation.

[w=0][31m[y=1][33m[p=0.8443][0m First off, you have to be careful with recording calls as there are legal requirements depending on the country.

[w=2][31m[y=1][33m[p=0.8433][0m I am using mic to record phone audio and also use the Telephony manager to find the calling state.

[w=2][31m[y=1][33m[p=0.8372][0m I am using mic to record calls for better support and compatibility.

[w=2][31m[y=1][33m[p=0.8359][0m I haven't tried recording phone call's but there is a option in LINK for:

[w=2][31m[y=1][33m[p=0.8348][0m Here is a LINK on how to record audio using the LINK.

[w=0][31m[y=1][33m[p=0.8233][0m after that, you can easily start recording anywhere you want

[w=0][31