In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from ast import literal_eval
from tqdm import tqdm
from sklearn.metrics import classification_report
tqdm.pandas()
import random
random.seed(0)
from belt_nlp.bert_with_pooling import BertClassifierWithPooling

In [2]:
raw_data_path = '../data/raw/'
processed_data_path = '../data/processed/'
reports_path = '../reports/'
file_format_users_filtered = processed_data_path + 'r3_{target}_{split}_users_scored_Timeline.csv' 
file_format_tmt_filtered = processed_data_path + '{split}_r3_{target}_top_mentioned_timelines_scored_Texts.csv'

In [3]:
target_list = [
    'ig',
    'bo', 
    'cl', 
    'co', 
    'gl', 
    'lu'
    ]

In [4]:
dict_experiments = {
    'filtered_Texts5': {
        "text_col": 'Texts',
        "n_comments": 5,
        "file_format": file_format_tmt_filtered
    },
    'filteredTimeline5': {
        "text_col": 'Timeline',
        "n_comments": 5,
        "file_format": file_format_users_filtered
    },
    'filtered_Texts10': {
        "text_col": 'Texts',
        "n_comments": 10,
        "file_format": file_format_tmt_filtered
    },
    'filteredTimeline10': {
        "text_col": 'Timeline',
        "n_comments": 10,
        "file_format": file_format_users_filtered
    },
    'filtered_Texts15': {
        "text_col": 'Texts',
        "n_comments": 15,
        "file_format": file_format_tmt_filtered
    },
    'filteredTimeline15': {
        "text_col": 'Timeline',
        "n_comments": 15,
        "file_format": file_format_users_filtered
    },
}

# Example - Model BERT with pooling

In this notebook we will show how to use basic methods `fit` and `predict` for the BERT model with pooling.

In [5]:
!export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

In [6]:
bert_model_name = 'pablocosta/bertabaporu-base-uncased'
epochs = 3
batch_size = 3

MODEL_PARAMS = {
    "batch_size": batch_size,
    "learning_rate": 5e-5,
    "epochs": epochs,
    "chunk_size": 510,
    "stride": 510,
    "minimal_chunk_length": 510,
    "pooling_strategy": "mean",
    "pretrained_model_name_or_path": bert_model_name
}

In [7]:
bert_model_name.replace('/','_')

'pablocosta_bertabaporu-base-uncased'

In [8]:
for exp_name, config in dict_experiments.items():
    
    print(f"""####################################  
# Running {exp_name}
#####################################""")
    
    
    # get configs of experiments
    text_col = config['text_col']
    file_format = config['file_format']
    n_comments = config['n_comments']
    file_format = config['file_format']
    
    
    list_responses = []
    for target in target_list:
        
        print(target)
        
        # read data
        train = pd.read_csv(
            file_format.format(target = target, split = "train"), 
            sep = ';', 
            encoding='utf-8-sig'
            )

        train[f'comments_and_scores_{text_col}'] = train[f'comments_and_scores_{text_col}'].progress_apply(lambda x: literal_eval(x))

        train[text_col] = train[f'comments_and_scores_{text_col}'].progress_apply(
            lambda x: " # ".join([comment for score, comment in x[-n_comments:]])
            ) 
        train.Polarity = train.Polarity.map({
            "against": False,
            "for": True
        })
        test = pd.read_csv(
            file_format.format(target = target, split = "test"), 
            sep = ';', 
            encoding='utf-8-sig'
            )

        test[f'comments_and_scores_{text_col}'] = test[f'comments_and_scores_{text_col}'].progress_apply(lambda x: literal_eval(x))

        test[text_col] = test[f'comments_and_scores_{text_col}'].progress_apply(
            lambda x: " # ".join([comment for score, comment in x[-n_comments:]])
            ) 

        test.Polarity = test.Polarity.map({
            "against": False,
            "for": True
        })
        
        
        X_train = train[text_col].tolist()
        X_test = test[text_col].tolist()

        y_train = train["Polarity"].tolist()
        y_test = test["Polarity"].tolist()        
        
        torch.cuda.empty_cache()
        
        model = BertClassifierWithPooling(**MODEL_PARAMS, device="cuda:0")
        
        model.fit(X_train, y_train, epochs=epochs)
        
        y_pred = model.predict_classes(X_test)
        
        del model
        
        df_responses = pd.DataFrame({
            "y_test": y_test,
            "y_pred": y_pred
        })
        
        df_responses['target'] =target
        df_responses['exp_name'] = exp_name
        df_responses['n_comments'] = n_comments
        df_responses['text_col'] = text_col
        
        print(classification_report(y_test, y_pred))
        df_responses.to_csv(f'{reports_path}test_results/belt_{exp_name}_{bert_model_name.replace('/','_')}_test_results_part_{target}.csv')  
        
        list_responses.append(df_responses)
        
df_responses_final = pd.concat(list_responses)
df_responses_final.to_csv(f'{reports_path}test_results/belt_{exp_name}_{bert_model_name.replace('/','_')}_test_results.csv')


####################################  
# Running filtered_Texts5
#####################################
ig


100%|██████████| 1758/1758 [00:34<00:00, 51.26it/s]
100%|██████████| 1758/1758 [00:00<00:00, 427280.90it/s]
100%|██████████| 599/599 [00:12<00:00, 49.48it/s]
100%|██████████| 599/599 [00:00<00:00, 352319.18it/s]


              precision    recall  f1-score   support

       False       0.66      0.61      0.63       339
        True       0.54      0.60      0.57       260

    accuracy                           0.60       599
   macro avg       0.60      0.60      0.60       599
weighted avg       0.61      0.60      0.61       599

bo


100%|██████████| 556/556 [00:10<00:00, 55.52it/s]
100%|██████████| 556/556 [00:00<00:00, 334006.45it/s]
100%|██████████| 188/188 [00:03<00:00, 60.09it/s]
100%|██████████| 188/188 [00:00<00:00, 260584.65it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.86      1.00      0.93       162
        True       0.00      0.00      0.00        26

    accuracy                           0.86       188
   macro avg       0.43      0.50      0.46       188
weighted avg       0.74      0.86      0.80       188

cl


100%|██████████| 1710/1710 [00:44<00:00, 38.01it/s]
100%|██████████| 1710/1710 [00:00<00:00, 342024.79it/s]
100%|██████████| 574/574 [00:15<00:00, 36.35it/s]
100%|██████████| 574/574 [00:00<00:00, 352855.12it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       289
        True       0.50      1.00      0.66       285

    accuracy                           0.50       574
   macro avg       0.25      0.50      0.33       574
weighted avg       0.25      0.50      0.33       574

co


100%|██████████| 2289/2289 [00:57<00:00, 40.04it/s]
100%|██████████| 2289/2289 [00:00<00:00, 373033.45it/s]
100%|██████████| 774/774 [00:19<00:00, 39.52it/s]
100%|██████████| 774/774 [00:00<00:00, 399113.76it/s]


              precision    recall  f1-score   support

       False       0.67      0.01      0.01       354
        True       0.54      1.00      0.70       420

    accuracy                           0.54       774
   macro avg       0.61      0.50      0.36       774
weighted avg       0.60      0.54      0.39       774

gl


100%|██████████| 1210/1210 [00:30<00:00, 39.45it/s]
100%|██████████| 1210/1210 [00:00<00:00, 388718.43it/s]
100%|██████████| 411/411 [00:10<00:00, 39.09it/s]
100%|██████████| 411/411 [00:00<00:00, 276925.13it/s]


              precision    recall  f1-score   support

       False       0.50      0.45      0.47       167
        True       0.65      0.69      0.67       244

    accuracy                           0.59       411
   macro avg       0.57      0.57      0.57       411
weighted avg       0.59      0.59      0.59       411

lu


100%|██████████| 804/804 [00:15<00:00, 52.65it/s]
100%|██████████| 804/804 [00:00<00:00, 330836.89it/s]
100%|██████████| 272/272 [00:05<00:00, 54.24it/s]
100%|██████████| 272/272 [00:00<00:00, 354191.46it/s]


              precision    recall  f1-score   support

       False       0.59      0.61      0.60       143
        True       0.55      0.53      0.54       129

    accuracy                           0.57       272
   macro avg       0.57      0.57      0.57       272
weighted avg       0.57      0.57      0.57       272

####################################  
# Running filteredTimeline5
#####################################
ig


100%|██████████| 1796/1796 [01:11<00:00, 25.20it/s]
100%|██████████| 1796/1796 [00:00<00:00, 400476.87it/s]
100%|██████████| 599/599 [00:24<00:00, 24.32it/s]
100%|██████████| 599/599 [00:00<00:00, 426986.42it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.57      1.00      0.72       339
        True       0.00      0.00      0.00       260

    accuracy                           0.57       599
   macro avg       0.28      0.50      0.36       599
weighted avg       0.32      0.57      0.41       599

bo


100%|██████████| 563/563 [00:21<00:00, 25.80it/s]
100%|██████████| 563/563 [00:00<00:00, 227911.70it/s]
100%|██████████| 188/188 [00:07<00:00, 26.25it/s]
100%|██████████| 188/188 [00:00<00:00, 278435.44it/s]


              precision    recall  f1-score   support

       False       0.93      0.85      0.89       162
        True       0.40      0.62      0.48        26

    accuracy                           0.82       188
   macro avg       0.67      0.73      0.69       188
weighted avg       0.86      0.82      0.83       188

cl


100%|██████████| 1721/1721 [00:35<00:00, 48.70it/s]
100%|██████████| 1721/1721 [00:00<00:00, 441573.21it/s]
100%|██████████| 574/574 [00:11<00:00, 48.20it/s]
100%|██████████| 574/574 [00:00<00:00, 426942.81it/s]


              precision    recall  f1-score   support

       False       0.87      0.73      0.79       289
        True       0.76      0.89      0.82       285

    accuracy                           0.81       574
   macro avg       0.81      0.81      0.81       574
weighted avg       0.82      0.81      0.81       574

co


100%|██████████| 2319/2319 [00:47<00:00, 48.32it/s]
100%|██████████| 2319/2319 [00:00<00:00, 407481.82it/s]
100%|██████████| 774/774 [00:16<00:00, 46.56it/s]
100%|██████████| 774/774 [00:00<00:00, 450637.33it/s]


              precision    recall  f1-score   support

       False       0.83      0.75      0.79       354
        True       0.80      0.87      0.84       420

    accuracy                           0.81       774
   macro avg       0.82      0.81      0.81       774
weighted avg       0.82      0.81      0.81       774

gl


100%|██████████| 1231/1231 [00:50<00:00, 24.41it/s]
100%|██████████| 1231/1231 [00:00<00:00, 322779.96it/s]
100%|██████████| 411/411 [00:17<00:00, 23.29it/s]
100%|██████████| 411/411 [00:00<00:00, 376142.03it/s]


              precision    recall  f1-score   support

       False       0.48      0.35      0.41       167
        True       0.63      0.74      0.68       244

    accuracy                           0.58       411
   macro avg       0.55      0.55      0.54       411
weighted avg       0.57      0.58      0.57       411

lu


100%|██████████| 816/816 [00:31<00:00, 25.72it/s]
100%|██████████| 816/816 [00:00<00:00, 387561.10it/s]
100%|██████████| 272/272 [00:10<00:00, 25.38it/s]
100%|██████████| 272/272 [00:00<00:00, 346868.56it/s]


              precision    recall  f1-score   support

       False       0.80      0.48      0.60       143
        True       0.60      0.87      0.71       129

    accuracy                           0.66       272
   macro avg       0.70      0.67      0.65       272
weighted avg       0.70      0.66      0.65       272

####################################  
# Running filtered_Texts10
#####################################
ig


100%|██████████| 1758/1758 [00:33<00:00, 52.80it/s]
100%|██████████| 1758/1758 [00:00<00:00, 336079.60it/s]
100%|██████████| 599/599 [00:11<00:00, 51.66it/s]
100%|██████████| 599/599 [00:00<00:00, 238548.05it/s]


              precision    recall  f1-score   support

       False       0.65      0.73      0.69       339
        True       0.58      0.49      0.53       260

    accuracy                           0.62       599
   macro avg       0.61      0.61      0.61       599
weighted avg       0.62      0.62      0.62       599

bo


100%|██████████| 556/556 [00:09<00:00, 55.80it/s]
100%|██████████| 556/556 [00:00<00:00, 268853.24it/s]
100%|██████████| 188/188 [00:03<00:00, 59.55it/s]
100%|██████████| 188/188 [00:00<00:00, 229423.67it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.86      1.00      0.93       162
        True       0.00      0.00      0.00        26

    accuracy                           0.86       188
   macro avg       0.43      0.50      0.46       188
weighted avg       0.74      0.86      0.80       188

cl


100%|██████████| 1710/1710 [00:35<00:00, 48.11it/s]
100%|██████████| 1710/1710 [00:00<00:00, 316446.50it/s]
100%|██████████| 574/574 [00:12<00:00, 45.10it/s]
100%|██████████| 574/574 [00:00<00:00, 301922.56it/s]


              precision    recall  f1-score   support

       False       0.62      0.40      0.49       289
        True       0.55      0.75      0.64       285

    accuracy                           0.57       574
   macro avg       0.59      0.58      0.56       574
weighted avg       0.59      0.57      0.56       574

co


100%|██████████| 2289/2289 [00:51<00:00, 44.17it/s]
100%|██████████| 2289/2289 [00:00<00:00, 343411.73it/s]
100%|██████████| 774/774 [00:17<00:00, 43.07it/s]
100%|██████████| 774/774 [00:00<00:00, 268407.71it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       354
        True       0.54      1.00      0.70       420

    accuracy                           0.54       774
   macro avg       0.27      0.50      0.35       774
weighted avg       0.29      0.54      0.38       774

gl


100%|██████████| 1210/1210 [00:26<00:00, 46.01it/s]
100%|██████████| 1210/1210 [00:00<00:00, 281107.11it/s]
100%|██████████| 411/411 [00:09<00:00, 43.93it/s]
100%|██████████| 411/411 [00:00<00:00, 295181.33it/s]


              precision    recall  f1-score   support

       False       0.49      0.35      0.41       167
        True       0.63      0.75      0.68       244

    accuracy                           0.59       411
   macro avg       0.56      0.55      0.55       411
weighted avg       0.57      0.59      0.57       411

lu


100%|██████████| 804/804 [00:15<00:00, 51.17it/s]
100%|██████████| 804/804 [00:00<00:00, 295627.28it/s]
100%|██████████| 272/272 [00:05<00:00, 49.12it/s]
100%|██████████| 272/272 [00:00<00:00, 259107.58it/s]


              precision    recall  f1-score   support

       False       0.62      0.43      0.51       143
        True       0.53      0.71      0.61       129

    accuracy                           0.56       272
   macro avg       0.58      0.57      0.56       272
weighted avg       0.58      0.56      0.55       272

####################################  
# Running filteredTimeline10
#####################################
ig


100%|██████████| 1796/1796 [01:13<00:00, 24.59it/s]
100%|██████████| 1796/1796 [00:00<00:00, 320879.62it/s]
100%|██████████| 599/599 [00:25<00:00, 23.93it/s]
100%|██████████| 599/599 [00:00<00:00, 281594.72it/s]


              precision    recall  f1-score   support

       False       0.69      0.79      0.74       339
        True       0.67      0.54      0.60       260

    accuracy                           0.68       599
   macro avg       0.68      0.67      0.67       599
weighted avg       0.68      0.68      0.68       599

bo


100%|██████████| 563/563 [00:22<00:00, 25.28it/s]
100%|██████████| 563/563 [00:00<00:00, 266492.85it/s]
100%|██████████| 188/188 [00:07<00:00, 24.02it/s]
100%|██████████| 188/188 [00:00<00:00, 157800.51it/s]


              precision    recall  f1-score   support

       False       0.94      0.99      0.96       162
        True       0.88      0.58      0.70        26

    accuracy                           0.93       188
   macro avg       0.91      0.78      0.83       188
weighted avg       0.93      0.93      0.92       188

cl


100%|██████████| 1721/1721 [00:34<00:00, 49.41it/s]
100%|██████████| 1721/1721 [00:00<00:00, 338891.89it/s]
100%|██████████| 574/574 [00:12<00:00, 46.84it/s]
100%|██████████| 574/574 [00:00<00:00, 253825.04it/s]


              precision    recall  f1-score   support

       False       0.81      0.86      0.83       289
        True       0.85      0.79      0.82       285

    accuracy                           0.83       574
   macro avg       0.83      0.83      0.83       574
weighted avg       0.83      0.83      0.83       574

co


100%|██████████| 2319/2319 [00:47<00:00, 49.21it/s]
100%|██████████| 2319/2319 [00:00<00:00, 338398.60it/s]
100%|██████████| 774/774 [00:17<00:00, 45.49it/s]
100%|██████████| 774/774 [00:00<00:00, 321584.08it/s]


              precision    recall  f1-score   support

       False       0.99      0.32      0.48       354
        True       0.63      1.00      0.78       420

    accuracy                           0.69       774
   macro avg       0.81      0.66      0.63       774
weighted avg       0.80      0.69      0.64       774

gl


100%|██████████| 1231/1231 [00:50<00:00, 24.23it/s]
100%|██████████| 1231/1231 [00:00<00:00, 296071.35it/s]
100%|██████████| 411/411 [00:17<00:00, 23.05it/s]
100%|██████████| 411/411 [00:00<00:00, 273368.05it/s]


              precision    recall  f1-score   support

       False       0.55      0.50      0.53       167
        True       0.68      0.72      0.70       244

    accuracy                           0.63       411
   macro avg       0.62      0.61      0.61       411
weighted avg       0.63      0.63      0.63       411

lu


100%|██████████| 816/816 [00:31<00:00, 25.82it/s]
100%|██████████| 816/816 [00:00<00:00, 267533.19it/s]
100%|██████████| 272/272 [00:10<00:00, 24.91it/s]
100%|██████████| 272/272 [00:00<00:00, 253072.47it/s]


              precision    recall  f1-score   support

       False       0.69      0.83      0.75       143
        True       0.75      0.58      0.66       129

    accuracy                           0.71       272
   macro avg       0.72      0.70      0.70       272
weighted avg       0.72      0.71      0.70       272

####################################  
# Running filtered_Texts15
#####################################
ig


100%|██████████| 1758/1758 [00:33<00:00, 52.47it/s]
100%|██████████| 1758/1758 [00:00<00:00, 247626.91it/s]
100%|██████████| 599/599 [00:11<00:00, 50.85it/s]
100%|██████████| 599/599 [00:00<00:00, 252171.85it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.57      1.00      0.72       339
        True       0.00      0.00      0.00       260

    accuracy                           0.57       599
   macro avg       0.28      0.50      0.36       599
weighted avg       0.32      0.57      0.41       599

bo


100%|██████████| 556/556 [00:10<00:00, 54.34it/s]
100%|██████████| 556/556 [00:00<00:00, 210093.07it/s]
100%|██████████| 188/188 [00:03<00:00, 57.69it/s]
100%|██████████| 188/188 [00:00<00:00, 176168.26it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.86      1.00      0.93       162
        True       0.00      0.00      0.00        26

    accuracy                           0.86       188
   macro avg       0.43      0.50      0.46       188
weighted avg       0.74      0.86      0.80       188

cl


100%|██████████| 1710/1710 [00:35<00:00, 47.83it/s]
100%|██████████| 1710/1710 [00:00<00:00, 244611.71it/s]
100%|██████████| 574/574 [00:13<00:00, 44.10it/s]
100%|██████████| 574/574 [00:00<00:00, 246572.15it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.50      1.00      0.67       289
        True       0.00      0.00      0.00       285

    accuracy                           0.50       574
   macro avg       0.25      0.50      0.33       574
weighted avg       0.25      0.50      0.34       574

co


100%|██████████| 2289/2289 [00:52<00:00, 43.54it/s]
100%|██████████| 2289/2289 [00:00<00:00, 272007.08it/s]
100%|██████████| 774/774 [00:18<00:00, 42.09it/s]
100%|██████████| 774/774 [00:00<00:00, 211078.76it/s]


              precision    recall  f1-score   support

       False       0.61      0.73      0.67       354
        True       0.73      0.61      0.66       420

    accuracy                           0.67       774
   macro avg       0.67      0.67      0.67       774
weighted avg       0.68      0.67      0.67       774

gl


100%|██████████| 1210/1210 [00:26<00:00, 45.88it/s]
100%|██████████| 1210/1210 [00:00<00:00, 252706.66it/s]
100%|██████████| 411/411 [00:09<00:00, 44.33it/s]
100%|██████████| 411/411 [00:00<00:00, 212795.82it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       167
        True       0.59      1.00      0.75       244

    accuracy                           0.59       411
   macro avg       0.30      0.50      0.37       411
weighted avg       0.35      0.59      0.44       411

lu


100%|██████████| 804/804 [00:15<00:00, 51.30it/s]
100%|██████████| 804/804 [00:00<00:00, 228671.62it/s]
100%|██████████| 272/272 [00:05<00:00, 49.79it/s]
100%|██████████| 272/272 [00:00<00:00, 224223.80it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.53      1.00      0.69       143
        True       0.00      0.00      0.00       129

    accuracy                           0.53       272
   macro avg       0.26      0.50      0.34       272
weighted avg       0.28      0.53      0.36       272

####################################  
# Running filteredTimeline15
#####################################
ig


100%|██████████| 1796/1796 [01:12<00:00, 24.68it/s]
100%|██████████| 1796/1796 [00:00<00:00, 249460.87it/s]
100%|██████████| 599/599 [00:25<00:00, 23.82it/s]
100%|██████████| 599/599 [00:00<00:00, 253392.65it/s]


              precision    recall  f1-score   support

       False       0.67      0.59      0.63       339
        True       0.54      0.63      0.58       260

    accuracy                           0.61       599
   macro avg       0.61      0.61      0.61       599
weighted avg       0.62      0.61      0.61       599

bo


100%|██████████| 563/563 [00:22<00:00, 25.10it/s]
100%|██████████| 563/563 [00:00<00:00, 181575.79it/s]
100%|██████████| 188/188 [00:07<00:00, 23.71it/s]
100%|██████████| 188/188 [00:00<00:00, 167487.08it/s]


              precision    recall  f1-score   support

       False       0.93      0.98      0.95       162
        True       0.82      0.54      0.65        26

    accuracy                           0.92       188
   macro avg       0.88      0.76      0.80       188
weighted avg       0.92      0.92      0.91       188

cl


100%|██████████| 1721/1721 [00:35<00:00, 48.86it/s]
100%|██████████| 1721/1721 [00:00<00:00, 254025.80it/s]
100%|██████████| 574/574 [00:12<00:00, 46.98it/s]
100%|██████████| 574/574 [00:00<00:00, 214938.89it/s]


              precision    recall  f1-score   support

       False       0.86      0.86      0.86       289
        True       0.86      0.85      0.85       285

    accuracy                           0.86       574
   macro avg       0.86      0.86      0.86       574
weighted avg       0.86      0.86      0.86       574

co


100%|██████████| 2319/2319 [00:47<00:00, 48.85it/s]
100%|██████████| 2319/2319 [00:00<00:00, 268260.55it/s]
100%|██████████| 774/774 [00:16<00:00, 46.20it/s]
100%|██████████| 774/774 [00:00<00:00, 263912.80it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       354
        True       0.54      1.00      0.70       420

    accuracy                           0.54       774
   macro avg       0.27      0.50      0.35       774
weighted avg       0.29      0.54      0.38       774

gl


100%|██████████| 1231/1231 [01:01<00:00, 20.15it/s]
100%|██████████| 1231/1231 [00:00<00:00, 239291.29it/s]
100%|██████████| 411/411 [00:18<00:00, 22.54it/s]
100%|██████████| 411/411 [00:00<00:00, 228325.69it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

       False       0.00      0.00      0.00       167
        True       0.59      1.00      0.75       244

    accuracy                           0.59       411
   macro avg       0.30      0.50      0.37       411
weighted avg       0.35      0.59      0.44       411

lu


100%|██████████| 816/816 [00:33<00:00, 24.49it/s]
100%|██████████| 816/816 [00:00<00:00, 218163.70it/s]
100%|██████████| 272/272 [00:11<00:00, 23.31it/s]
100%|██████████| 272/272 [00:00<00:00, 208947.01it/s]


              precision    recall  f1-score   support

       False       0.74      0.89      0.81       143
        True       0.84      0.65      0.73       129

    accuracy                           0.78       272
   macro avg       0.79      0.77      0.77       272
weighted avg       0.79      0.78      0.77       272



In [11]:
df_responses_final['hit'] = df_responses_final.apply(lambda x: 1 if x.y_test == x.y_pred else 0,axis = 1)
df_responses_final

Unnamed: 0,y_test,y_pred,target,exp_name,n_comments,text_col,hit
0,False,True,ig,filteredTimeline15,15,Timeline,0
1,False,False,ig,filteredTimeline15,15,Timeline,1
2,False,True,ig,filteredTimeline15,15,Timeline,0
3,True,True,ig,filteredTimeline15,15,Timeline,1
4,True,True,ig,filteredTimeline15,15,Timeline,1
...,...,...,...,...,...,...,...
267,True,True,lu,filteredTimeline15,15,Timeline,1
268,False,False,lu,filteredTimeline15,15,Timeline,1
269,True,False,lu,filteredTimeline15,15,Timeline,0
270,True,True,lu,filteredTimeline15,15,Timeline,1


In [13]:
df_responses_final

Unnamed: 0,y_test,y_pred,target,exp_name,n_comments,text_col,hit
0,False,True,ig,filteredTimeline15,15,Timeline,0
1,False,False,ig,filteredTimeline15,15,Timeline,1
2,False,True,ig,filteredTimeline15,15,Timeline,0
3,True,True,ig,filteredTimeline15,15,Timeline,1
4,True,True,ig,filteredTimeline15,15,Timeline,1
...,...,...,...,...,...,...,...
267,True,True,lu,filteredTimeline15,15,Timeline,1
268,False,False,lu,filteredTimeline15,15,Timeline,1
269,True,False,lu,filteredTimeline15,15,Timeline,0
270,True,True,lu,filteredTimeline15,15,Timeline,1


In [12]:
df_responses_final.groupby('n_comments').mean('hit')

Unnamed: 0_level_0,y_test,y_pred,hit
n_comments,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
15,0.484031,0.669624,0.675302
