In [2]:
from AtlasToDataframe import read_atlas, code_selector, splitter
import random, datetime

In [3]:
import numpy as np, scipy as sp, pandas as pd

import torch
from transformers import DistilBertModel, DistilBertTokenizer # pytorch transformers
from torch.utils.data import TensorDataset, DataLoader

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, cohen_kappa_score
import krippendorff

In [4]:
import gc

In [5]:
from typing import Iterator, Iterable, Tuple, Text, Union
from scipy.sparse import spmatrix

NDArray = Union[np.ndarray, spmatrix]

class Classifier:
    """Thin wrapper around a class-weighted, L2-penalised logistic regression."""

    def __init__(self):
        """Create the (unfitted) scikit-learn estimator used by this wrapper."""
        # class_weight='balanced' makes scikit-learn weight classes inversely
        # to their frequency in the training labels.
        self.clf = LogisticRegression(penalty = 'l2', solver = 'liblinear', C = 1, class_weight= 'balanced')

    def train(self, features: NDArray, labels: NDArray) -> None:
        """Fit the estimator on ``features`` and their ``labels``."""
        self.clf.fit(features, labels)

    def predict(self, features: NDArray) -> Tuple[NDArray, NDArray]:
        """Predict labels and class probabilities for ``features``.

        Returns:
            A ``(predictions, predprob)`` pair: the output of the estimator's
            ``predict`` and of its ``predict_proba`` (one column per class,
            ordered as in ``self.clf.classes_``).
        """
        predictions = self.clf.predict(features)
        predprob = self.clf.predict_proba(features)

        return predictions, predprob

In [6]:
def evaluate(features, labels, testsize, k, random_state=None):
    """Train and score a fresh ``Classifier`` on ``k`` random train/test splits.

    For every split the held-out F1, precision, recall, Krippendorff's alpha
    and Cohen's kappa (true vs. predicted labels) are printed on one line.
    Nothing is returned.

    Args:
        features: Feature matrix, one row per sample.
        labels: Labels aligned row-for-row with ``features``; F1 is scored
            with 1 as the positive label.
        testsize: Forwarded to ``train_test_split`` as ``test_size``.
        k: Number of independent splits to run.
        random_state: Optional integer seed. When given, split ``i`` is drawn
            with ``random_state + i`` so the whole run can be reproduced.
            When ``None`` (the default) each call draws new random splits.
    """
    for i in range(k):
        # train_test_split does not consult Python's `random` module, so
        # reseeding that module cannot influence the split; the split is
        # controlled only through the random_state argument below.
        split_seed = None if random_state is None else random_state + i
        train_features, test_features, train_labels, test_labels = train_test_split(
            features, labels, test_size=testsize, random_state=split_seed
        )

        classifier = Classifier()
        classifier.train(train_features, train_labels)
        # Only the hard predictions are scored; the probabilities are unused here.
        predicted_labels, _ = classifier.predict(test_features)

        f1 = f1_score(test_labels, predicted_labels, pos_label=1)
        precision = precision_score(test_labels, predicted_labels)
        recall = recall_score(test_labels, predicted_labels)
        # True and predicted labels are stacked as two rows and compared as
        # if they were two annotators.
        alpha = krippendorff.alpha(np.stack((test_labels, predicted_labels)))
        kappa = cohen_kappa_score(test_labels, predicted_labels)
        print(f"\n{f1:.1%} F1, {precision:.1%} precision, {recall:.1%} recall, {alpha:.1%} krippendorff's alpha on test data, and {kappa:.1%} cohen's kappa")

In [7]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# later `.to(device)` calls still work on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [8]:
gc.collect()

82

In [9]:
torch.cuda.empty_cache()

In [4]:
data = read_atlas("Interviews.csv")

In [11]:
texts = data['Quotation Content'].values.tolist()

In [12]:
# Preview only the first few quotations; displaying the full list floods the
# notebook output with every interview excerpt.
texts[:5]

['No, that’s fine.',
 'Yep.',
 'Simple first question.  I guess going back to the beginning as far as cancer goes, February 2nd I went into the hospital in Jacksonville, Florida, with what I thought was appendicitis.  They took my appendix out and realized that it was cancer.  So from that point on --  I guess you want the whole tour?',
 'Yeah.  From that point the oncologist there basically said, “Hey, we don’t have anybody who can do what you need,” so I went doctor shopping: M. D. Anderson in Texas, where I ended up getting surgery, and I had also gone to U.C. San Diego and saw a Dr. Lowey(?) out there.  I had came here and had gone to U.C. San Francisco to see about an oncologist, not necessarily the surgery part but for follow-up treatment.  And had gone back to Houston on the 17th of March and did the liver resection surgery.  Left there I think the 25th of March, came here and was doing my follow-up to Dr. Vernook for, came up every follow-up.',
 'Correct.',
 "Yeah.  I had worke

In [13]:
len(texts)

9610

In [13]:
# Upper bound on tokens per text: passed to the tokenizer as `max_length`
# together with truncation=True, so longer inputs are cut to this size.
MAX_LENGTH = 512
# Number of encoded texts per batch (passed to DataLoader as `batch_size`).
BATCH_SIZE = 16

In [14]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [15]:
# Encode each quotation separately into a list of token ids, truncated to
# MAX_LENGTH; the result is one (variable-length) id list per text.
encodings = [
    tokenizer.encode(
        quotation,
        padding=True,
        truncation=True,
        max_length = MAX_LENGTH,
        add_special_tokens=True,
    )
    for quotation in texts
]

In [16]:
# Length of the longest encoded text (0 when there are no texts).
max_len = max(map(len, encodings), default=0)

# Right-pad every id list with zeros up to max_len so the rows form a
# rectangular array.
encodings_padded = np.array([ids + [0] * (max_len - len(ids)) for ids in encodings])
# 1.0 where the token id is positive, 0.0 elsewhere (i.e. on the zero padding),
# kept as nested Python lists of floats.
attention_mask = (encodings_padded > 0).astype(float).tolist()

In [17]:
dataset = TensorDataset(torch.tensor(encodings_padded), torch.tensor(attention_mask))

In [18]:
# Batches are served in dataset order (shuffle is left at its default, off),
# so the features stacked from these batches stay row-aligned with `texts`
# and with the label lists they are later paired with.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [19]:
# Load the pretrained DistilBERT base model and move its parameters to `device`.
model = DistilBertModel.from_pretrained('distilbert-base-uncased').to(device)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [20]:
features = []

# Pure inference: no_grad() keeps autograd from tracking the forward passes.
with torch.no_grad():
    for token_ids, masks in dataloader:
        token_ids = token_ids.to(device)
        masks = masks.to(device)
        model_output = model(input_ids=token_ids, attention_mask=masks)
        # Element 0 of the output holds the per-token hidden states; keep only
        # the vector at sequence position 0 of each text (presumably the [CLS]
        # token, since the texts were encoded with add_special_tokens=True).
        first_token_states = model_output[0][:, 0, :]
        features.append(first_token_states.cpu().numpy())

In [21]:
features = np.vstack(features)

In [21]:
labels = data['NET_SUPPORT_Social/Emotional'].values.tolist()

In [63]:
evaluate(features, labels, 0.2, 5)


36.7% F1, 23.9% precision, 79.7% recall, 30.6% krippendorff's alpha on test data, and 32.5% cohen's kappa

38.9% F1, 26.4% precision, 73.8% recall, 33.4% krippendorff's alpha on test data, and 34.7% cohen's kappa

32.9% F1, 21.9% precision, 66.2% recall, 27.5% krippendorff's alpha on test data, and 28.9% cohen's kappa

37.2% F1, 25.7% precision, 67.0% recall, 31.1% krippendorff's alpha on test data, and 32.4% cohen's kappa

35.3% F1, 22.7% precision, 80.0% recall, 29.5% krippendorff's alpha on test data, and 31.4% cohen's kappa


In [64]:
evaluate(features, labels, 0.5, 5)


35.0% F1, 23.6% precision, 67.9% recall, 29.6% krippendorff's alpha on test data, and 31.0% cohen's kappa

37.8% F1, 25.5% precision, 73.2% recall, 32.2% krippendorff's alpha on test data, and 33.6% cohen's kappa

36.5% F1, 25.0% precision, 67.5% recall, 31.3% krippendorff's alpha on test data, and 32.4% cohen's kappa

34.7% F1, 23.4% precision, 67.9% recall, 29.2% krippendorff's alpha on test data, and 30.6% cohen's kappa

35.8% F1, 24.4% precision, 67.0% recall, 30.4% krippendorff's alpha on test data, and 31.6% cohen's kappa


In [65]:
evaluate(features, labels, 0.75, 5)


37.6% F1, 28.5% precision, 55.1% recall, 33.3% krippendorff's alpha on test data, and 33.8% cohen's kappa

37.3% F1, 26.8% precision, 61.6% recall, 32.7% krippendorff's alpha on test data, and 33.4% cohen's kappa

35.2% F1, 24.7% precision, 61.2% recall, 30.3% krippendorff's alpha on test data, and 31.2% cohen's kappa

38.1% F1, 27.2% precision, 63.5% recall, 33.4% krippendorff's alpha on test data, and 34.2% cohen's kappa

36.4% F1, 25.1% precision, 66.2% recall, 31.2% krippendorff's alpha on test data, and 32.3% cohen's kappa


In [24]:
labels = code_selector(data, 'DEC').iloc[:, 1].values.tolist()

In [69]:
evaluate(features, labels, 0.2, 5)


42.0% F1, 29.5% precision, 72.6% recall, 32.0% krippendorff's alpha on test data, and 34.0% cohen's kappa

45.6% F1, 32.1% precision, 78.6% recall, 35.6% krippendorff's alpha on test data, and 37.6% cohen's kappa

43.0% F1, 30.0% precision, 76.1% recall, 32.9% krippendorff's alpha on test data, and 35.1% cohen's kappa

41.0% F1, 28.4% precision, 73.5% recall, 31.6% krippendorff's alpha on test data, and 33.7% cohen's kappa

46.6% F1, 32.9% precision, 80.1% recall, 36.7% krippendorff's alpha on test data, and 38.6% cohen's kappa


In [70]:
evaluate(features, labels, 0.5, 5)


44.5% F1, 32.9% precision, 68.6% recall, 35.3% krippendorff's alpha on test data, and 36.6% cohen's kappa

42.8% F1, 30.2% precision, 73.4% recall, 32.8% krippendorff's alpha on test data, and 34.8% cohen's kappa

45.9% F1, 33.0% precision, 75.3% recall, 36.5% krippendorff's alpha on test data, and 38.2% cohen's kappa

39.8% F1, 27.9% precision, 69.1% recall, 30.2% krippendorff's alpha on test data, and 32.1% cohen's kappa

41.4% F1, 29.1% precision, 71.3% recall, 31.5% krippendorff's alpha on test data, and 33.5% cohen's kappa


In [71]:
evaluate(features, labels, 0.75, 5)


40.6% F1, 28.6% precision, 69.6% recall, 30.5% krippendorff's alpha on test data, and 32.5% cohen's kappa

41.1% F1, 29.9% precision, 65.8% recall, 31.7% krippendorff's alpha on test data, and 33.2% cohen's kappa

42.8% F1, 30.8% precision, 70.3% recall, 33.4% krippendorff's alpha on test data, and 35.1% cohen's kappa

43.1% F1, 32.1% precision, 65.8% recall, 34.3% krippendorff's alpha on test data, and 35.5% cohen's kappa

43.0% F1, 32.0% precision, 65.6% recall, 34.7% krippendorff's alpha on test data, and 35.8% cohen's kappa


In [6]:
labels = data['PI_Tests_All'].values.tolist()

In [78]:
evaluate(features, labels, 0.2, 5)


51.6% F1, 36.1% precision, 90.6% recall, 49.2% krippendorff's alpha on test data, and 49.6% cohen's kappa

51.3% F1, 38.0% precision, 79.0% recall, 48.8% krippendorff's alpha on test data, and 49.1% cohen's kappa

53.0% F1, 38.0% precision, 87.3% recall, 50.0% krippendorff's alpha on test data, and 50.4% cohen's kappa

57.0% F1, 43.6% precision, 82.4% recall, 54.5% krippendorff's alpha on test data, and 54.7% cohen's kappa

55.3% F1, 39.6% precision, 91.5% recall, 52.4% krippendorff's alpha on test data, and 52.9% cohen's kappa


In [79]:
evaluate(features, labels, 0.5, 5)


52.2% F1, 37.9% precision, 83.6% recall, 49.4% krippendorff's alpha on test data, and 49.8% cohen's kappa

53.4% F1, 39.2% precision, 83.8% recall, 50.6% krippendorff's alpha on test data, and 51.0% cohen's kappa

55.0% F1, 40.6% precision, 84.9% recall, 52.4% krippendorff's alpha on test data, and 52.8% cohen's kappa

47.7% F1, 33.0% precision, 85.8% recall, 44.5% krippendorff's alpha on test data, and 45.1% cohen's kappa

55.3% F1, 40.6% precision, 86.6% recall, 52.6% krippendorff's alpha on test data, and 53.0% cohen's kappa


In [80]:
evaluate(features, labels, 0.75, 5)


50.8% F1, 38.3% precision, 75.5% recall, 48.2% krippendorff's alpha on test data, and 48.4% cohen's kappa

49.4% F1, 35.8% precision, 79.5% recall, 46.4% krippendorff's alpha on test data, and 46.9% cohen's kappa

52.8% F1, 39.4% precision, 80.0% recall, 50.2% krippendorff's alpha on test data, and 50.5% cohen's kappa

50.4% F1, 38.4% precision, 73.4% recall, 47.8% krippendorff's alpha on test data, and 48.0% cohen's kappa

49.7% F1, 36.3% precision, 78.8% recall, 46.9% krippendorff's alpha on test data, and 47.3% cohen's kappa


In [26]:
# One 50/50 split that carries the raw texts along with features and labels,
# so each test prediction can be matched back to its quotation.
(
    train_features, test_features,
    train_labels, test_labels,
    train_texts, test_texts,
) = train_test_split(features, labels, texts, test_size= 0.5)

classifier = Classifier()
classifier.train(train_features, train_labels)
predicted_labels, predicted_probabilities = classifier.predict(test_features)

# Score the held-out half; the same true/predicted pair feeds every metric.
f1 = f1_score(test_labels, predicted_labels, pos_label=1)
accuracy, precision, recall = (
    metric(test_labels, predicted_labels)
    for metric in (accuracy_score, precision_score, recall_score)
)
alpha = krippendorff.alpha(np.stack((test_labels, predicted_labels)))
kappa = cohen_kappa_score(test_labels, predicted_labels)
print(f"\n{f1:.1%} F1, {precision:.1%} precision, {recall:.1%} recall, {alpha:.1%} krippendorff's alpha on test data, and {kappa:.1%} cohen's kappa")


55.0% F1, 41.1% precision, 83.1% recall, 52.2% krippendorff's alpha on test data, and 52.6% cohen's kappa


In [None]:
# One row per test quotation: probability of the second class (column 1 of
# predict_proba), the true label and the text; written most-confident first.
scored_tests = pd.DataFrame(
    {
        'Predicted Probability': predicted_probabilities[:, 1],
        'True Label': test_labels,
        'Text': test_texts,
    }
)
scored_tests.sort_values(by = 'Predicted Probability', ascending=False).to_csv("Pr_BERT+Logit_Tests.txt", sep = "\t")

In [8]:
labels = code_selector(data, 'PI').iloc[:, 1].values.tolist()

In [9]:
sum(labels)

2043

In [73]:
evaluate(features, labels, 0.2, 5)


64.6% F1, 54.7% precision, 78.9% recall, 52.2% krippendorff's alpha on test data, and 52.8% cohen's kappa

64.3% F1, 54.9% precision, 77.6% recall, 52.0% krippendorff's alpha on test data, and 52.4% cohen's kappa

65.2% F1, 54.2% precision, 81.9% recall, 52.6% krippendorff's alpha on test data, and 53.3% cohen's kappa

65.3% F1, 55.4% precision, 79.6% recall, 53.3% krippendorff's alpha on test data, and 53.8% cohen's kappa

65.1% F1, 54.8% precision, 80.1% recall, 52.5% krippendorff's alpha on test data, and 53.1% cohen's kappa


In [74]:
evaluate(features, labels, 0.5, 5)


62.7% F1, 53.1% precision, 76.5% recall, 49.4% krippendorff's alpha on test data, and 49.9% cohen's kappa

63.4% F1, 54.0% precision, 76.6% recall, 50.8% krippendorff's alpha on test data, and 51.3% cohen's kappa

62.8% F1, 52.8% precision, 77.5% recall, 49.9% krippendorff's alpha on test data, and 50.5% cohen's kappa

63.0% F1, 52.9% precision, 77.8% recall, 50.6% krippendorff's alpha on test data, and 51.2% cohen's kappa

62.8% F1, 52.6% precision, 77.8% recall, 50.2% krippendorff's alpha on test data, and 50.8% cohen's kappa


In [75]:
evaluate(features, labels, 0.75, 5)


64.3% F1, 55.2% precision, 77.0% recall, 52.2% krippendorff's alpha on test data, and 52.6% cohen's kappa

64.3% F1, 55.1% precision, 77.3% recall, 52.0% krippendorff's alpha on test data, and 52.5% cohen's kappa

63.0% F1, 52.9% precision, 77.8% recall, 50.0% krippendorff's alpha on test data, and 50.7% cohen's kappa

62.2% F1, 53.4% precision, 74.5% recall, 49.3% krippendorff's alpha on test data, and 49.8% cohen's kappa

62.6% F1, 53.2% precision, 76.0% recall, 49.6% krippendorff's alpha on test data, and 50.1% cohen's kappa


In [37]:
# One 50/50 split of features, labels and raw texts together, so the test
# predictions stay paired with the quotations they belong to.
(
    train_features, test_features,
    train_labels, test_labels,
    train_texts, test_texts,
) = train_test_split(features, labels, texts, test_size= 0.5)

classifier = Classifier()
classifier.train(train_features, train_labels)
predicted_labels, predicted_probabilities = classifier.predict(test_features)

# All metrics compare the same true/predicted label pair on the held-out half.
f1 = f1_score(test_labels, predicted_labels, pos_label=1)
accuracy, precision, recall = (
    metric(test_labels, predicted_labels)
    for metric in (accuracy_score, precision_score, recall_score)
)
alpha = krippendorff.alpha(np.stack((test_labels, predicted_labels)))
kappa = cohen_kappa_score(test_labels, predicted_labels)
print(f"\n{f1:.1%} F1, {precision:.1%} precision, {recall:.1%} recall, {alpha:.1%} krippendorff's alpha on test data, and {kappa:.1%} cohen's kappa")


65.1% F1, 55.3% precision, 79.2% recall, 52.8% krippendorff's alpha on test data, and 53.3% cohen's kappa


In [38]:
# One row per test quotation: probability of the second class (column 1 of
# predict_proba), the true label and the text; written most-confident first.
scored_pi = pd.DataFrame(
    {
        'Predicted Probability': predicted_probabilities[:, 1],
        'True Label': test_labels,
        'Text': test_texts,
    }
)
scored_pi.sort_values(by = 'Predicted Probability', ascending=False).to_csv("Pr_BERT+Logit_PI.txt", sep = "\t")

array([ 1.05280243e-02, -1.25068337e-01, -9.29146037e-02, -1.51216656e-01,
        2.69469954e-02, -1.46769360e-01,  1.65196225e-01,  3.47561270e-01,
       -1.49363667e-01, -2.38762379e-01,  9.99833345e-02, -9.67912525e-02,
       -8.58636647e-02,  2.46640071e-01, -1.45075498e-02, -1.98289938e-02,
        3.35497670e-02,  1.89025506e-01,  1.27456829e-01, -7.52308592e-03,
       -9.31165218e-02, -1.51067510e-01, -1.70633927e-01,  1.44743279e-01,
       -1.07426293e-01,  9.52546969e-02, -2.81170849e-02, -6.66082501e-02,
        2.10901126e-01, -1.72057644e-01,  3.34611796e-02,  2.09583770e-02,
       -1.74449384e-01, -1.09715499e-01, -5.94822913e-02, -1.16835326e-01,
        1.30035803e-01, -1.25164419e-01, -3.61455977e-02,  3.99911366e-02,
       -1.10091299e-01, -3.92497778e-02, -1.02700353e-01,  6.01272322e-02,
        1.60923928e-01,  2.38457229e-02, -2.11089039e+00, -3.58275697e-02,
       -6.29718974e-02, -3.91519785e-01,  1.40109882e-01, -6.18081093e-02,
        3.53995711e-01,  

In [44]:
labels = code_selector(data, 'NET').iloc[:, 1].values.tolist()

In [45]:
evaluate(features, labels, 0.2, 5)


51.4% F1, 37.3% precision, 82.6% recall, 41.3% krippendorff's alpha on test data, and 43.0% cohen's kappa

52.8% F1, 40.1% precision, 77.3% recall, 44.0% krippendorff's alpha on test data, and 45.0% cohen's kappa

47.5% F1, 33.6% precision, 81.1% recall, 37.3% krippendorff's alpha on test data, and 39.3% cohen's kappa

54.0% F1, 40.7% precision, 80.2% recall, 45.2% krippendorff's alpha on test data, and 46.3% cohen's kappa

45.9% F1, 32.6% precision, 77.7% recall, 35.8% krippendorff's alpha on test data, and 37.7% cohen's kappa


In [46]:
evaluate(features, labels, 0.5, 5)


51.2% F1, 38.6% precision, 75.9% recall, 42.1% krippendorff's alpha on test data, and 43.3% cohen's kappa

52.3% F1, 40.0% precision, 75.3% recall, 43.3% krippendorff's alpha on test data, and 44.3% cohen's kappa

49.4% F1, 37.5% precision, 72.6% recall, 40.2% krippendorff's alpha on test data, and 41.3% cohen's kappa

49.9% F1, 37.5% precision, 74.7% recall, 40.4% krippendorff's alpha on test data, and 41.6% cohen's kappa

50.3% F1, 38.9% precision, 71.2% recall, 40.8% krippendorff's alpha on test data, and 41.7% cohen's kappa


In [47]:
evaluate(features, labels, 0.75, 5)


48.1% F1, 36.4% precision, 71.0% recall, 38.6% krippendorff's alpha on test data, and 39.7% cohen's kappa

51.3% F1, 39.9% precision, 71.7% recall, 42.7% krippendorff's alpha on test data, and 43.5% cohen's kappa

51.1% F1, 39.6% precision, 71.8% recall, 42.4% krippendorff's alpha on test data, and 43.3% cohen's kappa

49.2% F1, 37.5% precision, 71.7% recall, 39.6% krippendorff's alpha on test data, and 40.7% cohen's kappa

49.4% F1, 38.2% precision, 70.1% recall, 40.2% krippendorff's alpha on test data, and 41.1% cohen's kappa


In [48]:
labels = code_selector(data, 'LOG').iloc[:, 1].values.tolist()

In [49]:
evaluate(features, labels, 0.2, 5)


41.6% F1, 29.7% precision, 69.2% recall, 34.2% krippendorff's alpha on test data, and 35.5% cohen's kappa

40.3% F1, 27.5% precision, 75.0% recall, 33.0% krippendorff's alpha on test data, and 34.7% cohen's kappa

42.1% F1, 29.8% precision, 71.9% recall, 35.2% krippendorff's alpha on test data, and 36.5% cohen's kappa

41.6% F1, 28.8% precision, 74.5% recall, 35.2% krippendorff's alpha on test data, and 36.5% cohen's kappa

42.2% F1, 29.9% precision, 71.9% recall, 34.8% krippendorff's alpha on test data, and 36.2% cohen's kappa


In [50]:
evaluate(features, labels, 0.5, 5)


40.6% F1, 28.9% precision, 68.5% recall, 33.4% krippendorff's alpha on test data, and 34.7% cohen's kappa

39.1% F1, 26.4% precision, 75.4% recall, 31.8% krippendorff's alpha on test data, and 33.6% cohen's kappa

40.0% F1, 28.3% precision, 68.2% recall, 32.7% krippendorff's alpha on test data, and 34.0% cohen's kappa

39.7% F1, 27.9% precision, 68.7% recall, 32.5% krippendorff's alpha on test data, and 33.9% cohen's kappa

39.3% F1, 27.7% precision, 67.7% recall, 32.6% krippendorff's alpha on test data, and 33.9% cohen's kappa


In [51]:
evaluate(features, labels, 0.75, 5)


41.3% F1, 30.3% precision, 64.8% recall, 35.1% krippendorff's alpha on test data, and 36.0% cohen's kappa

39.7% F1, 28.4% precision, 66.3% recall, 32.9% krippendorff's alpha on test data, and 34.1% cohen's kappa

38.8% F1, 27.6% precision, 65.5% recall, 31.6% krippendorff's alpha on test data, and 32.9% cohen's kappa

40.1% F1, 29.0% precision, 65.2% recall, 33.2% krippendorff's alpha on test data, and 34.3% cohen's kappa

40.8% F1, 29.0% precision, 68.4% recall, 34.1% krippendorff's alpha on test data, and 35.3% cohen's kappa


In [52]:
labels = code_selector(data, 'PI_Disease').iloc[:, 1].values.tolist()

In [53]:
evaluate(features, labels, 0.2, 5)


39.7% F1, 27.4% precision, 72.3% recall, 34.6% krippendorff's alpha on test data, and 35.7% cohen's kappa

44.7% F1, 31.5% precision, 76.7% recall, 39.8% krippendorff's alpha on test data, and 40.7% cohen's kappa

42.4% F1, 29.8% precision, 73.1% recall, 37.1% krippendorff's alpha on test data, and 38.1% cohen's kappa

43.3% F1, 29.7% precision, 80.5% recall, 38.2% krippendorff's alpha on test data, and 39.3% cohen's kappa

43.3% F1, 29.4% precision, 82.1% recall, 38.1% krippendorff's alpha on test data, and 39.4% cohen's kappa


In [54]:
evaluate(features, labels, 0.5, 5)


42.3% F1, 29.0% precision, 77.9% recall, 37.1% krippendorff's alpha on test data, and 38.2% cohen's kappa

44.0% F1, 31.9% precision, 70.8% recall, 39.4% krippendorff's alpha on test data, and 40.1% cohen's kappa

45.1% F1, 31.7% precision, 78.4% recall, 40.0% krippendorff's alpha on test data, and 41.0% cohen's kappa

43.1% F1, 30.3% precision, 74.5% recall, 37.8% krippendorff's alpha on test data, and 38.8% cohen's kappa

44.8% F1, 31.5% precision, 78.0% recall, 40.0% krippendorff's alpha on test data, and 40.9% cohen's kappa


In [55]:
evaluate(features, labels, 0.75, 5)


43.8% F1, 31.5% precision, 71.7% recall, 39.2% krippendorff's alpha on test data, and 39.9% cohen's kappa

44.6% F1, 32.8% precision, 69.8% recall, 40.0% krippendorff's alpha on test data, and 40.7% cohen's kappa

42.1% F1, 30.3% precision, 69.0% recall, 37.3% krippendorff's alpha on test data, and 38.1% cohen's kappa

43.8% F1, 31.8% precision, 70.2% recall, 39.2% krippendorff's alpha on test data, and 39.9% cohen's kappa

42.0% F1, 29.7% precision, 71.8% recall, 37.0% krippendorff's alpha on test data, and 37.9% cohen's kappa


In [56]:
labels = code_selector(data, 'NET_S').iloc[:, 1].values.tolist()

In [57]:
evaluate(features, labels, 0.2, 5)


45.8% F1, 32.6% precision, 77.1% recall, 37.5% krippendorff's alpha on test data, and 39.0% cohen's kappa

45.6% F1, 33.3% precision, 72.1% recall, 36.6% krippendorff's alpha on test data, and 38.0% cohen's kappa

48.1% F1, 34.3% precision, 80.7% recall, 39.3% krippendorff's alpha on test data, and 41.0% cohen's kappa

44.6% F1, 31.6% precision, 75.8% recall, 36.2% krippendorff's alpha on test data, and 37.8% cohen's kappa

49.9% F1, 36.3% precision, 80.0% recall, 41.4% krippendorff's alpha on test data, and 42.7% cohen's kappa


In [58]:
evaluate(features, labels, 0.5, 5)


45.1% F1, 32.4% precision, 74.0% recall, 36.2% krippendorff's alpha on test data, and 37.8% cohen's kappa

44.5% F1, 31.6% precision, 75.0% recall, 35.7% krippendorff's alpha on test data, and 37.3% cohen's kappa

44.0% F1, 31.0% precision, 76.0% recall, 34.6% krippendorff's alpha on test data, and 36.5% cohen's kappa

45.3% F1, 33.2% precision, 71.4% recall, 36.9% krippendorff's alpha on test data, and 38.2% cohen's kappa

46.5% F1, 34.4% precision, 71.8% recall, 38.4% krippendorff's alpha on test data, and 39.5% cohen's kappa


In [59]:
evaluate(features, labels, 0.75, 5)


46.4% F1, 34.3% precision, 71.8% recall, 38.6% krippendorff's alpha on test data, and 39.7% cohen's kappa

45.5% F1, 33.7% precision, 69.9% recall, 37.5% krippendorff's alpha on test data, and 38.6% cohen's kappa

44.8% F1, 34.3% precision, 64.5% recall, 36.9% krippendorff's alpha on test data, and 37.7% cohen's kappa

44.9% F1, 32.6% precision, 72.0% recall, 36.5% krippendorff's alpha on test data, and 37.9% cohen's kappa

43.3% F1, 32.4% precision, 65.6% recall, 35.1% krippendorff's alpha on test data, and 36.2% cohen's kappa


In [7]:
recoded = pd.read_csv("Pr_BERT+Logit_PI_recoded.txt", sep='\t', encoding = 'utf-8')

In [16]:
np.argmax(recoded["Predicted Probability"], axis = 0)

0

In [23]:
# Turn the stored probabilities into 0/1 predictions once (strictly above 0.5
# counts as 1) and reuse them for all three agreement scores.
recoded_truth = recoded["True Label"]
recoded_pred = (recoded["Predicted Probability"] > 0.5) * 1

f1 = f1_score(recoded_truth, recoded_pred, pos_label=1)
alpha = krippendorff.alpha(np.stack((recoded_truth, recoded_pred)))
kappa = cohen_kappa_score(recoded_truth, recoded_pred)

In [22]:
f1

0.8813559322033898

In [24]:
alpha

0.8464991453537596

In [25]:
kappa

0.8467996132120938