In [None]:
!pip install pytorch-transformers --user

In [None]:
pip install pandas==1.3.5 --user

In [1]:
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset

from tqdm.notebook import tqdm
from tqdm.auto import tqdm

from transformers import BertTokenizer, DistilBertTokenizer
from transformers import BertForSequenceClassification, DistilBertForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import random


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
train = pd.read_pickle('./data/train.pkl')
test = pd.read_pickle('./data/test.pkl')

print("Size of the train data: ", len(train))
print("Size of the test data: ", len(test))

Size of the train data:  3464
Size of the test data:  866


In [4]:
possible_labels = train["Code"].unique()

label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
print(label_dict)

{'Solution Discussion': 0, 'Expected Behaviour': 1, 'Usage': 2, 'Social Conversation': 3, 'Contribution and Commitment': 4, 'Bug Reproduction': 5, 'Motivation': 6, 'Potential New Issues and Requests': 7, 'Investigation and Exploration': 8, 'Workarounds': 9, 'Observed Bug Behaviour': 10, 'Task Progress': 11, 'Action on Issue': 12}


In [5]:
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [6]:
train['Code'] = train["Code"].replace(label_dict)
test['Code'] = test["Code"].replace(label_dict)

In [7]:
x_train, x_val, y_train, y_val = train_test_split(train, 
                                                  train["Code"], 
                                                  test_size=0.15, 
                                                  random_state=42, 
                                                  stratify=train["Code"].values)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  return floored.astype(np.int)


In [8]:
encoded_data_train = tokenizer.batch_encode_plus(
    x_train["Text Content"].values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    padding='longest',
    truncation=True, 
    return_tensors='pt'
)

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(x_train["Code"].values)

In [9]:
encoded_data_val = tokenizer.batch_encode_plus(
    x_val["Text Content"].values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    padding='longest',
    truncation=True,
    return_tensors='pt'
)

input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(x_val["Code"].values)

In [10]:
from collections import Counter
count = Counter(y_train)
 
class_count = np.array([count[i] for i in count])
 
weight = 1./class_count
 
weight

array([0.00104167, 0.00389105, 0.00184162, 0.01190476, 0.00512821,
       0.004     , 0.00598802, 0.00641026, 0.01666667, 0.01785714,
       0.01176471, 0.01123596, 0.02380952])

In [11]:
from torch.utils.data import WeightedRandomSampler

samples_weight = np.array([weight[t] for t in y_train])
samples_weight=torch.from_numpy(samples_weight)

sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

In [12]:
count_val = Counter(y_val)
 
class_count_val = np.array([count_val[i] for i in count_val])
 
weight_val = 1./class_count_val

samples_weight_val = np.array([weight_val[t] for t in y_val])
samples_weight_val = torch.from_numpy(samples_weight_val)

sampler_val = WeightedRandomSampler(samples_weight_val, len(samples_weight_val))

In [13]:
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
print(len(dataset_train), len(dataset_val))


2944 520


In [14]:
model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=False,
                                                      output_hidden_states=False)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.weight', 'classifi

In [15]:

batch_size = 1

dataloader_train = DataLoader(dataset_train, 
                              sampler=sampler,
                              #sampler=RandomSampler(dataset_train), 
                              batch_size=batch_size)

dataloader_validation = DataLoader(dataset_val, 
                                   #sampler=SequentialSampler(dataset_val), 
                                   sampler=sampler_val, 
                                   batch_size=batch_size)


In [16]:
optimizer = AdamW(model.parameters(),
                  lr=1e-5, 
                  eps=1e-8)




In [17]:
epochs = 40

scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=0,
                                            num_training_steps=len(dataloader_train)*epochs)

In [18]:
def f1_score_func(preds, labels):
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average='weighted')
    
def evaluate(dataloader_val):

    model.eval()
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in dataloader_val:
        
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        with torch.no_grad():        
            outputs = model(**inputs)
            
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    loss_val_avg = loss_val_total/len(dataloader_val) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [19]:

seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

In [20]:
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [121]:
for epoch in tqdm(range(1, epochs+1)):
    
    model.train()
    
    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()
        
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }       

        outputs = model(**inputs)
        
        loss = outputs[0]
        loss_train_total += loss.item()
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()
        
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
         
        
    torch.save(model.state_dict(), f'./Models/distilbert_epoch_{epoch}.model')
        
    tqdm.write(f'\nEpoch {epoch}')
    
    loss_train_avg = loss_train_total/len(dataloader_train)            
    tqdm.write(f'Training loss: {loss_train_avg}')
    
    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

HBox(children=(IntProgress(value=0, max=40), HTML(value='')))

HBox(children=(IntProgress(value=0, description='Epoch 1', max=2944, style=ProgressStyle(description_width='in…


Epoch 1
Training loss: 0.24495558464215908
Validation loss: 4.411667906487958
F1 Score (Weighted): 0.4101500557570241


HBox(children=(IntProgress(value=0, description='Epoch 2', max=2944, style=ProgressStyle(description_width='in…


Epoch 2
Training loss: 0.19517951723235694
Validation loss: 4.4773349873736095
F1 Score (Weighted): 0.48640892652168816


HBox(children=(IntProgress(value=0, description='Epoch 3', max=2944, style=ProgressStyle(description_width='in…


Epoch 3
Training loss: 0.17194548429347342
Validation loss: 4.83187087802423
F1 Score (Weighted): 0.48030841503545085


HBox(children=(IntProgress(value=0, description='Epoch 4', max=2944, style=ProgressStyle(description_width='in…


Epoch 4
Training loss: 0.18294317055702988
Validation loss: 5.869368711595395
F1 Score (Weighted): 0.47965550104198706


HBox(children=(IntProgress(value=0, description='Epoch 5', max=2944, style=ProgressStyle(description_width='in…


Epoch 5
Training loss: 0.16538170637608077
Validation loss: 4.9934420965033475
F1 Score (Weighted): 0.5313648533616151


HBox(children=(IntProgress(value=0, description='Epoch 6', max=2944, style=ProgressStyle(description_width='in…


Epoch 6
Training loss: 0.15709150781870576
Validation loss: 5.598535085738681
F1 Score (Weighted): 0.460515829973774


HBox(children=(IntProgress(value=0, description='Epoch 7', max=2944, style=ProgressStyle(description_width='in…


Epoch 7
Training loss: 0.12188658445052536
Validation loss: 7.207400619873215
F1 Score (Weighted): 0.382034858930453


HBox(children=(IntProgress(value=0, description='Epoch 8', max=2944, style=ProgressStyle(description_width='in…


Epoch 8
Training loss: 0.1590175290667161
Validation loss: 6.180652455258404
F1 Score (Weighted): 0.4318344361051936


HBox(children=(IntProgress(value=0, description='Epoch 9', max=2944, style=ProgressStyle(description_width='in…


Epoch 9
Training loss: 0.11190903714793204
Validation loss: 6.0740312133293335
F1 Score (Weighted): 0.44619844376185


HBox(children=(IntProgress(value=0, description='Epoch 10', max=2944, style=ProgressStyle(description_width='i…


Epoch 10
Training loss: 0.15229818207892667
Validation loss: 6.780134805060592
F1 Score (Weighted): 0.42345717081769063


HBox(children=(IntProgress(value=0, description='Epoch 11', max=2944, style=ProgressStyle(description_width='i…


Epoch 11
Training loss: 0.12661812459867838
Validation loss: 6.045170816842918
F1 Score (Weighted): 0.45794948864527585


HBox(children=(IntProgress(value=0, description='Epoch 12', max=2944, style=ProgressStyle(description_width='i…


Epoch 12
Training loss: 0.1126657662740855
Validation loss: 6.85831800941671
F1 Score (Weighted): 0.4197610440002856


HBox(children=(IntProgress(value=0, description='Epoch 13', max=2944, style=ProgressStyle(description_width='i…


Epoch 13
Training loss: 0.12468761022992735
Validation loss: 7.2964174693069515
F1 Score (Weighted): 0.43155977512698923


HBox(children=(IntProgress(value=0, description='Epoch 14', max=2944, style=ProgressStyle(description_width='i…


Epoch 14
Training loss: 0.08596651145393118
Validation loss: 7.14965697129138
F1 Score (Weighted): 0.41741592423108476


HBox(children=(IntProgress(value=0, description='Epoch 15', max=2944, style=ProgressStyle(description_width='i…


Epoch 15
Training loss: 0.08197611753135997
Validation loss: 6.997701553128661
F1 Score (Weighted): 0.4576288311141662


HBox(children=(IntProgress(value=0, description='Epoch 16', max=2944, style=ProgressStyle(description_width='i…


Epoch 16
Training loss: 0.06276513432910857
Validation loss: 7.6962915930662055
F1 Score (Weighted): 0.4443866802040706


HBox(children=(IntProgress(value=0, description='Epoch 17', max=2944, style=ProgressStyle(description_width='i…


Epoch 17
Training loss: 0.06532622272319043
Validation loss: 7.702767917016567
F1 Score (Weighted): 0.44561797403453823


HBox(children=(IntProgress(value=0, description='Epoch 18', max=2944, style=ProgressStyle(description_width='i…


Epoch 18
Training loss: 0.05337567266151853
Validation loss: 8.793646343219057
F1 Score (Weighted): 0.39328228196685994


HBox(children=(IntProgress(value=0, description='Epoch 19', max=2944, style=ProgressStyle(description_width='i…


Epoch 19
Training loss: 0.06754638376863681
Validation loss: 7.562034926482114
F1 Score (Weighted): 0.4320775466198523


HBox(children=(IntProgress(value=0, description='Epoch 20', max=2944, style=ProgressStyle(description_width='i…


Epoch 20
Training loss: 0.07529091068931149
Validation loss: 8.461112046717751
F1 Score (Weighted): 0.41325566366351935


HBox(children=(IntProgress(value=0, description='Epoch 21', max=2944, style=ProgressStyle(description_width='i…


Epoch 21
Training loss: 0.038849945162842354
Validation loss: 8.554287981897472
F1 Score (Weighted): 0.40735864601442806


HBox(children=(IntProgress(value=0, description='Epoch 22', max=2944, style=ProgressStyle(description_width='i…


Epoch 22
Training loss: 0.0666371891752663
Validation loss: 8.58591406133493
F1 Score (Weighted): 0.40009236590057035


HBox(children=(IntProgress(value=0, description='Epoch 23', max=2944, style=ProgressStyle(description_width='i…


Epoch 23
Training loss: 0.04371283496184395
Validation loss: 8.468652123406818
F1 Score (Weighted): 0.4027002961095497


HBox(children=(IntProgress(value=0, description='Epoch 24', max=2944, style=ProgressStyle(description_width='i…


Epoch 24
Training loss: 0.06089244295669818
Validation loss: 7.182685498800088
F1 Score (Weighted): 0.5007148222309722


HBox(children=(IntProgress(value=0, description='Epoch 25', max=2944, style=ProgressStyle(description_width='i…


Epoch 25
Training loss: 0.05036463704831375
Validation loss: 8.261394574261486
F1 Score (Weighted): 0.4079446947135162


HBox(children=(IntProgress(value=0, description='Epoch 26', max=2944, style=ProgressStyle(description_width='i…


Epoch 26
Training loss: 0.032800111355202013
Validation loss: 8.193840455549518
F1 Score (Weighted): 0.4065536777254815


HBox(children=(IntProgress(value=0, description='Epoch 27', max=2944, style=ProgressStyle(description_width='i…


Epoch 27
Training loss: 0.04748695079767812
Validation loss: 9.1257645576666
F1 Score (Weighted): 0.38966644025308717


HBox(children=(IntProgress(value=0, description='Epoch 28', max=2944, style=ProgressStyle(description_width='i…


Epoch 28
Training loss: 0.04115181193539001
Validation loss: 9.510283538229011
F1 Score (Weighted): 0.4086824055432744


HBox(children=(IntProgress(value=0, description='Epoch 29', max=2944, style=ProgressStyle(description_width='i…


Epoch 29
Training loss: 0.04165974259813808
Validation loss: 9.876274926172213
F1 Score (Weighted): 0.39100811448661543


HBox(children=(IntProgress(value=0, description='Epoch 30', max=2944, style=ProgressStyle(description_width='i…


Epoch 30
Training loss: 0.041266742923234186
Validation loss: 8.268700487885155
F1 Score (Weighted): 0.45789804670677553


HBox(children=(IntProgress(value=0, description='Epoch 31', max=2944, style=ProgressStyle(description_width='i…


Epoch 31
Training loss: 0.026393486901648822
Validation loss: 8.404471616999702
F1 Score (Weighted): 0.4386911359190348


HBox(children=(IntProgress(value=0, description='Epoch 32', max=2944, style=ProgressStyle(description_width='i…


Epoch 32
Training loss: 0.03253743893200024
Validation loss: 9.069957887438266
F1 Score (Weighted): 0.41190603273512805


HBox(children=(IntProgress(value=0, description='Epoch 33', max=2944, style=ProgressStyle(description_width='i…


Epoch 33
Training loss: 0.03424005923670438
Validation loss: 10.299561170183916
F1 Score (Weighted): 0.36731444313956646


HBox(children=(IntProgress(value=0, description='Epoch 34', max=2944, style=ProgressStyle(description_width='i…


Epoch 34
Training loss: 0.033204441710937345
Validation loss: 9.773427224571094
F1 Score (Weighted): 0.3962960096408361


HBox(children=(IntProgress(value=0, description='Epoch 35', max=2944, style=ProgressStyle(description_width='i…


Epoch 35
Training loss: 0.03761207719688484
Validation loss: 9.738187277349974
F1 Score (Weighted): 0.39711623829512055


HBox(children=(IntProgress(value=0, description='Epoch 36', max=2944, style=ProgressStyle(description_width='i…


Epoch 36
Training loss: 0.03922847505225629
Validation loss: 9.690344167005764
F1 Score (Weighted): 0.3751602730670165


HBox(children=(IntProgress(value=0, description='Epoch 37', max=2944, style=ProgressStyle(description_width='i…


Epoch 37
Training loss: 0.017357521815367902
Validation loss: 8.977865622065138
F1 Score (Weighted): 0.4270743441200292


HBox(children=(IntProgress(value=0, description='Epoch 38', max=2944, style=ProgressStyle(description_width='i…


Epoch 38
Training loss: 0.017648053563890577
Validation loss: 9.935456112602985
F1 Score (Weighted): 0.3558119470194741


HBox(children=(IntProgress(value=0, description='Epoch 39', max=2944, style=ProgressStyle(description_width='i…


Epoch 39
Training loss: 0.02657085419013178
Validation loss: 9.76748419769551
F1 Score (Weighted): 0.3989404441050552


HBox(children=(IntProgress(value=0, description='Epoch 40', max=2944, style=ProgressStyle(description_width='i…


Epoch 40
Training loss: 0.0251556821753401
Validation loss: 9.068009731358472
F1 Score (Weighted): 0.41917658788018736



In [39]:
checkpoint = torch.load("./Models/distilbert_epoch_30.model")
model.load_state_dict(checkpoint)
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
       

In [40]:
test['Code'] = test["Code"].replace(label_dict)
encoded_data_test = tokenizer.batch_encode_plus(
    test["Text Content"].values, 
    add_special_tokens=True, 
    return_attention_mask=True, 
    padding='longest',
    truncation=True, 
    return_tensors='pt'
)

input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']
labels_test = torch.tensor(test["Code"].values)

dataset_test = TensorDataset(input_ids_test, attention_masks_test, labels_test)

dataloader_test = DataLoader(dataset_test, 
                                   sampler=SequentialSampler(dataset_test), 
                                   batch_size=batch_size)

val_loss, predictions, true_vals = evaluate(dataloader_test)
val_f1 = f1_score_func(predictions, true_vals)
tqdm.write(f'Validation loss: {val_loss}')
tqdm.write(f'F1 Score (Weighted): {val_f1}')

Validation loss: 5.8925778344789315
F1 Score (Weighted): 0.5319029720815789


In [41]:
y_pred = []
for i in range(len(predictions)):
    y_pred.append(predictions[i].tolist().index(max(predictions[i])))
print(y_pred)

[3, 8, 11, 3, 0, 8, 0, 0, 2, 3, 8, 0, 7, 6, 2, 0, 3, 0, 0, 0, 0, 5, 0, 0, 0, 3, 1, 3, 0, 5, 0, 0, 3, 0, 3, 3, 0, 0, 8, 0, 6, 0, 0, 0, 3, 0, 9, 0, 0, 0, 6, 2, 0, 5, 7, 2, 1, 2, 5, 0, 0, 7, 0, 5, 8, 3, 2, 0, 1, 0, 9, 2, 3, 5, 1, 3, 2, 3, 5, 0, 2, 3, 0, 0, 0, 2, 0, 0, 0, 6, 0, 11, 0, 3, 3, 5, 0, 3, 2, 0, 2, 1, 5, 1, 0, 6, 0, 3, 5, 2, 0, 0, 2, 2, 0, 2, 0, 7, 3, 2, 0, 0, 5, 3, 0, 3, 0, 8, 0, 0, 0, 8, 0, 0, 2, 5, 0, 0, 0, 6, 7, 0, 0, 2, 3, 0, 8, 0, 7, 3, 0, 3, 3, 0, 0, 5, 2, 2, 0, 3, 12, 5, 3, 3, 0, 2, 2, 7, 0, 8, 0, 2, 0, 11, 5, 3, 3, 0, 3, 8, 0, 2, 0, 2, 1, 0, 0, 6, 0, 6, 2, 5, 7, 3, 3, 0, 3, 6, 0, 0, 1, 0, 3, 1, 0, 10, 3, 0, 0, 0, 5, 0, 3, 2, 8, 3, 2, 5, 8, 0, 2, 8, 5, 0, 2, 0, 0, 2, 3, 0, 0, 0, 0, 2, 0, 0, 12, 0, 0, 7, 3, 3, 0, 11, 0, 3, 7, 3, 1, 0, 2, 3, 6, 3, 0, 11, 0, 8, 8, 7, 5, 3, 0, 0, 0, 0, 0, 3, 0, 0, 11, 3, 0, 0, 0, 3, 3, 2, 7, 5, 2, 5, 0, 0, 3, 0, 0, 8, 5, 0, 5, 0, 5, 2, 0, 11, 0, 0, 8, 0, 12, 2, 0, 0, 0, 2, 0, 0, 10, 0, 2, 0, 3, 0, 8, 3, 1, 8, 2, 0, 1, 3, 10, 8, 2, 3, 0, 0, 0,

In [42]:
from sklearn.metrics import precision_recall_fscore_support as score

precision, recall, fscore, support = score(true_vals, y_pred)

for label, i in label_dict.items():
    print('{}: \t\tprecision: {:.2f}, recall: {:.2f}, fscore: {:.2f}, support: {}'.format(label, precision[i], recall[i], fscore[i], support[i]))

Solution Discussion: 		precision: 0.57, recall: 0.72, fscore: 0.64, support: 282
Expected Behaviour: 		precision: 0.24, recall: 0.32, fscore: 0.28, support: 25
Usage: 		precision: 0.39, recall: 0.54, fscore: 0.45, support: 74
Social Conversation: 		precision: 0.80, recall: 0.74, fscore: 0.77, support: 159
Contribution and Commitment: 		precision: 0.67, recall: 0.35, fscore: 0.46, support: 17
Bug Reproduction: 		precision: 0.42, recall: 0.41, fscore: 0.41, support: 49
Motivation: 		precision: 0.44, recall: 0.24, fscore: 0.31, support: 58
Potential New Issues and Requests: 		precision: 0.33, recall: 0.28, fscore: 0.31, support: 46
Investigation and Exploration: 		precision: 0.48, recall: 0.33, fscore: 0.39, support: 75
Workarounds: 		precision: 0.50, recall: 0.11, fscore: 0.18, support: 18
Observed Bug Behaviour: 		precision: 0.33, recall: 0.15, fscore: 0.21, support: 26
Task Progress: 		precision: 0.55, recall: 0.48, fscore: 0.51, support: 25
Action on Issue: 		precision: 0.70, recall: 