In [None]:
# if u run this notebook in collab, install this dependences

# !pip install transformers
# !pip install datasets
# !pip install wandb

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
from transformers import EvalPrediction
import torch
import numpy as np
from datasets import Dataset, load_dataset, DatasetDict
import wandb
from datetime import datetime
import pandas as pd
import datasets
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
MODEL_NAME = "bert-base-uncased"

In [3]:
dataset = load_dataset("jakeazcona/short-text-multi-labeled-emotion-classification")

Downloading data: 100%|██████████| 2.88M/2.88M [00:00<00:00, 3.77MB/s]
Generating train split: 30040 examples [00:00, 347726.32 examples/s]


In [4]:
pd_dataset = dataset['train'].to_pandas()
pd_dataset.drop('Unnamed: 0', axis=1, inplace=True)
pd_dataset

Unnamed: 0,sample,emotion
0,i didnt feel humiliated,5
1,i can go from feeling so hopeless to so damned...,5
2,im grabbing a minute to post i feel greedy wrong,0
3,i am ever feeling nostalgic about the fireplac...,3
4,i am feeling grouchy,0
...,...,...
30035,Terribly sad story. I loved it.,5
30036,It's in case he suffers from amnesia and forge...,5
30037,Gotta love these kids! When they do or say the...,3
30038,Omg [NAME]! One of my favorite bb players,4


In [5]:
# Для скорости обучения возьмем только 28% датасета
_, part_dataset = train_test_split(pd_dataset, test_size=0.28, stratify=pd_dataset['emotion'], random_state=42)

In [6]:
train_df, test_df = train_test_split(part_dataset, test_size=0.2, stratify=part_dataset['emotion'], random_state=42)

In [7]:
train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df['emotion'], random_state=42)

In [8]:
dataset = datasets.dataset_dict.DatasetDict({
    'train': Dataset.from_pandas(train_df),
    'validation': Dataset.from_pandas(val_df),
    'test': Dataset.from_pandas(test_df)
  })

In [9]:
def preproces_dataset(raw_dataset):
  df = pd.DataFrame(raw_dataset)
  one_hot_encoded = pd.get_dummies(df['emotion'], prefix='emotion')
  df = pd.concat([df, one_hot_encoded], axis=1)
  df.drop('emotion', axis=1, inplace=True)
  new_dataset = Dataset.from_pandas(df)
  return new_dataset

def preproces_dict_dataset(raw_dataset):
  train_dataset = raw_dataset['train']
  validation_dataset = raw_dataset['validation']
  test_dataset = raw_dataset['test']

  train_dataset = preproces_dataset(train_dataset)
  validation_dataset = preproces_dataset(validation_dataset)
  test_dataset = preproces_dataset(test_dataset)

  dataset = datasets.dataset_dict.DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset
  })
  return dataset


In [10]:
dataset = preproces_dict_dataset(dataset)

In [11]:
dataset.remove_columns('__index_level_0__')

DatasetDict({
    train: Dataset({
        features: ['sample', 'emotion_0', 'emotion_1', 'emotion_2', 'emotion_3', 'emotion_4', 'emotion_5'],
        num_rows: 5383
    })
    validation: Dataset({
        features: ['sample', 'emotion_0', 'emotion_1', 'emotion_2', 'emotion_3', 'emotion_4', 'emotion_5'],
        num_rows: 1346
    })
    test: Dataset({
        features: ['sample', 'emotion_0', 'emotion_1', 'emotion_2', 'emotion_3', 'emotion_4', 'emotion_5'],
        num_rows: 1683
    })
})

In [12]:
labels = ['emotion_0', 'emotion_1', 'emotion_2', 'emotion_3', 'emotion_4', 'emotion_5']
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}

In [13]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [14]:
def preprocess_data(examples):
  # take a batch of texts
  text = examples["sample"]
  # encode them
  encoding = tokenizer(text, padding="max_length", truncation=True, max_length=128)
  n_samples, sample_len = np.shape(np.array(encoding['input_ids']))
  # add labels
  labels_batch = {k: examples[k] for k in examples.keys() if k in labels}
  # create numpy array of shape (batch_size, num_labels)
  labels_matrix = np.zeros((n_samples, len(labels)))
  # fill numpy array
  for idx, label in enumerate(labels):
    labels_matrix[:, idx] = labels_batch[label]

  encoding["labels"] = labels_matrix.tolist()

  return encoding

In [15]:
encoded_dataset = dataset.map(preprocess_data, batched=True, remove_columns=dataset['train'].column_names)

Map: 100%|██████████| 5383/5383 [00:00<00:00, 8815.22 examples/s]
Map: 100%|██████████| 1346/1346 [00:00<00:00, 10116.75 examples/s]
Map: 100%|██████████| 1683/1683 [00:00<00:00, 6718.13 examples/s]


In [None]:
# # Alternative way to preprocess data
# def tokenize_function(example):
#     return tokenizer(example["sentence1"], example["sentence2"], truncation=True)

# # here we use datacollator, that means, that batching and paddig will be applied to the dataset during the training
# tokenized_datasets = train_dataset.map(tokenize_function, batched=True)
# data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding=True)

In [16]:
example = encoded_dataset['train'][0]
print(example.keys())
print(tokenizer.decode(example['input_ids']))
print(example['labels'])
print([id2label[idx] for idx, label in enumerate(example['labels']) if label == 1.0])

dict_keys(['input_ids', 'token_type_ids', 'attention_mask', 'labels'])
[CLS] i feel like i should not be surprised at this development [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]
[0.0, 0.0, 0.0, 0.0, 1.0, 0.0]
['emotion_4']


In [17]:
encoded_dataset.set_format("torch")

In [18]:
type(encoded_dataset['train']['input_ids'])

torch.Tensor

In [19]:
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased",
                                                           problem_type="multi_label_classification",
                                                           num_labels=len(labels),
                                                           id2label=id2label,
                                                           label2id=label2id)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
batch_size = 8
metric_name = "f1"

args = TrainingArguments(
    f"bert-finetuned-sem_eval-english",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=10,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=metric_name
)

In [21]:
# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))
    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1
    # finally, compute metrics
    y_true = labels
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)
    # return as dictionary
    metrics = {'f1': f1_micro_average,
               'roc_auc': roc_auc,
               'accuracy': accuracy}
    return metrics

def compute_metrics(p: EvalPrediction):
    preds = p.predictions[0] if isinstance(p.predictions,
            tuple) else p.predictions
    result = multi_label_metrics(
        predictions=preds,
        labels=p.label_ids)
    return result

TODO: try to use raw model to predict some label

In [23]:
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [24]:
date = datetime.strftime(datetime.now(), "%d.%m.%Y-%H.%M.%S")
wandb.init(
    # set the wandb project where this run will be logged
    project="nlp-classifier",
    name=f"bert-multilabels-cls-{date}",

    # # track hyperparameters and run metadata
    # config={
    # "learning_rate": 0.02,
    # "architecture": "CNN",
    # "dataset": "CIFAR-100",
    # "epochs": 10,
    # }
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mrubikpf2002[0m ([33mrubikpf2002-mipt[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [25]:
trainer.train()

  7%|▋         | 500/6730 [14:26<3:00:14,  1.74s/it]

{'loss': 0.31, 'grad_norm': 1.0165866613388062, 'learning_rate': 1.8514115898959882e-05, 'epoch': 0.74}


                                                    
 10%|█         | 673/6730 [21:12<2:51:01,  1.69s/it]

{'eval_loss': 0.16393065452575684, 'eval_f1': 0.8332058148431523, 'eval_roc_auc': 0.8912332838038634, 'eval_accuracy': 0.8083209509658247, 'eval_runtime': 105.4577, 'eval_samples_per_second': 12.763, 'eval_steps_per_second': 1.603, 'epoch': 1.0}


 15%|█▍        | 1000/6730 [30:44<2:46:11,  1.74s/it]

{'loss': 0.1301, 'grad_norm': 3.2778053283691406, 'learning_rate': 1.7028231797919763e-05, 'epoch': 1.49}


                                                     
 20%|██        | 1346/6730 [42:31<2:31:42,  1.69s/it]

{'eval_loss': 0.13735869526863098, 'eval_f1': 0.8731762065095399, 'eval_roc_auc': 0.9216196136701338, 'eval_accuracy': 0.8647845468053492, 'eval_runtime': 105.1916, 'eval_samples_per_second': 12.796, 'eval_steps_per_second': 1.607, 'epoch': 2.0}


 22%|██▏       | 1500/6730 [47:01<2:31:38,  1.74s/it] 

{'loss': 0.0921, 'grad_norm': 4.355871677398682, 'learning_rate': 1.5542347696879644e-05, 'epoch': 2.23}


 30%|██▉       | 2000/6730 [1:01:30<2:16:59,  1.74s/it]

{'loss': 0.0605, 'grad_norm': 1.4673832654953003, 'learning_rate': 1.4056463595839525e-05, 'epoch': 2.97}


                                                       
 30%|███       | 2019/6730 [1:03:48<2:12:35,  1.69s/it]

{'eval_loss': 0.14955012500286102, 'eval_f1': 0.8662420382165605, 'eval_roc_auc': 0.9170133729569094, 'eval_accuracy': 0.8566121842496285, 'eval_runtime': 105.0849, 'eval_samples_per_second': 12.809, 'eval_steps_per_second': 1.608, 'epoch': 3.0}


 37%|███▋      | 2500/6730 [1:17:47<2:02:41,  1.74s/it] 

{'loss': 0.0379, 'grad_norm': 0.23714891076087952, 'learning_rate': 1.2570579494799406e-05, 'epoch': 3.71}


                                                       
 40%|████      | 2692/6730 [1:25:05<1:53:45,  1.69s/it]

{'eval_loss': 0.15868805348873138, 'eval_f1': 0.8708955223880597, 'eval_roc_auc': 0.9210995542347696, 'eval_accuracy': 0.8662704309063893, 'eval_runtime': 105.1172, 'eval_samples_per_second': 12.805, 'eval_steps_per_second': 1.608, 'epoch': 4.0}


 45%|████▍     | 3000/6730 [1:34:03<1:48:24,  1.74s/it] 

{'loss': 0.0308, 'grad_norm': 0.032928116619586945, 'learning_rate': 1.1084695393759288e-05, 'epoch': 4.46}


                                                       
 50%|█████     | 3365/6730 [1:46:24<1:35:03,  1.69s/it]

{'eval_loss': 0.16841435432434082, 'eval_f1': 0.86609474076837, 'eval_roc_auc': 0.9183506686478454, 'eval_accuracy': 0.8610698365527489, 'eval_runtime': 105.1072, 'eval_samples_per_second': 12.806, 'eval_steps_per_second': 1.608, 'epoch': 5.0}


 52%|█████▏    | 3500/6730 [1:50:21<1:33:26,  1.74s/it] 

{'loss': 0.0247, 'grad_norm': 0.024447226896882057, 'learning_rate': 9.59881129271917e-06, 'epoch': 5.2}


 59%|█████▉    | 4000/6730 [2:04:50<1:19:06,  1.74s/it]

{'loss': 0.0168, 'grad_norm': 0.03172755613923073, 'learning_rate': 8.11292719167905e-06, 'epoch': 5.94}


                                                       
 60%|██████    | 4038/6730 [2:07:41<1:15:48,  1.69s/it]

{'eval_loss': 0.18578793108463287, 'eval_f1': 0.8721580320536713, 'eval_roc_auc': 0.9222139673105498, 'eval_accuracy': 0.8684992570579495, 'eval_runtime': 105.103, 'eval_samples_per_second': 12.806, 'eval_steps_per_second': 1.608, 'epoch': 6.0}


 67%|██████▋   | 4500/6730 [2:21:06<1:04:31,  1.74s/it] 

{'loss': 0.0154, 'grad_norm': 2.1313068866729736, 'learning_rate': 6.62704309063893e-06, 'epoch': 6.69}


                                                       
 70%|███████   | 4711/6730 [2:28:57<56:53,  1.69s/it]

{'eval_loss': 0.19080188870429993, 'eval_f1': 0.8698884758364313, 'eval_roc_auc': 0.9216939078751858, 'eval_accuracy': 0.8684992570579495, 'eval_runtime': 105.1032, 'eval_samples_per_second': 12.806, 'eval_steps_per_second': 1.608, 'epoch': 7.0}


 74%|███████▍  | 5000/6730 [2:37:22<50:09,  1.74s/it]   

{'loss': 0.01, 'grad_norm': 0.017045512795448303, 'learning_rate': 5.141158989598811e-06, 'epoch': 7.43}


                                                     
 80%|████████  | 5384/6730 [2:50:14<38:01,  1.69s/it]

{'eval_loss': 0.19338195025920868, 'eval_f1': 0.8723483438779308, 'eval_roc_auc': 0.9228083209509658, 'eval_accuracy': 0.8684992570579495, 'eval_runtime': 105.0971, 'eval_samples_per_second': 12.807, 'eval_steps_per_second': 1.608, 'epoch': 8.0}


 82%|████████▏ | 5500/6730 [2:53:38<35:39,  1.74s/it]   

{'loss': 0.0083, 'grad_norm': 4.2883405685424805, 'learning_rate': 3.6552748885586927e-06, 'epoch': 8.17}


 89%|████████▉ | 6000/6730 [3:08:07<21:07,  1.74s/it]

{'loss': 0.0074, 'grad_norm': 0.015906207263469696, 'learning_rate': 2.169390787518574e-06, 'epoch': 8.92}


                                                     
 90%|█████████ | 6057/6730 [3:11:31<18:55,  1.69s/it]

{'eval_loss': 0.19529055058956146, 'eval_f1': 0.8737430167597765, 'eval_roc_auc': 0.9234026745913818, 'eval_accuracy': 0.8684992570579495, 'eval_runtime': 105.1128, 'eval_samples_per_second': 12.805, 'eval_steps_per_second': 1.608, 'epoch': 9.0}


 97%|█████████▋| 6500/6730 [3:24:23<06:39,  1.74s/it]  

{'loss': 0.0072, 'grad_norm': 0.01377320196479559, 'learning_rate': 6.835066864784547e-07, 'epoch': 9.66}


                                                     
100%|██████████| 6730/6730 [3:32:47<00:00,  1.69s/it]

{'eval_loss': 0.19848819077014923, 'eval_f1': 0.8695976154992549, 'eval_roc_auc': 0.9208023774145617, 'eval_accuracy': 0.8662704309063893, 'eval_runtime': 105.1291, 'eval_samples_per_second': 12.803, 'eval_steps_per_second': 1.608, 'epoch': 10.0}


100%|██████████| 6730/6730 [3:32:51<00:00,  1.90s/it]

{'train_runtime': 12771.2716, 'train_samples_per_second': 4.215, 'train_steps_per_second': 0.527, 'train_loss': 0.05595679738369243, 'epoch': 10.0}





TrainOutput(global_step=6730, training_loss=0.05595679738369243, metrics={'train_runtime': 12771.2716, 'train_samples_per_second': 4.215, 'train_steps_per_second': 0.527, 'total_flos': 3540944193776640.0, 'train_loss': 0.05595679738369243, 'epoch': 10.0})

In [26]:
# Закрыть сессию WandB
wandb.finish()

0,1
eval/accuracy,▁█▇█▇█████
eval/f1,▁█▇█▇█▇██▇
eval/loss,▄▁▂▃▅▇▇▇██
eval/roc_auc,▁█▇▇▇████▇
eval/runtime,█▃▁▂▁▁▁▁▂▂
eval/samples_per_second,▁▆█▇████▇▇
eval/steps_per_second,▁▇████████
train/epoch,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇███
train/grad_norm,▃▆█▃▁▁▁▁▄▁█▁▁

0,1
eval/accuracy,0.86627
eval/f1,0.8696
eval/loss,0.19849
eval/roc_auc,0.9208
eval/runtime,105.1291
eval/samples_per_second,12.803
eval/steps_per_second,1.608
total_flos,3540944193776640.0
train/epoch,10.0
train/global_step,6730.0


In [None]:
# model.save_pretrained("./seved_model")

In [None]:
# text = dataset['test'][18]['sentence1'] + ' ' + dataset['test'][18]['sentence2']
# true_label = [dataset['test'][18]['label_0'], dataset['test'][18]['label_1']]
# if true_label[0]:
#   true_label = 'label_0'
# else:
#   true_label = 'label_1'
# print(text)
# encoding = tokenizer(text, return_tensors="pt")
# encoding = {k: v.to(trainer.model.device) for k,v in encoding.items()}
# print(encoding)
# outputs = trainer.model(**encoding)
# logits = outputs.logits
# print(logits.shape)
# sigmoid = torch.nn.Sigmoid()
# probs = sigmoid(logits.squeeze().cpu())
# print(probs)
# predictions = np.zeros(probs.shape)
# predictions[np.where(probs >= 0.5)] = 1

# # turn predicted id's into actual label names
# predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
# print(predicted_labels)
# print(true_label)

In [27]:
# устанавливаем модель в inference режим
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [66]:
test_dataset = encoded_dataset['test'] # заметим, что здесь test_dataset уже подготовлен к инференсу см. выше

In [67]:
from torch.utils.data import DataLoader

test_dataloader = DataLoader(test_dataset, batch_size=16)

In [None]:
# for batch in test_dataloader:
#     # print(batch)
#     true_batch_labels = batch['labels'].cpu().numpy()
#     print(true_batch_labels)
#     indices = [int(np.nonzero(row == 1)[0]) for row in true_batch_labels]
#     print(indices)
#     break

[[0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]]
[4, 2, 0, 0, 0, 4, 0, 5, 0, 5, 5, 4, 5, 2, 2, 0]


In [38]:
from sklearn.metrics import classification_report

In [69]:
predictions = []
true_labels = []

for batch in test_dataloader:
  # Move batch data to the same device as the model
  batch = {k: v.to(model.device) for k, v in batch.items()}

  with torch.no_grad():
    outputs = model(**batch)

  logits = outputs.logits
  preds = np.argmax(logits.cpu().numpy(), axis=1)
  predictions.extend(preds)

  true_batch_labels = batch['labels'].cpu().numpy()
  indices = np.array([int(np.nonzero(row == 1)[0]) for row in true_batch_labels])
  true_labels.extend(indices)  # Добавляем истинные метки для оценки

# 5. Оценка качества
f1_micro_average = f1_score(y_true=true_labels, y_pred=predictions, average='micro')
# roc_auc = roc_auc_score(true_labels, predictions, average = 'micro')
accuracy = accuracy_score(true_labels, predictions)
# return as dictionary
metrics = {'f1': f1_micro_average,
            # 'roc_auc': roc_auc,
            'accuracy': accuracy}
report = classification_report(true_labels, predictions)

print(f"Accuracy: {accuracy}")
print(metrics)
print(report)

Accuracy: 0.8847296494355318
{'f1': 0.8847296494355318, 'accuracy': 0.8847296494355318}
              precision    recall  f1-score   support

           0       0.89      0.88      0.88       262
           1       0.83      0.84      0.83       175
           2       0.91      0.89      0.90       479
           3       0.85      0.86      0.86       236
           4       0.83      0.81      0.82       115
           5       0.91      0.93      0.92       416

    accuracy                           0.88      1683
   macro avg       0.87      0.87      0.87      1683
weighted avg       0.88      0.88      0.88      1683

