# New Section

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from transformers import EarlyStoppingCallback
import torch
from tqdm import tqdm
import os
os.environ["WANDB_DISABLED"] = "true"

ModuleNotFoundError: ignored

In [None]:
def get_batch_tokenizer(tokenizer, dataset):
    return tokenizer.batch_encode_plus(dataset,
                                       max_length=256,
                                       padding=True,
                                       truncation=True,
                                       add_special_tokens=True,
                                       return_attention_mask=True,
                                       return_tensors='pt')


In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val
                in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [None]:
def compute_metrics(p):
    prediction, labels = p
    preds_flat = np.argmax(prediction, axis=1).flatten()
    labels_flat = labels.flatten()
    f1 = f1_score(labels_flat, preds_flat, average='macro')
    return {"f1": f1}

In [None]:
df = pd.read_csv("../input/ecommerce-text-classification/ecommerceDataset.csv", names=["labels", "descriptions"])
descriptions = df["descriptions"].map(str).values.tolist()
labels = df["labels"].values.tolist()

le = LabelEncoder()
labels = le.fit_transform(labels).tolist()

In [None]:
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=4)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [None]:
tokenizer = BertTokenizer.from_pretrained(
        "bert-base-uncased",
        do_lower_case=True)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

In [None]:
x_train, x_test, y_train, y_test = train_test_split(descriptions, labels, test_size=0.4, stratify=labels, random_state=42)
x_valid, x_test, y_valid, y_test = train_test_split(x_test, y_test, test_size=0.5, random_state=42)

In [None]:
x_train_tokens = get_batch_tokenizer(tokenizer, x_train)
x_valid_tokens = get_batch_tokenizer(tokenizer, x_valid)
x_test_tokens = get_batch_tokenizer(tokenizer, x_test)

In [None]:
train_dataset = Dataset(x_train_tokens, y_train)
valid_dataset = Dataset(x_valid_tokens, y_valid)
test_dataset = Dataset(x_test_tokens, y_test)

In [None]:
args = TrainingArguments(output_dir="output",
                            evaluation_strategy="epoch",
                            metric_for_best_model="f1",
                            save_strategy="epoch",
                            num_train_epochs=3,
                            load_best_model_at_end=True
                            )

Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
trainer = Trainer(args=args,
                    model=model,
                    train_dataset=train_dataset,
                    eval_dataset=valid_dataset,
                    compute_metrics=compute_metrics,
                    callbacks=[EarlyStoppingCallback(
                            early_stopping_patience=3)]
                    )

In [None]:
trainer.train()

***** Running training *****
  Num examples = 30255
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 11346
  import sys


Epoch,Training Loss,Validation Loss,F1
1,0.1887,0.193811,0.966104
2,0.1007,0.15634,0.974063
3,0.0627,0.130058,0.97824


***** Running Evaluation *****
  Num examples = 10085
  Batch size = 8
Saving model checkpoint to output/checkpoint-3782
Configuration saved in output/checkpoint-3782/config.json
Model weights saved in output/checkpoint-3782/pytorch_model.bin
  import sys
***** Running Evaluation *****
  Num examples = 10085
  Batch size = 8
Saving model checkpoint to output/checkpoint-7564
Configuration saved in output/checkpoint-7564/config.json
Model weights saved in output/checkpoint-7564/pytorch_model.bin
  import sys
***** Running Evaluation *****
  Num examples = 10085
  Batch size = 8
Saving model checkpoint to output/checkpoint-11346
Configuration saved in output/checkpoint-11346/config.json
Model weights saved in output/checkpoint-11346/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from output/checkpoint-11346 (score: 0.9782403748917118).


TrainOutput(global_step=11346, training_loss=0.1357222509493425, metrics={'train_runtime': 2741.5097, 'train_samples_per_second': 33.108, 'train_steps_per_second': 4.139, 'total_flos': 1.194085189020672e+16, 'train_loss': 0.1357222509493425, 'epoch': 3.0})

In [None]:
trainer = Trainer(model=model)
predictions = trainer.predict(test_dataset)

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WAND_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
***** Running Prediction *****
  Num examples = 10085
  Batch size = 8
  import sys


In [None]:
preds = np.argmax(predictions.predictions, axis=1).flatten()
true_vals = predictions.label_ids

In [None]:
print(classification_report(true_vals, preds, target_names=list(le.classes_)))

                        precision    recall  f1-score   support

                 Books       0.99      0.97      0.98      2335
Clothing & Accessories       0.99      0.98      0.99      1772
           Electronics       0.97      0.97      0.97      2111
             Household       0.98      0.99      0.98      3867

              accuracy                           0.98     10085
             macro avg       0.98      0.98      0.98     10085
          weighted avg       0.98      0.98      0.98     10085

