# Text Classification using Task Specific Encoder-Only Model

---



## Aim: to classify movie reviews into positive or negative categories

# Installing Libraries

In [1]:
!pip install datasets transformers

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

# Loading the Dataset

In [2]:
# Importing the 'load_dataset' function from the 'datasets' library to load datasets
from datasets import load_dataset

# Loading the "rotten_tomatoes" dataset from Hugging Face's datasets library
data = load_dataset("rotten_tomatoes")

# Printing the loaded dataset object to inspect its contents and structure
print(data)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.46k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/699k [00:00<?, ?B/s]

validation.parquet:   0%|          | 0.00/90.0k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/92.2k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})


In [4]:
print(data["train"][0])

{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1}


In [5]:
print(data["train"][6000])

{'text': "the script isn't very good ; not even someone as gifted as hoffman ( the actor ) can make it work .", 'label': 0}


# Text Classification with Representation Model: TASK SPECIFIC

In [6]:
# Importing the 'pipeline' function from the 'transformers' library to easily use pre-trained models
from transformers import pipeline

# Defining the model ID for the pre-trained sentiment analysis model, "cardiffnlp/twitter-roberta-base-sentiment-latest"
model_id = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Initializing the sentiment analysis pipeline with the specified model and tokenizer
# 'return_all_scores=True' returns the sentiment scores for all classes, not just the predicted class
# 'device="cuda:0"' ensures the model runs on the first GPU (if available)
pipe = pipeline(
    model=model_id,          # The model identifier for the sentiment analysis task
    tokenizer=model_id,      # The tokenizer corresponding to the model
    return_all_scores=True,  # Return sentiment scores for all possible sentiment classes (e.g., positive, neutral, negative)
    device='cuda:0'          # Use GPU (CUDA) for faster inference, if available
)


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


# Let's perform predictions on test dataset split

In [7]:
# Extracting the 'test' split from the loaded dataset (data) to access the test data
test_data = data['test']

# Selecting the first data point (text) from the test dataset for testing the model
sample_test_datapoint = test_data[0]["text"]

# Passing the selected test data point (movie review or sentence) through the sentiment analysis pipeline
# 'pipe' processes the text and returns the sentiment predictions, extracting the first prediction result
prediction = pipe(sample_test_datapoint)[0]

# Printing the model's prediction for the selected test data point
print(prediction)


[{'label': 'negative', 'score': 0.00516123604029417}, {'label': 'neutral', 'score': 0.040233541280031204}, {'label': 'positive', 'score': 0.9546052813529968}]


In [None]:
test_data[0]["text"]

'lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .'

In [8]:
# Importing the 'numpy' library for array manipulation and mathematical operations
import numpy as np

# Extracting the score for the 'negative' sentiment from the prediction result
negative_score = prediction[0].get("score")

# Extracting the score for the 'positive' sentiment from the prediction result
postive_score = prediction[2].get("score")

# Using 'np.argmax' to find the index of the highest score between negative and positive sentiment
# The index 0 corresponds to 'negative' and index 1 corresponds to 'positive'
final_prediction = np.argmax([negative_score, postive_score])

# Printing the final prediction (0 for negative, 1 for positive) based on the higher sentiment score
print(final_prediction)


1


# Let's do evaluation on entire test set

In [9]:
# Importing the 'tqdm' library to display a progress bar for the loop
from tqdm import tqdm

# Initializing an empty list to store the final sentiment predictions for each test data point
predictions = []

# Iterating through the test data, displaying a progress bar with the description "predicting..."
for test_data_point in tqdm(test_data, desc="predicting..."):

  # Passing the text of the current test data point through the sentiment analysis pipeline
  prediction = pipe(test_data_point['text'])[0]

  # Extracting the sentiment score for 'negative' from the model's output
  negative_score = prediction[0].get("score")

  # Extracting the sentiment score for 'positive' from the model's output
  positive_score = prediction[2].get("score")

  # Using 'np.argmax' to determine the index of the highest sentiment score (0 for negative, 1 for positive)
  final_prediction = np.argmax([negative_score, positive_score])

  # Appending the final prediction (0 or 1) to the predictions list
  predictions.append(final_prediction)


predicting...:   0%|          | 5/1066 [00:00<00:21, 49.52it/s]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
predicting...: 100%|██████████| 1066/1066 [00:14<00:00, 73.69it/s]


# Let's compute the Classification Report / Confusion Matrix

In [10]:
# Importing the 'classification_report' function from the 'sklearn.metrics' module
from sklearn.metrics import classification_report

# Extracting the true labels (ground truth) from the 'test' split of the dataset
y_true = data['test']["label"]

# Generating the classification report, comparing the true labels ('y_true') with the predicted labels ('predictions')
# 'target_names' defines the readable names for each class (negative and positive movie reviews)
report = classification_report(y_true, predictions,
                               target_names=["Negative Movie Review", "Positive Movie Review"])

# Printing the generated classification report which includes metrics like precision, recall, and F1-score for each class
print(report)


                       precision    recall  f1-score   support

Negative Movie Review       0.76      0.88      0.81       533
Positive Movie Review       0.86      0.72      0.78       533

             accuracy                           0.80      1066
            macro avg       0.81      0.80      0.80      1066
         weighted avg       0.81      0.80      0.80      1066

