# Installing Libraries

In [None]:
!pip install datasets transformers

In [None]:
# Bring in the loader function from the Hugging Face 'datasets' library.
from datasets import load_dataset

# Name of the dataset to load; kept in one place so it is easy to spot.
DATASET_NAME = "rotten_tomatoes"

# Load the dataset and keep it in `data`, which the following cells read from.
data = load_dataset(DATASET_NAME)

# Leave the object as the last expression so the notebook displays its
# structure (available splits and columns).
data


In [None]:
# Display the first record (index 0) of the "train" split.
data["train"][0]

In [None]:
# Display the record at index 7000 of the "train" split.
# NOTE(review): presumably chosen to show an example with a different label
# than index 0 — confirm against the displayed output.
data["train"][7000]

# Text Classification using Generative Models

In [None]:
# 'torch' is used only to detect whether a CUDA GPU is present; 'pipeline'
# from 'transformers' loads a pre-trained model behind a single callable.
import torch
from transformers import pipeline

# Model ID of the pre-trained "flan-t5-small" model by Google
model_id = "google/flan-t5-small"

# Run on the first GPU when CUDA is available, otherwise fall back to the CPU.
# Hard-coding "cuda:0" makes pipeline creation fail on machines without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Loading the text-to-text generation pipeline with the specified model
pipe = pipeline(
    "text2text-generation",  # Task type for generating text from text input
    model=model_id,          # Model identifier
    device=device,           # GPU if present, CPU otherwise
)


# Let's extract the test data

In [None]:
# Keep a handle on the "test" split; the cells below read from `test_data`.
test_data = data["test"]

# Let's play with Flan-T5

In [None]:
# Prompt asking the model for the sentiment of the sentence that follows it
prompt = "What is sentiment of input sentence?"

# First review of the test data. Indexing the row first (then the "text"
# field) reads a single record instead of going through the whole column.
sample_test_datapoint = test_data[0]['text']

# Printing the selected sample text to verify the content
print(sample_test_datapoint)

# Join the prompt and the review with a space; plain concatenation would glue
# the prompt's final "?" directly onto the first word of the review.
final_input = f"{prompt} {sample_test_datapoint}"

# Passing the final input through the model pipeline to get a prediction
prediction = pipe(final_input)

# Printing the raw pipeline output
print(prediction)


# A more precise prompt

In [None]:
# Prompt asking the model whether the movie review that follows is 1 or 0
# (per the original notebook: 1 = positive, 0 = negative)
prompt = "Is the sentiment of input movie review 1 or 0?"

# 13th review (index 12) of the test data. Indexing the row first (then the
# "text" field) reads a single record instead of going through the whole column.
sample_test_datapoint = test_data[12]['text']

# Printing the selected movie review text to verify the content
print(sample_test_datapoint)

# Join the prompt and the review with a space; plain concatenation would glue
# the prompt's final "?" directly onto the first word of the review.
final_input = f"{prompt} {sample_test_datapoint}"

# Passing the final input through the model pipeline to get a prediction
prediction = pipe(final_input)

# Printing the raw pipeline output
print(prediction)


# Let's evaluate on the entire test set

In [None]:
# Importing the 'tqdm' library to display a progress bar for loops
from tqdm import tqdm

# Prompt asking the model whether the movie review that follows is positive or negative
prompt = "Is the sentiment of input movie review positive or negative?"

# Generated answers mapped to numeric sentiment (1 for positive, 0 for negative)
label_by_answer = {"positive": 1, "negative": 0}

# One entry is appended per test review, so this list always stays aligned
# with the ground-truth labels used for the report in the next cell.
predictions = []

# Walk over every review together with its true label; the label is only
# needed for the fallback below. 'total' lets tqdm size the progress bar,
# since zip() has no length of its own.
for each_test_datapoint, true_label in tqdm(
    zip(test_data['text'], test_data['label']), total=len(test_data)
):
    # Join prompt and review with a space so the prompt's "?" is not glued
    # onto the first word of the review.
    test_sentence = f"{prompt} {each_test_datapoint}"

    # Getting the generated text from the model (e.g., 'positive' or 'negative')
    prediction = pipe(test_sentence)[0].get('generated_text')

    # Tolerate surrounding whitespace / capitalisation in the generated answer
    # (and a missing 'generated_text' key, which .get() reports as None).
    answer = (prediction or "").strip().lower()

    if answer in label_by_answer:
        predictions.append(label_by_answer[answer])
    else:
        # In case the model generates unexpected output, print an alert
        print('ALERT: ', prediction)
        # Skipping the sample would leave 'predictions' shorter than the label
        # list and break the report. Count the unparseable answer as a
        # misclassification instead by recording the opposite of the true label.
        predictions.append(1 - true_label)


In [None]:
# Bring in scikit-learn's per-class metrics summary.
from sklearn.metrics import classification_report

# Ground-truth labels taken from the "test" split of the dataset.
y_true = data["test"]["label"]

# Display names for the report rows, passed as 'target_names' so the report
# shows readable class names instead of raw label values.
class_names = ["Negative Movie Review", "Positive Movie Review"]

# Compare the ground truth with the model's predictions.
report = classification_report(y_true, predictions, target_names=class_names)

# Show the resulting text report.
print(report)
