In [8]:
# # Import necessary libraries
# from transformers import pipeline
# import pandas as pd
# import evaluate

# # Load the dataset
# df = pd.read_csv('car_reviews.csv', delimiter=';')
# train = df['Review']

# # Sentiment Classification
# # Define model name
# model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# # Load sentiment analysis pipeline
# classifier = pipeline("sentiment-analysis", model=model_name)

# # Classify sentiments
# predicted_labels = [classifier(x) for x in train]

# # Extract labels
# label = [x[0]['label'] for x in predicted_labels]

# # Convert labels to binary predictions
# predictions = list(map(lambda x: 1 if x == 'POSITIVE' else 0, label))

# # True labels
# true_labels = [1 if x == 'POSITIVE' else 0 for x in df['Class']]

# # Evaluate accuracy and F1 score
# accuracy = evaluate.load("accuracy")
# f1 = evaluate.load("f1")

# accuracy_result = accuracy.compute(references=true_labels, predictions=predictions)['accuracy']
# f1_result = f1.compute(references=true_labels, predictions=predictions)['f1']

# print(f"Accuracy: {accuracy_result}")
# print(f"F1 Score: {f1_result}")

# # Translation
# # Extract the first two sentences from the first review
# new = df['Review'][0].split('.')
# first_two = new[0:2]
# rev_1 = ".".join(first_two)

# # Load translation pipeline
# model_name = "Helsinki-NLP/opus-mt-en-es"
# translator = pipeline('translation', model=model_name, max_length=30)

# # Translate text
# translations = translator(rev_1)
# translated_review = translations[0]['translation_text']

# print(f"Translated Texts: {translated_review}")

# # Load reference translations
# with open('reference_translations.txt', 'r') as file:
#     references = file.read().split('\n')

# # Evaluate BLEU score
# bleu = evaluate.load("bleu")
# results = bleu.compute(predictions=[translated_review], references=[references])
# bleu_score = results['bleu']

# print(f"Bleu score: {bleu_score}")

# # Question Answering
# # Use the second review as context
# rev2_context = df['Review'][1]

# # Load question-answering pipeline
# model_name = "deepset/minilm-uncased-squad2"
# qa_model = pipeline('question-answering', model=model_name)

# # Define question and context
# question = "What did he like about the brand?"
# context = rev2_context

# # Get answer
# outputs = qa_model(question=question, context=context)
# answer = outputs['answer']

# print(answer)

# # Summarization
# # Summarize the last review
# rev_last = df['Review'][4]

# # Load summarization pipeline
# model_name = 't5-small'
# summarizer = pipeline("summarization", model=model_name)

# # Generate summary
# outputs = summarizer(rev_last, min_length=50, max_length=55)
# summarized_text = outputs[0]['summary_text']

# print(summarized_text)




Accuracy: 0.8
F1 Score: 0.8571428571428571




Translated Texts: Estoy muy satisfecho con mi Nissan NV SL 2014. Uso esta camioneta para mis entregas de negocios y uso personal
Bleu score: 0.7671176261207451


Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


ride quality, reliability
the Nissan Rogue provides me with the desired SUV experience without burdening me with an exorbitant payment . the financial arrangement is quite reasonable; the handling and styling are great; I have hauled 12 bags of mulch in the back with the seats down


In [10]:
import pandas as pd
import torch

# Load the car reviews dataset
file_path = "car_reviews.csv"
df = pd.read_csv(file_path, delimiter=";")

# Put the car reviews and their associated sentiment labels in two lists
reviews = df['Review'].tolist()
real_labels = df['Class'].tolist()


# Instruction 1: sentiment classification

# Load a sentiment analysis LLM into a pipeline
from transformers import pipeline
classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

# Perform inference on the car reviews and display prediction results
predicted_labels = classifier(reviews)
for review, prediction, label in zip(reviews, predicted_labels, real_labels):
    print(f"Review: {review}\nActual Sentiment: {label}\nPredicted Sentiment: {prediction['label']} (Confidence: {prediction['score']:.4f})\n")

# Load accuracy and F1 score metrics    
import evaluate
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")

# Map categorical sentiment labels into integer labels
references = [1 if label == "POSITIVE" else 0 for label in real_labels]
predictions = [1 if label['label'] == "POSITIVE" else 0 for label in predicted_labels]

# Calculate accuracy and F1 score
accuracy_result_dict = accuracy.compute(references=references, predictions=predictions)
accuracy_result = accuracy_result_dict['accuracy']
f1_result_dict = f1.compute(references=references, predictions=predictions)
f1_result = f1_result_dict['f1']
print(f"Accuracy: {accuracy_result}")
print(f"F1 result: {f1_result}")


# Instruction 2: Translation

# Load translation LLM into a pipeline and translate car review
first_review = reviews[0]
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")
translated_review = translator(first_review, max_length=27)[0]['translation_text']
print(f"Model translation:\n{translated_review}")

# Load reference translations from file
with open("reference_translations.txt", 'r') as file:
    lines = file.readlines()
references = [line.strip() for line in lines]
print(f"Spanish translation references:\n{references}")

# Load and calculate BLEU score metric
bleu = evaluate.load("bleu")
bleu_score = bleu.compute(predictions=[translated_review], references=[references])
print(bleu_score['bleu'])


# Instruction 3: extractive QA

# Import auto classes (optional: can be solved via pipelines too)
from transformers import AutoTokenizer
from transformers import AutoModelForQuestionAnswering

# Instantiate model and tokenizer
model_ckp = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_ckp)
model = AutoModelForQuestionAnswering.from_pretrained(model_ckp)

# Define context and question, and tokenize them
context = reviews[1]
print(f"Context:\n{context}")
question = "What did he like about the brand?"
inputs = tokenizer(question, context, return_tensors="pt")

# Perform inference and extract answer from raw outputs
with torch.no_grad():
  outputs = model(**inputs)
start_idx = torch.argmax(outputs.start_logits)
end_idx = torch.argmax(outputs.end_logits) + 1
answer_span = inputs["input_ids"][0][start_idx:end_idx]

# Decode and show answer
answer = tokenizer.decode(answer_span)
print("Answer: ", answer)


# Instruction 4

# Get original text to summarize upon car review
text_to_summarize = reviews[-1]
print(f"Original text:\n{text_to_summarize}")

# Load summarization pipeline and perform inference
model_name = "cnicu/t5-small-booksum"
summarizer = pipeline("summarization", model=model_name)
outputs = summarizer(text_to_summarize, max_length=53)
summarized_text = outputs[0]['summary_text']
print(f"Summarized text:\n{summarized_text}")




Review: I am very satisfied with my 2014 Nissan NV SL. I use this van for my business deliveries and personal use. Camping, road trips, etc. We dont have any children so I store most of the seats in my warehouse. I wanted the passenger van for the rear air conditioning. We drove our van from Florida to California for a Cross Country trip in 2014. We averaged about 18 mpg. We drove thru a lot of rain and It was a very comfortable and stable vehicle. The V8 Nissan Titan engine is a 500k mile engine. It has been tested many times by delivery and trucking companies. This is why Nissan gives you a 5 year or 100k mile bumper to bumper warranty. Many people are scared about driving this van because of its size. But with front and rear sonar sensors, large mirrors and the back up camera. It is easy to drive. The front and rear sensors also monitor the front and rear sides of the bumpers making it easier to park close to objects. Our Nissan NV is a Tow Monster. It pulls our 5000 pound travel tr

Your input_length: 365 is bigger than 0.9 * max_length: 27. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)


Model translation:
Estoy muy satisfecho con mi 2014 Nissan NV SL. Uso esta furgoneta para mis entregas de negocios y uso personal.
Spanish translation references:
['Estoy muy satisfecho con mi Nissan NV SL 2014. Utilizo esta camioneta para mis entregas comerciales y uso personal.', 'Estoy muy satisfecho con mi Nissan NV SL 2014. Uso esta furgoneta para mis entregas comerciales y uso personal.']
0.6022774485691839


Some weights of the model checkpoint at deepset/minilm-uncased-squad2 were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Context:
The car is fine. It's a bit loud and not very powerful. On one hand, compared to its peers, the interior is well-built. The transmission failed a few years ago, and the dealer replaced it under warranty with no issues. Now, about 60k miles later, the transmission is failing again. It sounds like a truck, and the issues are well-documented. The dealer tells me it is normal, refusing to do anything to resolve the issue. After owning the car for 4 years, there are many other vehicles I would purchase over this one. Initially, I really liked what the brand is about: ride quality, reliability, etc. But I will not purchase another one. Despite these concerns, I must say, the level of comfort in the car has always been satisfactory, but not worth the rest of issues found.
Answer:  ride quality, reliability
Original text:
I've been dreaming of owning an SUV for quite a while, but I've been driving cars that were already paid for during an extended period. I ultimately made the decisio

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]



pytorch_model.bin:  82%|########2 | 199M/242M [00:00<?, ?B/s]

ValueError: Could not load model cnicu/t5-small-booksum with any of the following classes: (<class 'transformers.models.auto.modeling_auto.AutoModelForSeq2SeqLM'>, <class 'transformers.models.auto.modeling_tf_auto.TFAutoModelForSeq2SeqLM'>, <class 'transformers.models.t5.modeling_t5.T5ForConditionalGeneration'>, <class 'transformers.models.t5.modeling_tf_t5.TFT5ForConditionalGeneration'>). See the original errors:

while loading with AutoModelForSeq2SeqLM, an error is thrown:
Traceback (most recent call last):
  File "d:\Anaconda\Lib\site-packages\transformers\pipelines\base.py", line 286, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\models\auto\auto_factory.py", line 564, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\modeling_utils.py", line 3557, in from_pretrained
    resolved_archive_file = cached_file(
                            ^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\utils\hub.py", line 402, in cached_file
    resolved_file = hf_hub_download(
                    ^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\utils\_deprecation.py", line 101, in inner_f
    return f(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1240, in hf_hub_download
    return _hf_hub_download_to_cache_dir(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1389, in _hf_hub_download_to_cache_dir
    _download_to_tmp_and_move(
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1915, in _download_to_tmp_and_move
    http_get(
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 552, in http_get
    temp_file.write(chunk)
OSError: [Errno 28] No space left on device

while loading with TFAutoModelForSeq2SeqLM, an error is thrown:
Traceback (most recent call last):
  File "d:\Anaconda\Lib\site-packages\transformers\pipelines\base.py", line 286, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\models\auto\auto_factory.py", line 564, in from_pretrained
    return model_class.from_pretrained(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\modeling_tf_utils.py", line 2873, in from_pretrained
    raise EnvironmentError(
OSError: cnicu/t5-small-booksum does not appear to have a file named tf_model.h5 but there is a file for PyTorch weights. Use `from_pt=True` to load this model from those weights.

while loading with T5ForConditionalGeneration, an error is thrown:
Traceback (most recent call last):
  File "d:\Anaconda\Lib\site-packages\transformers\pipelines\base.py", line 286, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\modeling_utils.py", line 3557, in from_pretrained
    resolved_archive_file = cached_file(
                            ^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\utils\hub.py", line 402, in cached_file
    resolved_file = hf_hub_download(
                    ^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\utils\_deprecation.py", line 101, in inner_f
    return f(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\utils\_validators.py", line 114, in _inner_fn
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1240, in hf_hub_download
    return _hf_hub_download_to_cache_dir(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1389, in _hf_hub_download_to_cache_dir
    _download_to_tmp_and_move(
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 1915, in _download_to_tmp_and_move
    http_get(
  File "d:\Anaconda\Lib\site-packages\huggingface_hub\file_download.py", line 552, in http_get
    temp_file.write(chunk)
OSError: [Errno 28] No space left on device

while loading with TFT5ForConditionalGeneration, an error is thrown:
Traceback (most recent call last):
  File "d:\Anaconda\Lib\site-packages\transformers\pipelines\base.py", line 286, in infer_framework_load_model
    model = model_class.from_pretrained(model, **kwargs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Anaconda\Lib\site-packages\transformers\modeling_tf_utils.py", line 2873, in from_pretrained
    raise EnvironmentError(
OSError: cnicu/t5-small-booksum does not appear to have a file named tf_model.h5 but there is a file for PyTorch weights. Use `from_pt=True` to load this model from those weights.


