In [1]:
# Import necessary libraries
import pandas as pd
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
from evaluate import load

# Read the semicolon-separated car reviews file into a DataFrame.
file_path = "dodge.csv"
df = pd.read_csv(file_path, sep=";")

# Pull the review texts and their sentiment labels out as plain Python lists
# (the 'Review' column holds the inputs, the 'Class' column the labels).
reviews, real_labels = (df[column].tolist() for column in ("Review", "Class"))

# Sentiment Classification
# Build a sentiment-analysis pipeline on the SST-2 fine-tuned DistilBERT
# checkpoint (Source: Hugging Face Transformers).
classifier = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Classify all reviews in one call, then show each review next to its
# actual label and the model's predicted label with its confidence score.
predicted_labels = classifier(reviews)
for review, label, prediction in zip(reviews, real_labels, predicted_labels):
    print(f"Review: {review}\nActual Sentiment: {label}\nPredicted Sentiment: {prediction['label']} (Confidence: {prediction['score']:.4f})\n")

# Score the predictions with accuracy and F1 (Source: Hugging Face Evaluate library).
accuracy = load("accuracy")
f1 = load("f1")

# Both metrics take integer class ids: "POSITIVE" becomes 1, anything else 0.
references = [int(label == "POSITIVE") for label in real_labels]
predictions = [int(result["label"] == "POSITIVE") for result in predicted_labels]

metric_inputs = {"references": references, "predictions": predictions}
accuracy_result = accuracy.compute(**metric_inputs)["accuracy"]
f1_result = f1.compute(**metric_inputs)["f1"]
print(f"Accuracy: {accuracy_result}")
print(f"F1 result: {f1_result}")

# Translation
# Load the English-to-Spanish translation model (Source: Hugging Face Transformers).
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

# Translate the first review; max_length=27 caps the length of the generated output.
first_review = reviews[0]
translated_review = translator(first_review, max_length=27)[0]['translation_text']
print(f"Model translation:\n{translated_review}")

# Load the reference translations, one per line, and calculate the BLEU score
# (Source: Hugging Face Evaluate library).
# The encoding is given explicitly so accented characters in the Spanish
# references decode the same way on every platform instead of depending on
# the locale default. Blank lines are skipped because they would otherwise be
# passed to the metric as empty reference translations.
with open("data/reference_translations.txt", 'r', encoding="utf-8") as file:
    references = [line.strip() for line in file if line.strip()]

# A single prediction is scored against the full list of references, hence
# the extra level of nesting around `references`.
bleu = load("bleu")
bleu_score = bleu.compute(predictions=[translated_review], references=[references])['bleu']
print(bleu_score)

# Extractive QA
# Load the tokenizer and question-answering model from the same checkpoint
# (Source: Hugging Face Transformers).
model_ckp = "deepset/minilm-uncased-squad2"
tokenizer = AutoTokenizer.from_pretrained(model_ckp)
model = AutoModelForQuestionAnswering.from_pretrained(model_ckp)

# The second review is the context the answer is extracted from.
context = reviews[1]
print(f"Context:\n{context}")
question = "What did he like about the brand?"

# Encode question and context together as PyTorch tensors.
inputs = tokenizer(question, context, return_tensors="pt")

# Run the model without gradient tracking; this is inference only.
with torch.no_grad():
    outputs = model(**inputs)

# Take the highest-scoring start and end positions. The end index is
# shifted by one so the slice below includes the end token itself.
start_idx = outputs.start_logits.argmax()
end_idx = outputs.end_logits.argmax() + 1

# Slice the answer's token ids out of the encoded input and decode them to text.
encoded_sequence = inputs["input_ids"][0]
answer_span = encoded_sequence[start_idx:end_idx]
answer = tokenizer.decode(answer_span)
print("Answer: ", answer)

# Text Summarization
# Summarize the last review with a small T5 checkpoint
# (Source: Hugging Face Transformers).
model_name = "cnicu/t5-small-booksum"
text_to_summarize = reviews[-1]
print(f"Original text:\n{text_to_summarize}")

# Build the summarization pipeline and generate a summary; max_length=53
# caps the length of the generated output.
summarizer = pipeline(task="summarization", model=model_name)
outputs = summarizer(text_to_summarize, max_length=53)

# The pipeline returns a list of result dicts; the summary of the single
# input is in the first one.
summarized_text = outputs[0]["summary_text"]
print(f"Summarized text:\n{summarized_text}")


  from .autonotebook import tqdm as notebook_tqdm


FileNotFoundError: [Errno 2] No such file or directory: 'data/dodge.csv'