In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline
from tqdm import tqdm

In [None]:
# Read the raw car-review dataset from disk and preview its first rows.
reviews_csv = '../data/raw/car_reviews.csv'
df = pd.read_csv(reviews_csv)
df.head()

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts)
# so missing values in the review data are visible before running any model.
df.info()

# Zero-shot Classification

Model Name: facebook/bart-large-mnli

Why?

    It's a popular zero-shot text classification model.

    Based on the BART transformer, fine-tuned on MNLI (Multi-Genre Natural Language Inference).

    Performs well on zero-shot inference tasks where each class is described by a natural-language label.

    Supports both single-label and multi-label classification without any task-specific retraining.

In [None]:
# Build the zero-shot classification pipeline backed by facebook/bart-large-mnli.
zero_shot_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Candidate topics offered to the classifier for every review.
# NOTE(review): the first label reads "talk about ..." while the others read
# "talks about ..."; it is left untouched because these exact strings become
# the values stored in the `talks_about` column.
labels = [
    "talk about driving experience",
    "talks about features",
    "talks about value for money",
    "talks about issues",
    "other",
]

# Classify each review on its own and keep only the first entry of the
# returned 'labels' list (the pipeline is documented to order labels
# best-first), i.e. one topic per review.
talks_about = [
    zero_shot_classifier(review_text, labels)['labels'][0]
    for review_text in tqdm(df['Review'], desc="Classifying topics")
]

# Attach the predicted topic as a new column, aligned with the review order.
df["talks_about"] = talks_about

# Sentiment Analysis

In [None]:
# Sentiment pipeline backed by the DistilBERT SST-2 checkpoint named below.
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

# One lower-cased sentiment label per review, in the same order as df["Review"].
sentiments = []

print("Detecting sentiment...")
# NOTE(review): assumes every entry of df["Review"] is a non-null string —
# TODO confirm against the df.info() output; a NaN would fail in the tokenizer.
for review in tqdm(df["Review"], desc="Sentiment"):
    # Let the tokenizer truncate to the model's 512-token limit. Slicing the
    # raw string (review[:512]) cuts at 512 *characters*, which throws away
    # most of a long review and still does not strictly guarantee the token
    # limit is respected.
    result = sentiment_classifier(review, truncation=True, max_length=512)
    # A single input yields a one-element list; store its label in lower case.
    sentiments.append(result[0]['label'].lower())

# Attach the predicted sentiment as a new column.
df["sentiment"] = sentiments