## Import Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline
from tqdm import tqdm

## Load Dataset

In [None]:
# Load the raw car reviews; the path is resolved relative to the current
# working directory. Later cells read the 'Review' column of this frame.
df = pd.read_csv('../data/raw/car_reviews.csv')

# Only the last expression of a cell is echoed, so a bare `df.head()` here
# would be evaluated and thrown away; print it to actually show the preview.
print(df.head())

# Column names, dtypes and non-null counts.
df.info()

# Zero-shot Classification

Model Name: facebook/bart-large-mnli

Why?

    It's a popular zero-shot text classification model.

    Based on the BART transformer, fine-tuned on MNLI (Multi-Genre Natural Language Inference).

    Performs well on zero-shot inference tasks where each class is described by a natural-language label.

    Efficient for multi-label or single-label classification without needing retraining.

In [None]:
# Set up zero-shot classifier
import torch  # imported in this cell because it is only needed to probe for a GPU

# Use the first CUDA GPU when one is available; otherwise fall back to CPU (-1)
# so the cell does not fail on machines without a GPU.
zero_shot_device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=zero_shot_device,
)

# Define candidate labels (the topic assigned to each review is the
# highest-ranked one of these)
labels = [
    "talks about driving experience",
    "talks about features",
    "talks about value for money",
    "talks about issues",
    "other"
]

# Batch classification function
def classify_in_batches(texts, labels, batch_size=32, model=None):
    """Return the top-ranked zero-shot label for every text, working in chunks.

    Args:
        texts: Sequence of input strings; must support ``len()`` and slicing.
        labels: Candidate labels passed through to the classifier.
        batch_size: Number of texts handed to the classifier per call.
            Must be at least 1.
        model: Callable invoked as ``model(batch, labels)`` that returns, per
            text, a mapping whose ``'labels'`` entry is ordered best-first.
            Defaults to the module-level ``classifier`` pipeline.

    Returns:
        A list with one label per input text, in input order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently return no results.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    predict = classifier if model is None else model
    results = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        outputs = predict(batch, labels)
        # NOTE(review): some transformers releases appear to return a bare dict
        # (not a list) when the batch holds a single text; normalise so the
        # last, possibly one-element, batch is handled the same way.
        if isinstance(outputs, dict):
            outputs = [outputs]
        # 'labels' is sorted by score, so index 0 is the top prediction.
        results.extend(output['labels'][0] for output in outputs)
    return results

# Label every review in the dataset with its top zero-shot topic.
predicted_topics = classify_in_batches(
    df['Review'].tolist(),
    labels,
    batch_size=32,
)
df['talks_about'] = predicted_topics

# Show the first rows of the review text next to the assigned topic.
print(df.loc[:, ['Review', 'talks_about']].head())

# Persist the labelled frame to the working directory, without the index column.
df.to_csv("car_reviews_with_topics.csv", index=False)

# SENTIMENT ANALYSIS

In [None]:
# Sentiment model: DistilBERT checkpoint fine-tuned on SST-2.
sentiment_classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

sentiments = []

print("Detecting sentiment...")
for review in tqdm(df["Review"], desc="Sentiment"):
    # Let the tokenizer truncate to the model's maximum length in *tokens*.
    # Slicing the string to 512 characters cut reviews far shorter than the
    # model can actually handle and discarded usable context.
    result = sentiment_classifier(review, truncation=True)
    # The pipeline returns a list with one dict per input; store its label
    # in lower case.
    sentiments.append(result[0]['label'].lower())

df["sentiment"] = sentiments