In [7]:
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from transformers import pipeline

df = pd.read_csv('cleaned_reviews.csv')

# Hugging Face's pre-trained DistilBERT model for sentiment classification
pipe = pipeline('text-classification', model='distilbert-base-uncased-finetuned-sst-2-english')

# Review texts as plain Python strings.
# An empty cleaned review is written to CSV as an empty field and read back as
# NaN (a float), which would break string handling; coerce those to '' so every
# entry is a str and the list stays row-aligned with the DataFrame.
data = df['CleanedText'].fillna('').astype(str).tolist()

# Character-level preview of each review, kept for display purposes only.
# NOTE: slicing to 512 *characters* is not the same as the model's 512-*token*
# input limit, so this list is intentionally not what gets fed to the model.
truncated_data = [text[:512] for text in data]

# Predictions from pre-trained model.
# Truncation is delegated to the tokenizer (truncation=True, max_length=512) so
# the model sees as much of each review as its 512-token input window allows.
predictions = pipe(data, truncation=True, max_length=512)

# Show the first 10 reviews next to the model's output for a quick sanity check.
# The bound mirrors zip() over two [:10] slices: stop at the shorter sequence.
shown = min(10, len(truncated_data), len(predictions))
for i in range(shown):
    review, result = truncated_data[i], predictions[i]
    print(f"Text: {review}\nPrediction: {result}\n")

# Convert predictions into numeric labels (1 for positive, 0 for negative).
# The label text is lower-cased first, so 'POSITIVE' / 'Positive' both map to 1.
labels_pred_classifier1 = [
    1 if prediction['label'].lower().startswith('pos') else 0
    for prediction in predictions
]

# Ground truth: 1 for positive and 0 for negative.
# Normalised the same way as the predicted labels (whitespace stripped,
# case-insensitive) so that values such as 'Positive' or 'positive ' are not
# silently counted as negative. Anything other than 'positive' maps to 0.
# NOTE(review): assumes df['Sentiment'] holds textual labels - confirm against the CSV.
labels_true = [
    1 if str(sentiment).strip().lower() == 'positive' else 0
    for sentiment in df['Sentiment']
]

# Per-class precision / recall / F1 summary for the DistilBERT predictions
report_classifier1 = classification_report(
    y_true=labels_true,
    y_pred=labels_pred_classifier1,
)
print("Classification Report for DistilBERT:\n", report_classifier1)

# Confusion matrix of true labels against predicted labels
cm = confusion_matrix(
    y_true=labels_true,
    y_pred=labels_pred_classifier1,
)
print("Confusion Matrix:\n", cm)


Device set to use cpu


Text: Wow Loved this place
Prediction: {'label': 'POSITIVE', 'score': 0.9998728036880493}

Text: Crust is not good
Prediction: {'label': 'NEGATIVE', 'score': 0.9997629523277283}

Text: Not tasty and the texture was just nasty
Prediction: {'label': 'NEGATIVE', 'score': 0.9996601343154907}

Text: Stopped by during the late May bank holiday off Rick Steve recommendation and loved it
Prediction: {'label': 'POSITIVE', 'score': 0.9995071887969971}

Text: The selection on the menu was great and so were the prices
Prediction: {'label': 'POSITIVE', 'score': 0.9995456337928772}

Text: Now I am getting angry and I want my damn pho
Prediction: {'label': 'NEGATIVE', 'score': 0.9933871626853943}

Text: Honeslty it didnt taste THAT fresh
Prediction: {'label': 'NEGATIVE', 'score': 0.9942670464515686}

Text: The potatoes were like rubber and you could tell they had been made up ahead of time being kept under a warmer
Prediction: {'label': 'NEGATIVE', 'score': 0.8497899770736694}

Text: The fries were g