In [None]:
# Install necessary libraries
# !pip install transformers pandas matplotlib

# Step 1: Load the CSV Dataset
import pandas as pd

# Read the review data up front; every later step operates on `dataset`.
try:
    dataset = pd.read_csv("wine_reviews.csv")
except FileNotFoundError:
    print("Error: File 'wine_reviews.csv' not found. Ensure the file is in the correct directory.")
    # `exit()` is a helper the `site` module injects for interactive use and
    # it exits with status 0. Raising SystemExit directly needs no helper and
    # reports the failure to the caller through a non-zero exit status.
    raise SystemExit(1)
else:
    # Only reached when the file was read successfully.
    print("Dataset Loaded Successfully!")
    print(dataset.head())

# Step 2: Import HuggingFace Pipeline and Load the Zero-Shot Classification Model
from transformers import pipeline

# Build the zero-shot classification pipeline used by the rest of the script.
# Any failure while constructing it is reported the same way, then the run
# stops because nothing further can be classified without the model.
try:
    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
except Exception as e:
    print("Error loading the zero-shot classification model:", e)
    print("Ensure you have an active internet connection and the transformers library installed.")
    # Raise SystemExit instead of calling the interactive `exit()` helper so
    # the failure is signalled with a non-zero exit status.
    raise SystemExit(1)
else:
    print("Model Loaded: facebook/bart-large-mnli")

# Step 3: Define Candidate Labels
# The classifier scores every review against each of these labels. The first
# three describe a topic; "other" is the catch-all, and it is also the value
# classify_review falls back to for invalid input or classification errors.
candidate_labels = [
    "talks about food combinations",
    "talks about taste",
    "talks about value for money",
] + ["other"]

# Step 4: Classify Reviews and Add Predicted Labels
def classify_review(review):
    """Return the best-matching candidate label for a single review.

    Args:
        review: The review text. Missing values and anything that is not a
            ``str`` are not sent to the model.

    Returns:
        The first entry of the classifier's ``labels`` result (the label with
        the highest score), or ``"other"`` when the input is unusable or when
        any exception is raised along the way.

    Relies on the module-level ``classifier`` and ``candidate_labels``.
    """
    fallback = "other"
    try:
        # The missing-value test runs first, then the type test.
        usable = not pd.isna(review) and isinstance(review, str)
        if not usable:
            return fallback
        ranked_labels = classifier(review, candidate_labels)['labels']
        return ranked_labels[0]
    except Exception as err:
        # Report the offending review, then degrade to the catch-all label so
        # one bad row does not abort the whole run.
        print(f"Error classifying review: {review}\n{err}")
        return fallback

# Add the predictions as a new column: classify_review is applied to every
# value of the 'review' column and the results are stored in 'talks_about'.
try:
    dataset['talks_about'] = dataset['review'].apply(classify_review)
except Exception as e:
    print("Error applying classification to the dataset:", e)
    # Raise SystemExit instead of calling the interactive `exit()` helper so
    # the failure is signalled with a non-zero exit status.
    raise SystemExit(1)
else:
    print("Classification Completed!")

# Save the updated dataset (optional).
# The file name is relative, so it is written to the current working
# directory without the index column. A failed write is reported but does not
# stop the run.
output_file = "classified_wine_reviews.csv"
try:
    dataset.to_csv(output_file, index=False)
    print(f"Classified dataset saved to {output_file}")
except Exception as save_error:
    print(f"Error saving the dataset to file: {save_error}")

# Step 5: Visualize the Spread of Categories
import matplotlib

# Prefer the interactive Tk backend ('Agg' is the alternative for static
# environments). With the default force=True this call raises ImportError when
# the backend cannot be set up, for example when pyplot is already loaded and
# an incompatible GUI framework is running. That would abort the script here,
# outside any try block. force=False ignores the failure and keeps whichever
# backend is already active.
matplotlib.use('TkAgg', force=False)
import matplotlib.pyplot as plt

try:
    # Number of reviews assigned to each predicted category
    category_counts = dataset['talks_about'].value_counts()

    # Draw one bar per category on a new 10 x 6 figure
    plt.figure(figsize=(10, 6))
    category_counts.plot(kind='bar', color='skyblue', edgecolor='black')

    # Axis labels and title; tilt the category names so they do not overlap
    plt.xlabel("Categories")
    plt.ylabel("Number of Reviews")
    plt.title("Spread of Review Categories")
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Display the chart interactively. To write it to disk instead, call
    # plt.savefig("category_distribution.png") in place of plt.show().
    plt.show()
except Exception as plot_error:
    # Plotting is the last step; report the problem rather than raising.
    print(f"Error visualizing the data: {plot_error}")


Dataset Loaded Successfully!
                                              review
0  Easily the best Sauvignon Blanc I've had to da...
1                      Excellent, Mockingbird Inn?\n
2  Absolutely incredible. One of the best sauvign...
3                   Reported in nytimes. Did not try
4  Great. Much different than other Sauv Blancs s...


Device set to use cpu


Model Loaded: facebook/bart-large-mnli
Classification Completed!
Classified dataset saved to classified_wine_reviews.csv
