# Customer Review Analysis and Product Categorization
## Using NLP and LLMs

In [None]:
# Required Libraries
import pandas as pd
import numpy as np
from transformers import pipeline
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import torch
from sentence_transformers import SentenceTransformer
import matplotlib.pyplot as plt
import seaborn as sns


  from .autonotebook import tqdm as notebook_tqdm


## 1. Data Loading

In [2]:
# Helper functions for loading the data and printing a summary

def load_data(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame."""
    return pd.read_csv(file_path)

def get_summary(df):
    """Print a quick overview of ``df``.

    Output order: first rows, column/dtype report, descriptive statistics,
    and the number of missing values per column.
    """
    print(df.head())
    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() would add a stray "None" line after the report.
    df.info()
    print(df.describe())
    print(df.isnull().sum())


In [3]:
# Path of the reviews CSV file (relative path, passed to load_data below).
file_path = '1429_1.csv'

In [4]:
# Load the data set into a DataFrame.
df = load_data(file_path)

  df = pd.read_csv(file_path)


In [5]:
# Print head, info, descriptive statistics and null counts of the raw data set.
get_summary(df)


                     id                                               name  \
0  AVqkIhwDv8e3D1O-lebb  All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...   
1  AVqkIhwDv8e3D1O-lebb  All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...   
2  AVqkIhwDv8e3D1O-lebb  All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...   
3  AVqkIhwDv8e3D1O-lebb  All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...   
4  AVqkIhwDv8e3D1O-lebb  All-New Fire HD 8 Tablet, 8 HD Display, Wi-Fi,...   

        asins   brand                                         categories  \
0  B01AHB9CN2  Amazon  Electronics,iPad & Tablets,All Tablets,Fire Ta...   
1  B01AHB9CN2  Amazon  Electronics,iPad & Tablets,All Tablets,Fire Ta...   
2  B01AHB9CN2  Amazon  Electronics,iPad & Tablets,All Tablets,Fire Ta...   
3  B01AHB9CN2  Amazon  Electronics,iPad & Tablets,All Tablets,Fire Ta...   
4  B01AHB9CN2  Amazon  Electronics,iPad & Tablets,All Tablets,Fire Ta...   

                                                keys manufacturer  \
0  84

## 1.1 Preprocessing

In [1]:
# Helper functions for cleaning the data

def clean_data(df):
    """Return a cleaned copy of ``df``.

    Rows with a missing 'reviews.title' are dropped and missing
    'reviews.text' values are replaced by an empty string.
    """
    # Take an explicit copy before assigning: writing a column onto the frame
    # returned by dropna() can trigger pandas' SettingWithCopyWarning.
    cleaned = df.dropna(subset=['reviews.title']).copy()
    cleaned['reviews.text'] = cleaned['reviews.text'].fillna('')
    return cleaned

def get_clean_summary(df):
    """Print a quick overview of the cleaned ``df``.

    Output order: first rows, column/dtype report, descriptive statistics,
    and the number of missing values per column.
    """
    print(df.head())
    # info() prints its own report and returns None, so it is not wrapped in
    # print() (that would emit an extra "None" line).
    df.info()
    print(df.describe())
    print(df.isnull().sum())

# Clean the data set; df is rebound to the frame returned by clean_data.
df = clean_data(df)

# Print a summary of the cleaned data set.
get_clean_summary(df)

# Helper functions for preprocessing the data

def preprocess_data(df):
    """Add a 'text' column holding "<reviews.title> <reviews.text>".

    The column is written onto ``df`` itself, which is also returned.
    """
    titles = df['reviews.title']
    bodies = df['reviews.text']
    df['text'] = titles + ' ' + bodies
    return df

def get_preprocess_summary(df):
    """Print a quick overview of the preprocessed ``df``.

    Output order: first rows, column/dtype report, descriptive statistics,
    and the number of missing values per column.
    """
    print(df.head())
    # info() prints its own report and returns None, so it is not wrapped in
    # print() (that would emit an extra "None" line).
    df.info()
    print(df.describe())
    print(df.isnull().sum())

# Preprocess the data set (adds the combined 'text' column).
df = preprocess_data(df)

# Print a summary of the preprocessed data set.
get_preprocess_summary(df)

# Required Functions to extract features

def extract_features(df):
    """Encode the 'text' column of ``df`` with a sentence-embedding model.

    Args:
        df: DataFrame with a 'text' column of strings.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for the list of
        texts, in row order.
    """
    # Load pre-trained BERT model
    model = SentenceTransformer('bert-base-nli-mean-tokens')
    # Hand encode() a plain list: a Series keeps its original index labels,
    # which have gaps once rows were dropped, so positional lookups on it
    # are not reliable.
    sentences = df['text'].tolist()
    features = model.encode(sentences, show_progress_bar=True)
    return features

def get_features_summary(features):
    """Print the shape of ``features`` and then ``features`` itself."""
    for item in (features.shape, features):
        print(item)

# Extract embedding features from the preprocessed data set.
features = extract_features(df)

# Print the shape and contents of the extracted features.
get_features_summary(features)

# Required Functions to cluster data

def cluster_data(features, n_clusters):
    """Run KMeans (``random_state=0``) on ``features``.

    Returns the result of ``fit_predict``, i.e. one cluster label per row.
    """
    return KMeans(n_clusters=n_clusters, random_state=0).fit_predict(features)

def get_cluster_summary(clusters):
    """Print the shape of ``clusters`` and then ``clusters`` itself."""
    for item in (clusters.shape, clusters):
        print(item)

# Cluster the extracted features into n_clusters groups.
n_clusters = 5
clusters = cluster_data(features, n_clusters)

# Print the shape and contents of the cluster assignments.
get_cluster_summary(clusters)

# Required Functions to evaluate clustering

def evaluate_clustering(features, clusters):
    """Return ``silhouette_score(features, clusters)`` for the clustering."""
    return silhouette_score(features, clusters)

# Evaluate the clustering and print the resulting silhouette score.
silhouette_avg = evaluate_clustering(features, clusters)
print(silhouette_avg)

# Required Functions to visualize clustering

def visualize_clustering(features, clusters):
    """Show a 2-D scatter plot of ``features`` coloured by ``clusters``.

    The features are projected onto their first two principal components
    before plotting.
    """
    # PCA is not among the imports at the top of the notebook, so import it
    # here; without this the function fails with NameError.
    from sklearn.decomposition import PCA

    # Reduce dimensionality to 2D
    pca = PCA(n_components=2)
    features_2d = pca.fit_transform(features)
    # Visualize clustering
    plt.figure(figsize=(10, 10))
    sns.scatterplot(x=features_2d[:, 0], y=features_2d[:, 1], hue=clusters, palette='tab10')
    plt.title('Clustering')
    plt.show()

# Plot the clusters on a 2-D projection of the features.
visualize_clustering(features, clusters)

# Required Functions to get cluster summaries

def get_cluster_summaries(df, clusters):
    """Aggregate ratings and review counts per cluster.

    Writes ``clusters`` into ``df['cluster']`` (in place) and returns a frame
    with one row per cluster: count and mean of 'reviews.rating' and count of
    'reviews.text'.
    """
    df['cluster'] = clusters
    aggregations = {
        'reviews.rating': ['count', 'mean'],
        'reviews.text': 'count',
    }
    per_cluster = df.groupby('cluster').agg(aggregations)
    return per_cluster.reset_index()

def get_cluster_summaries_summary(cluster_summaries):
    """Print ``cluster_summaries`` to standard output."""
    rendered = str(cluster_summaries)
    print(rendered)

# Build the per-cluster summary table (this also adds df['cluster']).
cluster_summaries = get_cluster_summaries(df, clusters)

# Print the per-cluster summary table.
get_cluster_summaries_summary(cluster_summaries)

# Required Functions to get cluster reviews

def get_cluster_reviews(df, cluster):
    """Return the 'text' values of the rows whose 'cluster' equals ``cluster``."""
    in_cluster = df['cluster'] == cluster
    return df.loc[in_cluster, 'text']

def get_cluster_reviews_summary(cluster_reviews):
    """Print ``cluster_reviews`` to standard output."""
    rendered = str(cluster_reviews)
    print(rendered)

# Select the review texts assigned to one cluster (cluster 0 here).
cluster = 0
cluster_reviews = get_cluster_reviews(df, cluster)

# Print the review texts of that cluster.
get_cluster_reviews_summary(cluster_reviews)

# Required Functions to get cluster keywords

def get_cluster_keywords(df, clusters, n_keywords):
    """Pick representative keywords for every cluster.

    For each cluster id from 0 to ``max(clusters)``, the cluster's 'text'
    entries are embedded and averaged into a centroid. Candidate words taken
    from those texts are embedded with the same model and ranked by cosine
    similarity to the centroid.

    Args:
        df: DataFrame with a 'text' column, one row per entry of ``clusters``.
        clusters: Cluster id per row of ``df``, in row order.
        n_keywords: Maximum number of keywords returned per cluster.

    Returns:
        A list with one entry per cluster id; each entry is a list of up to
        ``n_keywords`` words (empty when the cluster has no usable text).
    """
    import re
    from collections import Counter

    labels = np.asarray(clusters)
    if labels.size == 0:
        return []

    # Load the model once; reloading it for every cluster only repeats work.
    model = SentenceTransformer('bert-base-nli-mean-tokens')
    max_candidates = 200  # cap on the number of words embedded per cluster

    cluster_keywords = []
    for cluster in range(int(labels.max()) + 1):
        # The boolean mask is positional, so it works whatever df's index is.
        texts = df.loc[labels == cluster, 'text'].astype(str).tolist()
        word_counts = Counter(
            word
            for text in texts
            for word in re.findall(r'[a-z]{3,}', text.lower())
        )
        candidates = [word for word, _ in word_counts.most_common(max_candidates)]
        if not candidates:
            cluster_keywords.append([])
            continue

        features = np.asarray(model.encode(texts, show_progress_bar=True))
        centroid = features.mean(axis=0)

        # SentenceTransformer cannot decode a vector back into words, so rank
        # words that actually occur in the cluster by closeness to the centroid.
        candidate_vectors = np.asarray(model.encode(candidates, show_progress_bar=False))
        norms = np.linalg.norm(candidate_vectors, axis=1) * np.linalg.norm(centroid)
        similarities = (candidate_vectors @ centroid) / np.where(norms == 0, 1.0, norms)
        best = np.argsort(-similarities)[:n_keywords]
        cluster_keywords.append([candidates[i] for i in best])
    return cluster_keywords

def get_cluster_keywords_summary(cluster_keywords):
    """Print ``cluster_keywords`` to standard output."""
    rendered = str(cluster_keywords)
    print(rendered)

# Compute n_keywords keywords for every cluster.
n_keywords = 5
cluster_keywords = get_cluster_keywords(df, clusters, n_keywords)

# Print the keywords of every cluster.
get_cluster_keywords_summary(cluster_keywords)

# Required Functions to get cluster summaries with keywords

def get_cluster_summaries_with_keywords(cluster_summaries, cluster_keywords):
    """Store ``cluster_keywords`` in a 'keywords' column of ``cluster_summaries``.

    The frame is modified in place and returned.
    """
    summaries = cluster_summaries
    summaries['keywords'] = cluster_keywords
    return summaries

def get_cluster_summaries_with_keywords_summary(cluster_summaries_with_keywords):
    """Print ``cluster_summaries_with_keywords`` to standard output."""
    rendered = str(cluster_summaries_with_keywords)
    print(rendered)

# Attach the keywords to the per-cluster summary table.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_keywords = get_cluster_summaries_with_keywords(cluster_summaries, cluster_keywords)

# Print the summary table with keywords.
get_cluster_summaries_with_keywords_summary(cluster_summaries_with_keywords)

# Required Functions to get cluster reviews with keywords

def get_cluster_reviews_with_keywords(df, clusters, cluster_keywords):
    """Tag every row with its cluster's keywords and sample one row per cluster.

    Args:
        df: DataFrame with a 'cluster' column; gains a 'keywords' column
            (written in place).
        clusters: Cluster id per row of ``df``, in row order.
        cluster_keywords: Sequence indexed by cluster id.

    Returns:
        The result of sampling one random row from each 'cluster' group.
    """
    # Look the keywords up one row at a time: cluster_keywords may be a plain
    # list, which cannot be indexed with a whole array of cluster ids.
    df['keywords'] = [cluster_keywords[cluster_id] for cluster_id in clusters]
    # Get cluster reviews with keywords
    cluster_reviews_with_keywords = df.groupby('cluster').apply(lambda x: x.sample(1))
    return cluster_reviews_with_keywords

def get_cluster_reviews_with_keywords_summary(cluster_reviews_with_keywords):
    """Print ``cluster_reviews_with_keywords`` to standard output."""
    rendered = str(cluster_reviews_with_keywords)
    print(rendered)

# Sample one review per cluster, with the cluster's keywords attached.
cluster_reviews_with_keywords = get_cluster_reviews_with_keywords(df, clusters, cluster_keywords)

# Print the sampled reviews.
get_cluster_reviews_with_keywords_summary(cluster_reviews_with_keywords)

# Required Functions to get cluster summaries with reviews and keywords

def get_cluster_summaries_with_reviews_and_keywords(cluster_summaries, cluster_reviews_with_keywords):
    """Copy the sampled review texts into ``cluster_summaries['review']``.

    Values are taken positionally (``.values``) from the 'text' column of
    ``cluster_reviews_with_keywords``. The frame is modified in place and
    returned.
    """
    sampled_texts = cluster_reviews_with_keywords['text'].values
    cluster_summaries['review'] = sampled_texts
    return cluster_summaries

def get_cluster_summaries_with_reviews_and_keywords_summary(cluster_summaries_with_reviews_and_keywords):
    """Print ``cluster_summaries_with_reviews_and_keywords`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_and_keywords)
    print(rendered)

# Add one sampled review per cluster to the summary table.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_and_keywords = get_cluster_summaries_with_reviews_and_keywords(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table with reviews and keywords.
get_cluster_summaries_with_reviews_and_keywords_summary(cluster_summaries_with_reviews_and_keywords)

# Required Functions to get cluster summaries with reviews, ratings, and keywords

def get_cluster_summaries_with_reviews_ratings_and_keywords(cluster_summaries, cluster_reviews_with_keywords):
    """Copy sampled review texts and ratings into ``cluster_summaries``.

    'review' comes from the 'text' column and 'rating' from the
    'reviews.rating' column of ``cluster_reviews_with_keywords``, both taken
    positionally (``.values``). The frame is modified in place and returned.
    """
    sampled = cluster_reviews_with_keywords
    cluster_summaries['review'] = sampled['text'].values
    cluster_summaries['rating'] = sampled['reviews.rating'].values
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_and_keywords_summary(cluster_summaries_with_reviews_ratings_and_keywords):
    """Print ``cluster_summaries_with_reviews_ratings_and_keywords`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_and_keywords)
    print(rendered)

# Add the sampled review and its rating to the summary table.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_and_keywords = get_cluster_summaries_with_reviews_ratings_and_keywords(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table with reviews, ratings and keywords.
get_cluster_summaries_with_reviews_ratings_and_keywords_summary(cluster_summaries_with_reviews_ratings_and_keywords)

# Required Functions to get cluster summaries with reviews, ratings, keywords, and sentiment

def get_cluster_summaries_with_reviews_ratings_keywords_and_sentiment(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating' and 'sentiment' columns to ``cluster_summaries``.

    'review' and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``; 'sentiment' is the label the default
    'sentiment-analysis' pipeline assigns to each review. The frame is
    modified in place and returned.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews
    sentiment_analyzer = pipeline('sentiment-analysis')
    # truncation=True keeps reviews longer than the model's maximum input
    # length from making the pipeline call fail.
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(
        lambda x: sentiment_analyzer(x, truncation=True)[0]['label']
    )
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_and_sentiment_summary(cluster_summaries_with_reviews_ratings_keywords_and_sentiment):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_and_sentiment`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_and_sentiment)
    print(rendered)

# Add a sentiment label for each sampled review to the summary table.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_and_sentiment = get_cluster_summaries_with_reviews_ratings_keywords_and_sentiment(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table with reviews, ratings, keywords and sentiment.
get_cluster_summaries_with_reviews_ratings_keywords_and_sentiment_summary(cluster_summaries_with_reviews_ratings_keywords_and_sentiment)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, and aspect

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment' and 'aspect' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect)
    print(rendered)

# Add sentiment and aspect information for each sampled review.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect = get_cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table with reviews, ratings, keywords, sentiment and aspect.
get_cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_and_aspect)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, aspect, and emotion

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment', 'aspect' and 'emotion' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    # Get emotion of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): same concern for the 'emotion' task name - confirm.
    emotion_analyzer = pipeline('emotion')
    cluster_summaries['emotion'] = cluster_summaries['review'].apply(lambda x: emotion_analyzer(x))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion)
    print(rendered)

# Add sentiment, aspect and emotion information for each sampled review.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion = get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table including the emotion column.
get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_and_emotion)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, aspect, emotion, and entity

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment', 'aspect', 'emotion' and 'entity' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    # Get emotion of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): same concern for the 'emotion' task name - confirm.
    emotion_analyzer = pipeline('emotion')
    cluster_summaries['emotion'] = cluster_summaries['review'].apply(lambda x: emotion_analyzer(x))
    # Get entity of cluster reviews (the 'ner' pipeline's whole return value is stored)
    entity_analyzer = pipeline('ner')
    cluster_summaries['entity'] = cluster_summaries['review'].apply(lambda x: entity_analyzer(x))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity)
    print(rendered)

# Add sentiment, aspect, emotion and entity information for each sampled review.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity = get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table including the entity column.
get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_and_entity)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, aspect, emotion, entity, and category

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment', 'aspect', 'emotion', 'entity' and 'category' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    # Get emotion of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): same concern for the 'emotion' task name - confirm.
    emotion_analyzer = pipeline('emotion')
    cluster_summaries['emotion'] = cluster_summaries['review'].apply(lambda x: emotion_analyzer(x))
    # Get entity of cluster reviews (the 'ner' pipeline's whole return value is stored)
    entity_analyzer = pipeline('ner')
    cluster_summaries['entity'] = cluster_summaries['review'].apply(lambda x: entity_analyzer(x))
    # Get category of cluster reviews: zero-shot classification against the
    # labels 'positive' and 'negative'; the whole return value is stored.
    category_analyzer = pipeline('zero-shot-classification')
    cluster_summaries['category'] = cluster_summaries['review'].apply(lambda x: category_analyzer(x, candidate_labels=['positive', 'negative']))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category)
    print(rendered)

# Add sentiment, aspect, emotion, entity and category information for each sampled review.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category = get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category(cluster_summaries, cluster_reviews_with_keywords)

# Print the summary table including the category column.
get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_and_category)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, aspect, emotion, entity, category, and emotion

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment', 'aspect', 'emotion', 'entity' and 'category' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``. Although the name ends in
    ``_category_and_emotion``, 'emotion' is written only once; no further
    column is added after 'category'.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    # Get emotion of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): same concern for the 'emotion' task name - confirm.
    emotion_analyzer = pipeline('emotion')
    cluster_summaries['emotion'] = cluster_summaries['review'].apply(lambda x: emotion_analyzer(x))
    # Get entity of cluster reviews (the 'ner' pipeline's whole return value is stored)
    entity_analyzer = pipeline('ner')
    cluster_summaries['entity'] = cluster_summaries['review'].apply(lambda x: entity_analyzer(x))
    # Get category of cluster reviews: zero-shot classification against the
    # labels 'positive' and 'negative'; the whole return value is stored.
    category_analyzer = pipeline('zero-shot-classification')
    cluster_summaries['category'] = cluster_summaries['review'].apply(lambda x: category_analyzer(x, candidate_labels=['positive', 'negative']))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion)
    print(rendered)

# Recompute the review, rating, sentiment, aspect, emotion, entity and category columns.
# NOTE: the function returns the frame it was given, so the new name refers
# to the same object as cluster_summaries.
cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion = get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion(cluster_summaries, cluster_reviews_with_keywords)

# Print the resulting summary table.
get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_and_emotion)

# Required Functions to get cluster summaries with reviews, ratings, keywords, sentiment, aspect, emotion, entity, category, emotion, and aspect

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_emotion_and_aspect(cluster_summaries, cluster_reviews_with_keywords):
    """Add 'review', 'rating', 'sentiment', 'aspect', 'emotion', 'entity' and 'category' columns to ``cluster_summaries``.

    ``cluster_summaries`` is modified in place and also returned. 'review'
    and 'rating' are copied positionally (``.values``) from
    ``cluster_reviews_with_keywords``. Although the name ends in
    ``_emotion_and_aspect``, 'emotion' and 'aspect' are each written only
    once; no further column is added after 'category'.
    """
    # Add cluster reviews with keywords to cluster summaries
    cluster_summaries['review'] = cluster_reviews_with_keywords['text'].values
    cluster_summaries['rating'] = cluster_reviews_with_keywords['reviews.rating'].values
    # Get sentiment of cluster reviews (only the label of the first result is kept)
    sentiment_analyzer = pipeline('sentiment-analysis')
    cluster_summaries['sentiment'] = cluster_summaries['review'].apply(lambda x: sentiment_analyzer(x)[0]['label'])
    # Get aspect of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): 'aspect-based-sentiment-analysis' does not look like a task
    # name documented for transformers.pipeline() - confirm this call succeeds.
    # If it raises, the columns assigned above have already been written.
    aspect_analyzer = pipeline('aspect-based-sentiment-analysis')
    cluster_summaries['aspect'] = cluster_summaries['review'].apply(lambda x: aspect_analyzer(x))
    # Get emotion of cluster reviews (the analyzer's whole return value is stored)
    # NOTE(review): same concern for the 'emotion' task name - confirm.
    emotion_analyzer = pipeline('emotion')
    cluster_summaries['emotion'] = cluster_summaries['review'].apply(lambda x: emotion_analyzer(x))
    # Get entity of cluster reviews (the 'ner' pipeline's whole return value is stored)
    entity_analyzer = pipeline('ner')
    cluster_summaries['entity'] = cluster_summaries['review'].apply(lambda x: entity_analyzer(x))
    # Get category of cluster reviews: zero-shot classification against the
    # labels 'positive' and 'negative'; the whole return value is stored.
    category_analyzer = pipeline('zero-shot-classification')
    cluster_summaries['category'] = cluster_summaries['review'].apply(lambda x: category_analyzer(x, candidate_labels=['positive', 'negative']))
    return cluster_summaries

def get_cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_emotion_and_aspect_summary(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_emotion_and_aspect):
    """Print ``cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_emotion_and_aspect`` to standard output."""
    rendered = str(cluster_summaries_with_reviews_ratings_keywords_sentiment_aspect_emotion_entity_category_emotion_and_aspect)
    print(rendered)

    




NameError: name 'df' is not defined

## 2. Sentiment Analysis using LLM

In [None]:
def analyze_sentiment(texts):
    """Classify each text as 'positive', 'negative' or 'neutral'.

    A binary SST-2 sentiment model is used; its output is mapped to three
    classes through the probability of the positive class: above 0.7 is
    'positive', below 0.3 is 'negative', anything in between is 'neutral'.

    Args:
        texts: Iterable of strings.

    Returns:
        A list with one sentiment string per input text, in input order.
    """
    # Initialize sentiment analyzer
    classifier = pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')

    results = []
    for text in texts:
        # truncation=True keeps over-long texts from failing inside the model.
        result = classifier(text, truncation=True)[0]
        # 'score' is the confidence of the *predicted* label, so a confident
        # NEGATIVE also has a high score. Convert it to the probability of the
        # positive class before applying the thresholds.
        score = result['score']
        is_positive = result['label'].upper() == 'POSITIVE'
        positive_prob = score if is_positive else 1.0 - score
        if positive_prob > 0.7:
            sentiment = 'positive'
        elif positive_prob < 0.3:
            sentiment = 'negative'
        else:
            sentiment = 'neutral'
        results.append(sentiment)

    return results

## 3. Product Categorization using Clustering

In [None]:
def create_embeddings(texts):
    """Encode ``texts`` with the 'all-MiniLM-L6-v2' sentence-embedding model.

    Args:
        texts: A single string, or an iterable of strings (list, pandas
            Series, ...).

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for the input.
    """
    model = SentenceTransformer('all-MiniLM-L6-v2')
    # Normalise iterables to a plain list: a pandas Series with a
    # non-contiguous index is not safe to index by position inside encode().
    if not isinstance(texts, str):
        texts = list(texts)
    return model.encode(texts)

def cluster_products(embeddings, n_clusters=5):
    """Run KMeans (``random_state=42``) on ``embeddings``.

    Returns the result of ``fit_predict``, i.e. one cluster label per row.
    """
    return KMeans(n_clusters=n_clusters, random_state=42).fit_predict(embeddings)

## 4. Evaluation and Visualization

In [None]:
def plot_sentiment_distribution(sentiments):
    """Show a count plot of the values in ``sentiments``."""
    figure_size = (10, 6)
    plt.figure(figsize=figure_size)
    sns.countplot(x=sentiments)
    plt.title('Sentiment Distribution')
    plt.show()

def evaluate_clusters(embeddings, clusters):
    """Print the silhouette score of ``clusters`` on ``embeddings`` (3 decimals)."""
    quality = silhouette_score(embeddings, clusters)
    print(f'Silhouette Score: {quality:.3f}')