In [1]:
import pandas as pd
from keybert import KeyBERT # Pre-trained BERT-based model for keyword extraction. It finds important keywords/phrases from text using embeddings.

# ------------------------------
# 1. Load CSV File (replace with your file path)
# ------------------------------
df = pd.read_csv("television_reviews.csv")

# Find the review/comment column automatically
review_column = None
for col in df.columns:
    if "review" in col.lower() or "text" in col.lower() or "comment" in col.lower():
        review_column = col
        break

if review_column is None:
    raise ValueError("No column found with 'review', 'text', or 'comment' in the name!")

print(f"Using column: {review_column}")

# Combine all reviews into one large text
text = " ".join(df[review_column].dropna().astype(str))

# ------------------------------
# 2. Initialize KeyBERT
# ------------------------------
kw_model = KeyBERT('distilbert-base-nli-mean-tokens')

# ------------------------------
# 3. Extract top 10 keywords/phrases
# ------------------------------
keywords = kw_model.extract_keywords(
    text,
    keyphrase_ngram_range=(1, 2),  # unigrams and bigrams
    stop_words='english',
    top_n=10
)

# ------------------------------
# 4. Print results
# ------------------------------
print("\nExtracted Keywords from Reviews:")
for kw, score in keywords:
    print(f"- {kw}  (Relevance: {round(score, 3)})")


Using column: Comment

Extracted Keywords from Reviews:
- friendly netflix  (Relevance: 0.397)
- smoothly netflix  (Relevance: 0.381)
- sharp gaming  (Relevance: 0.37)
- friendly hdr  (Relevance: 0.362)
- worth netflix  (Relevance: 0.345)
- friendly gaming  (Relevance: 0.343)
- energy efficient  (Relevance: 0.341)
- sharp hdr  (Relevance: 0.339)
- sharp netflix  (Relevance: 0.335)
- netflix great  (Relevance: 0.335)
