Basic tutorial - https://colab.research.google.com/drive/1FieRA9fLdkQEGDIMYl0I3MCjSUKVF8C-?usp=sharing#scrollTo=S9qDqEHddgKq

Best practices - https://colab.research.google.com/drive/1BoQ_vakEVtojsd2x_U6-_x52OOuqruj2?usp=sharing

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import sys

folder_path = '/content/drive/MyDrive/Nova IMS/Text Mining/Project'
sys.path.append(folder_path)

In [1]:
import time
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import emoji
import Preprocessing_topicM as preproc
import nltk
import warnings
warnings.filterwarnings("ignore")

nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [11]:
reviews_raw = pd.read_csv(r"data_hyderabad\10k_reviews.csv")

In [12]:
reviews_data = reviews_raw[reviews_raw["Rating"].notna() & reviews_raw["Review"].notna()]
reviews_data.isna().sum()

Restaurant    0
Reviewer      0
Review        0
Rating        0
Metadata      0
Time          0
Pictures      0
dtype: int64

In [13]:
# split UPPERCASE WORDS
def splitting_words_process(word):
    # only upper case letters
    if word.isupper():
        return word

    # more than one upper case letter inside
    elif re.search(r'[A-Z][a-z]*[A-Z]', word):
        split_word = re.findall(r'[A-Z][a-z]*', word)
        return ' '.join(split_word)

    # <2 upper case letters
    else:
        return word

reviews_data['Review'] = reviews_data['Review'].apply(lambda x: ' '.join([splitting_words_process(word) for word in x.split()]))

In [14]:
# Function to replace 'gud', 'goo', 'gd' with the appropriate 'good'
def replace_gud_with_good(text):
    if isinstance(text, str):
        # Define the regex pattern to match 'gud', 'goo', 'gd' in various capitalizations
        pattern = re.compile(r'\b([Gg][Uu][Dd]|[Gg][Oo][Oo]|[Gg][Dd])\b')

        # Replacement function to check the case of the first letter
        def replacement(match):
            word = match.group()
            # Check if the first letter is uppercase, then return 'Good', else 'good'
            if word[0].isupper():
                return 'Good'
            else:
                return 'good'

        # Use re.sub to apply the replacement function
        return pattern.sub(replacement, text)

    return text

# Apply the function to the 'Review' column to replace the variants of 'good'
reviews_data['Review'] = reviews_data['Review'].apply(replace_gud_with_good)

In [15]:
# Function to replace 'kk', 'Oke', 'k', 'Ok' with 'ok'
def replace_to_ok(text):
    if isinstance(text, str):
        # Define the regex pattern to match the variants of 'ok'
        pattern = re.compile(r'\b(k|kk|Ok|Oke)\b', re.IGNORECASE)

        # Replacement function to return 'ok' for all matched words
        def replacement(match):
            return 'ok'

        # Use re.sub to apply the replacement function
        return pattern.sub(replacement, text)

    return text

# Apply the function to the 'Review' column to replace the variants of 'ok'
reviews_data['Review'] = reviews_data['Review'].apply(replace_to_ok)

In [16]:
# add space after ! | " | # | $ | % | & | ( | ) | * | + | , | . | : | ; followed immediately by a word
def add_space_after_punctuation(df):

    df['Review'] = df['Review'].apply(lambda text: re.sub(r'([\u0021-\u0026\u0028-\u002C\u002E\u003A-\u003F]+(?=\w))', r'\1 ', text) if isinstance(text, str) else text)
    return df

# Example usage:
reviews_data = add_space_after_punctuation(reviews_data)

In [17]:
# remove gibberish words like "ggggggggggd", "eshjdgue"
def remove_gibberish(text):
    cleaned_text = re.sub(r'\b\w{15,}\b', '', text)  # removes 15+ words
    cleaned_text = re.sub(r'\b\w*(\w)\1{2,}\w*\b', '', cleaned_text)  # removes words that contain 3+ repeating letters

    return cleaned_text

reviews_data['Review'] = reviews_data['Review'].apply(remove_gibberish)

In [18]:
def remove_space_before_punctuation(text):
    if isinstance(text, str):
        # Define the regex pattern to match spaces before punctuation
        pattern = re.compile(r'\s+([?.!,;:])')

        # Use re.sub to remove spaces before punctuation
        return pattern.sub(r'\1', text)

    return text

# Apply the function to the 'Review' column to remove spaces before punctuation
reviews_data['Review'] = reviews_data['Review'].apply(remove_space_before_punctuation)

In [19]:
reviews_preproc = reviews_data['Review'].apply(lambda x: preproc.main_pipeline(
    x,
    print_output=False,
    no_stopwords=False,
    custom_stopwords=[],
    convert_diacritics=True,
    no_punctuation=False,
    remove_contractions = True,
    lowercase=False,
    lemmatized=False,
    stemmed=False,
    tokenized_output=False
))

In [20]:
def remove_space_before_punctuation(text):
    if isinstance(text, str):
        # Define the regex pattern to match spaces before punctuation
        pattern = re.compile(r'\s+([?.!,;:])')

        # Use re.sub to remove spaces before punctuation
        return pattern.sub(r'\1', text)

    return text

# Apply the function to the 'Review' column to remove spaces before punctuation
reviews_preproc = reviews_preproc.apply(remove_space_before_punctuation)

In [21]:
reviews_preproc = reviews_preproc[reviews_preproc.apply(lambda x: len(x) >= 3 if isinstance(x, str) else False)]

In [22]:
len(reviews_preproc)

9910

In [23]:
print(reviews_preproc)

0       The ambience was good, food was quite good. ha...
1       Ambience is too good for a pleasant evening. S...
2       A must try.. great food great ambience. Thnx f...
3       Soumen das and Arun was a great guy. Only beca...
4       Food is good. we ordered Kodi drumsticks and b...
                              ...                        
9995    Madhumathi Mahajan Well to start with nice cou...
9996    This place has never disappointed us.. The foo...
9997    Bad rating is mainly because of "Chicken Bone ...
9998    I personally love and prefer Chinese Food. Had...
9999    Checked in here to try some delicious chinese ...
Name: Review, Length: 9910, dtype: object


In [24]:
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, PartOfSpeech
from sklearn.feature_extraction.text import CountVectorizer
from bertopic.vectorizers import ClassTfidfTransformer
from umap import UMAP
from hdbscan import HDBSCAN

In [25]:
from sentence_transformers import SentenceTransformer

# Pre-calculate embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(reviews_preproc.tolist(), show_progress_bar=True)

Batches:   0%|          | 0/310 [00:00<?, ?it/s]

In [69]:
vectorizer_model = CountVectorizer(stop_words="english", ngram_range=(1, 1))
ctfidf_model = ClassTfidfTransformer()
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine')
hdbscan_model = HDBSCAN(min_cluster_size=50,metric='euclidean', cluster_selection_method='eom', prediction_data=True)

In [70]:
model_base = BERTopic(
    language="english",
    calculate_probabilities=True,
    embedding_model= embedding_model,
    vectorizer_model=vectorizer_model,
    ctfidf_model=ctfidf_model,
    umap_model= umap_model,
    hdbscan_model=hdbscan_model,
    representation_model={"MMR": MaximalMarginalRelevance(diversity=0.3),
                          "KeyBert": KeyBERTInspired(),
                          "Pos": PartOfSpeech()},
    verbose = True
)
topics_base, probs_base = model_base.fit_transform(reviews_preproc,embeddings)

2024-12-15 21:45:02,068 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-15 21:45:20,810 - BERTopic - Dimensionality - Completed ✓
2024-12-15 21:45:20,811 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-15 21:45:21,614 - BERTopic - Cluster - Completed ✓
2024-12-15 21:45:21,619 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-15 21:45:25,080 - BERTopic - Representation - Completed ✓


In [71]:
model_base.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,MMR,KeyBert,Pos,Representative_Docs
0,-1,121,-1_nice_super_awesome_sonalin,"[nice, super, awesome, sonalin, voice, job, te...","[awesome, sonalin, voice, singer, priyadarshin...","[nice, awesome, great, cool, good, excellent, ...","[nice, super, awesome, voice, job, test, singe...","[nice, nice, Nice]"
1,0,8925,0_food_good_place_service,"[food, good, place, service, chicken, taste, o...","[food, chicken, ambience, biryani, restaurant,...","[restaurant, buffet, food, dishes, lunch, dinn...","[food, good, place, service, chicken, taste, a...",[Good place to dine with family and friends. T...
2,1,335,1_delivery_time_fast_good,"[delivery, time, fast, good, delivered, late, ...","[delivery, delivered, super, deliver, man, ser...","[delivery, deliver, delivered, timely, delay, ...","[delivery, time, fast, good, late, boy, quick,...","[Delivery is Good on Time, Good delivery time,..."
3,2,307,2_good_bhehave_verry_nice,"[good, bhehave, verry, nice, , , , , , ]","[good, bhehave, verry, nice, , , , , , , , , ,...","[good, nice, , , , , , , , ]","[good, nice, , , , , , , , ]","[Good, good, good]"
4,3,91,3_excellent_good_nice_awsome,"[excellent, good, nice, awsome, boy, great, th...","[excellent, awsome, thank, rider, amazing, guy...","[excellent, great, good, amazing, nice, perfec...","[excellent, good, nice, boy, great, rider, guy...","[excellent, excellent, excellent]"
5,4,73,4_service_good_superb_thank,"[service, good, superb, thank, nice, thanks, e...","[service, superb, sarvic, rehman, customer, pr...","[service, services, served, customer, best, ex...","[service, good, superb, nice, thanks, excellen...","[good service keep it up, very good service, v..."
6,5,58,5_momos_momo_kurkure_steamed,"[momos, momo, kurkure, steamed, chicken, fried...","[momos, kurkure, steamed, chicken, mayo, mayon...","[momos, momo, steamed, kurkure, tasty, kukure,...","[momos, momo, kurkure, steamed, chicken, fried...",[Big fan of kurkure chicken momos served here....


In [72]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_base[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_base[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_base.topic_aspects_["MMR"].items()}
topic_info_KB = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_base.topic_aspects_["KeyBert"].items()}
topic_info_POS = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_base.topic_aspects_["Pos"].items()}
reviews_with_topics['Topics_MMR'] = reviews_with_topics['Top_3_Topics'].apply(lambda topics: [topic_info_MMR[topic] for topic in topics])
reviews_with_topics['Topics_KB'] = reviews_with_topics['Top_3_Topics'].apply(lambda topics: [topic_info_KB[topic] for topic in topics])
reviews_with_topics['Topics_POS'] = reviews_with_topics['Top_3_Topics'].apply(lambda topics: [topic_info_POS[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics


Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB,Topics_POS
0,"The ambience was good, food was quite good. ha...","[0, 5, 1]","[79.72, 7.0, 4.7]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
1,Ambience is too good for a pleasant evening. S...,"[0, 5, 1]","[100.0, 0.0, 0.0]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
2,A must try.. great food great ambience. Thnx f...,"[0, 5, 1]","[89.3, 3.58, 2.5]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
3,Soumen das and Arun was a great guy. Only beca...,"[0, 5, 1]","[94.65, 1.89, 1.21]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 5, 1]","[78.44, 8.28, 4.69]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
...,...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[0, 5, 1]","[97.41, 1.12, 0.51]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
9996,This place has never disappointed us.. The foo...,"[0, 5, 1]","[100.0, 0.0, 0.0]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
9997,"Bad rating is mainly because of ""Chicken Bone ...","[0, 5, 1]","[78.72, 10.44, 3.74]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."
9998,I personally love and prefer Chinese Food. Had...,"[0, 5, 1]","[100.0, 0.0, 0.0]","[food | chicken | ambience, momos | kurkure | ...","[restaurant | buffet | food, momos | momo | st...","[food | good | place, momos | momo | kurkure, ..."


In [53]:
hierarchical_topics = model_base.hierarchical_topics(reviews_preproc)

100%|██████████| 52/52 [00:00<00:00, 257.71it/s]


In [54]:
model_base.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [55]:
len(model_base.get_topics())

54

In [56]:
model_reduced = model_base.reduce_topics(reviews_preproc, nr_topics=10)

2024-12-15 21:38:16,524 - BERTopic - Topic reduction - Reducing number of topics
2024-12-15 21:38:20,638 - BERTopic - Topic reduction - Reduced number of topics from 54 to 10


In [57]:
topics_reduced = model_reduced.topics_
probs_reduced = model_reduced.probabilities_

In [58]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_merged = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_reduced[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_reduced[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_merged = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_reduced.topic_aspects_["MMR"].items()}
topic_info_KB_merged = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_reduced.topic_aspects_["KeyBert"].items()}
topic_info_POS_merged = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_reduced.topic_aspects_["Pos"].items()}
reviews_with_topics_merged['Topics_MMR'] = reviews_with_topics_merged['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_merged[topic] for topic in topics])
reviews_with_topics_merged['Topics_KB'] = reviews_with_topics_merged['Top_3_Topics'].apply(lambda topics: [topic_info_KB_merged[topic] for topic in topics])
reviews_with_topics_merged['Topics_POS'] = reviews_with_topics_merged['Top_3_Topics'].apply(lambda topics: [topic_info_POS_merged[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_merged

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB,Topics_POS
0,"The ambience was good, food was quite good. ha...","[0, 2, 4]","[58.83, 5.82, 2.5]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
1,Ambience is too good for a pleasant evening. S...,"[0, 2, 4]","[77.88, 3.42, 1.36]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
2,A must try.. great food great ambience. Thnx f...,"[0, 2, 4]","[59.79, 5.87, 2.14]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
3,Soumen das and Arun was a great guy. Only beca...,"[0, 2, 7]","[81.65, 3.0, 1.4]","[food, chocolate, sizzlers]","[restaurant, icecream, sizzlers]","[food, cake, sizzlers]"
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 2, 4]","[15.72, 1.45, 0.73]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
...,...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[0, 2, 4]","[100.0, 0.0, 0.0]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
9996,This place has never disappointed us.. The foo...,"[0, 2, 4]","[46.7, 2.23, 1.52]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
9997,"Bad rating is mainly because of ""Chicken Bone ...","[0, 2, 4]","[20.43, 1.44, 1.09]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"
9998,I personally love and prefer Chinese Food. Had...,"[0, 2, 4]","[100.0, 0.0, 0.0]","[food, chocolate, spicy]","[restaurant, icecream, spicy]","[food, cake, spicy]"


In [59]:
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, PartOfSpeech

# Set the random seed for reproducibility
random_seed = 42

# Initialize the UMAP model with a random seed
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=random_seed)

# Initialize the HDBSCAN model with a random seed
hdbscan_model = HDBSCAN(min_cluster_size=30, metric='euclidean', cluster_selection_method='eom', prediction_data=True)

# Initialize the BERTopic model with the random seed
model_custom_merged = BERTopic(
    language="english",
    calculate_probabilities=True,
    embedding_model=embedding_model,
    vectorizer_model=vectorizer_model,
    ctfidf_model=ctfidf_model,
    umap_model=umap_model,
    hdbscan_model=hdbscan_model,
    representation_model={
        "MMR": MaximalMarginalRelevance(diversity=0.3),
        "KeyBert": KeyBERTInspired(),
        "Pos": PartOfSpeech()
    },
    verbose=True,
)

# Fit the model and transform the data
topics_custom, probs_custom = model_custom_merged.fit_transform(reviews_preproc, embeddings)

2024-12-15 21:38:44,946 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-15 21:39:01,401 - BERTopic - Dimensionality - Completed ✓
2024-12-15 21:39:01,403 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-15 21:39:02,998 - BERTopic - Cluster - Completed ✓
2024-12-15 21:39:03,004 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-15 21:39:24,301 - BERTopic - Representation - Completed ✓


In [61]:
hierarchical_topics = model_custom_merged.hierarchical_topics(reviews_preproc)

100%|██████████| 52/52 [00:00<00:00, 212.98it/s]


In [62]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[41,42,2],[53,3],[29,2,7],[32,17,47],[34,37],[53,3],[4,10],[25,22,28,26]])

In [None]:
len(model_custom_merged.get_topic_info())

44

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[25,11,4], [26,8],[18,16],[38,6],[0,41]])

In [None]:
len(model_custom_merged.get_topic_info())

38

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[20,27],[7,26,16], [12,15],[24,36,31,27,32],[8,23]])

In [None]:
len(model_custom_merged.get_topic_info())

30

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[3,2], [4,8],[12,14]])

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
topics_custom = model_custom_merged.topics_
probs_custom = model_custom_merged.probabilities_

In [None]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}
reviews_with_topics_custom['Topics_MMR'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_KB'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_KB_custom[topic] for topic in topics])
#reviews_with_topics_custom['Topics_POS'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_POS_custom[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB
0,"The ambience was good, food was quite good. ha...","[3, 0, 9]","[12.87, 11.6, 5.24]","[place, service, momos]","[hangout, service, momos]"
1,Ambience is too good for a pleasant evening. S...,"[3, 0, 9]","[50.38, 5.48, 3.94]","[place, service, momos]","[hangout, service, momos]"
2,A must try.. great food great ambience. Thnx f...,"[3, 0, 9]","[20.3, 6.49, 5.25]","[place, service, momos]","[hangout, service, momos]"
3,Soumen das and Arun was a great guy. Only beca...,"[0, 3, 9]","[29.91, 7.12, 4.5]","[service, place, momos]","[service, hangout, momos]"
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 3, 5]","[6.73, 6.39, 6.16]","[service, place, buffet]","[service, hangout, buffet]"
...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[5, 6, 9]","[29.27, 7.66, 5.46]","[buffet, paneer, momos]","[buffet, punjabi, momos]"
9996,This place has never disappointed us.. The foo...,"[8, 6, 9]","[50.48, 3.95, 3.11]","[chinese, paneer, momos]","[chinese, punjabi, momos]"
9997,"Bad rating is mainly because of ""Chicken Bone ...","[8, 6, 9]","[6.12, 3.17, 2.46]","[chinese, paneer, momos]","[chinese, punjabi, momos]"
9998,I personally love and prefer Chinese Food. Had...,"[8, 6, 9]","[68.19, 2.97, 2.34]","[chinese, paneer, momos]","[chinese, punjabi, momos]"


In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[16,23,21,25], [4,3]])

In [None]:
len(model_custom_merged.get_topic_info())

23

In [None]:
model_custom_merged.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,MMR,KeyBert,Pos,Representative_Docs
0,-1,4328,-1_food_good_place_service,"[food, good, place, service, chicken, ambience...","[food, service, chicken, ambience, experience,...","[restaurant, buffet, dishes, food, lunch, dinn...","[food, good, place, service, chicken, ambience...",[Visited the place on sunday and the place was...
1,0,947,0_place_ambience_food_hyderabad,"[place, ambience, food, hyderabad, good, music...","[place, ambience, hyderabad, dj, gachibowli, v...","[hyderabad, hyderabadi, restaurant, place, pub...","[place, ambience, food, hyderabad, good, music...",[One of my favourite place in Hyderabad Good ...
2,1,823,1_service_food_staff_manager,"[service, food, staff, manager, experience, go...","[service, staff, manager, experience, table, r...","[service, served, customer, customers, serving...","[service, food, staff, manager, experience, go...","[good service, service is good, As the name su..."
3,2,654,2_delivery_good_nice_fast,"[delivery, good, nice, fast, order, time, boy,...","[delivery, delivered, received, deliver, man, ...","[delivery, deliver, delivered, excellent, good...","[delivery, good, nice, fast, order, time, boy,...","[Good delivery, good delivery, good delivery]"
4,3,601,3_biryani_chicken_taste_ordered,"[biryani, chicken, taste, ordered, good, mutto...","[biryani, chicken, mutton, biriyani, rice, res...","[biryani, biryanis, biriyani, briyani, mutton,...","[biryani, chicken, taste, good, mutton, biriya...",[Paradise Biryani is really good service also ...
5,4,299,4_buffet_chicken_veg_fish,"[buffet, chicken, veg, fish, starters, good, m...","[buffet, chicken, lunch, food, desserts, panee...","[buffet, restaurant, dishes, lunch, dish, dinn...","[buffet, chicken, veg, fish, starters, good, m...",[A go-to place in Gachibowli (especially for T...
6,5,275,5_paneer_punjabi_paratha_indian,"[paneer, punjabi, paratha, indian, north, para...","[paneer, punjabi, parathas, food, masala, chut...","[punjabi, paneer, meal, punjab, restaurant, pa...","[paneer, punjabi, paratha, indian, north, para...",[Awesome North Indian punjabi food served here...
7,6,260,6_quantity_taste_quality_food,"[quantity, taste, quality, food, bad, good, wa...","[quantity, taste, quality, food, bad, poor, sa...","[quantity, quality, portions, food, value, tas...","[quantity, taste, quality, food, bad, good, pr...","[quantity less but quality good, Food is not t..."
8,7,239,7_chinese_noodles_rice_fried,"[chinese, noodles, rice, fried, manchurian, fo...","[chinese, noodles, rice, manchurian, chicken, ...","[cuisine, noodles, rice, restaurant, dishes, c...","[chinese, noodles, rice, fried, manchurian, fo...",[As soon you enter the restaurant you get an a...
9,8,217,8_momos_haleem_wrap_coffee,"[momos, haleem, wrap, coffee, sizzlers, cafe, ...","[momos, sizzlers, cafe, kurkure, bikes, wraps,...","[momos, momo, chicken, steamed, rice, tasty, m...","[momos, haleem, wrap, coffee, sizzlers, cafe, ...","[Want to enjoy best steamed momos, this is a m..."


In [None]:
topics_custom = model_custom_merged.topics_
probs_custom = model_custom_merged.probabilities_

In [None]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}
reviews_with_topics_custom['Topics_MMR'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_KB'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_KB_custom[topic] for topic in topics])
#reviews_with_topics_custom['Topics_POS'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_POS_custom[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB
0,"The ambience was good, food was quite good. ha...","[0, 1, 8]","[14.87, 11.6, 5.24]","[place | ambience | hyderabad, service | staff...","[hyderabad | hyderabadi | restaurant, service ..."
1,Ambience is too good for a pleasant evening. S...,"[0, 1, 8]","[51.96, 5.48, 3.94]","[place | ambience | hyderabad, service | staff...","[hyderabad | hyderabadi | restaurant, service ..."
2,A must try.. great food great ambience. Thnx f...,"[0, 1, 8]","[22.3, 6.49, 5.25]","[place | ambience | hyderabad, service | staff...","[hyderabad | hyderabadi | restaurant, service ..."
3,Soumen das and Arun was a great guy. Only beca...,"[1, 0, 8]","[29.91, 8.8, 4.5]","[service | staff | manager, place | ambience |...","[service | served | customer, hyderabad | hyde..."
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 1, 4]","[8.96, 6.73, 6.16]","[place | ambience | hyderabad, service | staff...","[hyderabad | hyderabadi | restaurant, service ..."
...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[4, 0, 5]","[29.27, 8.22, 7.66]","[buffet | chicken | lunch, place | ambience | ...","[buffet | restaurant | dishes, hyderabad | hyd..."
9996,This place has never disappointed us.. The foo...,"[7, 5, 0]","[50.48, 3.95, 3.81]","[chinese | noodles | rice, paneer | punjabi | ...","[cuisine | noodles | rice, punjabi | paneer | ..."
9997,"Bad rating is mainly because of ""Chicken Bone ...","[7, 5, 0]","[6.12, 3.17, 2.68]","[chinese | noodles | rice, paneer | punjabi | ...","[cuisine | noodles | rice, punjabi | paneer | ..."
9998,I personally love and prefer Chinese Food. Had...,"[7, 5, 0]","[68.19, 2.97, 2.89]","[chinese | noodles | rice, paneer | punjabi | ...","[cuisine | noodles | rice, punjabi | paneer | ..."


In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[9,10,18], [16,20]])

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
topics_custom = model_custom_merged.topics_
probs_custom = model_custom_merged.probabilities_

In [233]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}
reviews_with_topics_custom['Topics_MMR'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_KB'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_KB_custom[topic] for topic in topics])
#reviews_with_topics_custom['Topics_POS'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_POS_custom[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB
0,"The ambience was good, food was quite good. ha...","[0, 1, 2]","[34.63, 14.87, 11.6]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
1,Ambience is too good for a pleasant evening. S...,"[1, 0, 2]","[51.96, 24.97, 5.48]","[place | ambience | hyderabad, chicken | birya...","[restaurant | place | hangout, buffet | restau..."
2,A must try.. great food great ambience. Thnx f...,"[0, 1, 2]","[32.72, 22.3, 6.49]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
3,Soumen das and Arun was a great guy. Only beca...,"[2, 0, 1]","[29.91, 27.3, 8.8]","[service | staff | experience, chicken | birya...","[service | served | customer, buffet | restaur..."
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 1, 2]","[43.51, 8.96, 6.73]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[0, 1, 2]","[75.76, 8.22, 5.39]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
9996,This place has never disappointed us.. The foo...,"[0, 1, 3]","[73.95, 3.81, 2.52]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
9997,"Bad rating is mainly because of ""Chicken Bone ...","[0, 1, 3]","[24.57, 2.68, 1.98]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."
9998,I personally love and prefer Chinese Food. Had...,"[0, 1, 3]","[85.72, 2.89, 1.9]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla..."


In [None]:
model_custom_merged.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,MMR,KeyBert,Pos,Representative_Docs
0,-1,4328,-1_food_good_place_service,"[food, good, place, service, chicken, ambience...","[food, service, chicken, ambience, experience,...","[restaurant, buffet, dishes, food, lunch, dinn...","[food, good, place, service, chicken, ambience...",[The food was awesome and as was the ambience....
1,0,947,0_place_food_good_ambience,"[place, food, good, ambience, hyderabad, music...","[place, ambience, hyderabad, dj, gachibowli, v...","[hyderabad, hyderabadi, restaurant, place, pub...","[place, food, good, ambience, hyderabad, music...",[One of my favourite place in Hyderabad Good ...
2,1,823,1_service_food_staff_manager,"[service, food, staff, manager, good, experien...","[service, staff, manager, experience, table, r...","[service, served, customer, customers, serving...","[service, food, staff, manager, good, experien...","[service is good, Good service, As the name su..."
3,2,654,2_delivery_good_nice_fast,"[delivery, good, nice, fast, order, time, boy,...","[delivery, delivered, received, deliver, man, ...","[delivery, deliver, delivered, excellent, good...","[delivery, good, nice, fast, order, time, boy,...","[good delivery, good delivery, Good delivery]"
4,3,601,3_biryani_chicken_taste_ordered,"[biryani, chicken, taste, ordered, good, mutto...","[biryani, chicken, mutton, biriyani, rice, res...","[biryani, biryanis, biriyani, briyani, mutton,...","[biryani, chicken, taste, good, mutton, biriya...",[Paradise Biryani is really good service also ...
5,4,465,4_cake_cream_chocolate_ice,"[cake, cream, chocolate, ice, donuts, shake, c...","[chocolate, donuts, shake, cakes, brownies, co...","[flavors, chocolate, flavours, bakery, donuts,...","[cake, cream, chocolate, ice, donuts, cakes, p...",[This is a small ice cream parlour. It has all...
6,5,299,5_buffet_chicken_veg_good,"[buffet, chicken, veg, good, fish, starters, m...","[buffet, chicken, food, lunch, soup, paneer, t...","[buffet, restaurant, dishes, lunch, dish, dinn...","[buffet, chicken, veg, good, fish, starters, m...",[Recently we were invited for a food tasting s...
7,6,275,6_paneer_punjabi_paratha_indian,"[paneer, punjabi, paratha, indian, north, para...","[paneer, punjabi, paratha, food, masala, chutn...","[punjabi, punjab, paneer, restaurant, meal, hy...","[paneer, punjabi, paratha, indian, north, para...",[Awesome North Indian punjabi food served here...
8,7,260,7_quantity_taste_quality_food,"[quantity, taste, quality, food, bad, good, wa...","[quantity, taste, quality, food, bad, poor, sa...","[quantity, quality, portions, food, taste, val...","[quantity, taste, quality, food, bad, good, pr...","[quantity less but quality good, very less qua..."
9,8,239,8_chinese_noodles_rice_fried,"[chinese, noodles, rice, fried, manchurian, fo...","[chinese, noodles, rice, manchurian, ordered, ...","[cuisine, noodles, rice, restaurant, dishes, c...","[chinese, noodles, rice, fried, manchurian, fo...",[As soon you enter the restaurant you get an a...


## **END  - FINAL VERSION ABOVE**

In [None]:
reviews_with_topics_custom.to_csv('reviews_with_topics_custom.csv', index=False)

In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[17,18,14,10,16],[6,8,3,5,13,15,4,9]])

In [None]:
model_custom_merged.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,MMR,KeyBert,Pos,Representative_Docs
0,-1,4328,-1_food_good_place_service,"[food, good, place, service, chicken, ambience...","[food, service, chicken, ambience, visit, veg,...","[restaurant, buffet, lunch, food, dishes, visi...","[food, good, place, service, chicken, ambience...",[Experience at Arena E Since it is in SLN Term...
1,0,2276,0_biryani_good_chicken_place,"[biryani, good, chicken, place, food, taste, o...","[biryani, chicken, food, rice, paneer, buffet,...","[buffet, restaurant, dishes, dinner, lunch, fo...","[biryani, good, chicken, place, food, taste, v...",[Around of us went to this place to have lunch...
2,1,947,1_place_good_food_ambience,"[place, good, food, ambience, hyderabad, servi...","[place, ambience, hyderabad, visit, dj, gachib...","[hangout, restaurant, place, pub, city, places...","[place, good, food, ambience, hyderabad, servi...",[Had been to this place with friends on Saturd...
3,2,823,2_service_food_good_staff,"[service, food, good, staff, place, experience...","[service, staff, experience, manager, table, r...","[service, served, customer, excellent, quality...","[service, food, good, staff, place, experience...","[good service, service is good, good service]"
4,3,654,3_delivery_good_nice_fast,"[delivery, good, nice, fast, order, time, boy,...","[delivery, delivered, received, deliver, man, ...","[delivery, deliver, delivered, excellent, good...","[delivery, good, nice, fast, order, time, boy,...","[good delivery, good delivery, good delivery]"
5,4,391,4_chicken_shawarma_burger_mandi,"[chicken, shawarma, burger, mandi, wings, magg...","[chicken, shawarma, mandi, wings, ordered, bur...","[chicken, wings, bbq, restaurant, gachibowli, ...","[chicken, shawarma, burger, mandi, wings, magg...",[American Wild Wings is a very affordable plac...
6,5,260,5_quantity_taste_quality_food,"[quantity, taste, quality, food, good, bad, pr...","[quantity, taste, quality, food, bad, poor, sa...","[quantity, quality, portions, value, food, tas...","[quantity, taste, quality, food, good, bad, pr...","[Taste is good, but quantity is very less for ..."
7,6,120,6_spicy_super_good_test,"[spicy, super, good, test, bad, __, expected, ...","[spicy, test, bad, uncooked, stomach, oily, ta...","[spicy, spices, chilli, tasty, taste, tasteles...","[spicy, super, good, test, bad, uncooked, food...","[good but not spicy, not so good and it is not..."
8,7,111,7_zomato_gold_order_restaurant,"[zomato, gold, order, restaurant, food, servic...","[zomato, gold, order, restaurant, service, del...","[zomato, zomoto, gold, restaurant, customers, ...","[zomato, gold, order, restaurant, food, servic...",[Place was fine. And the food was okay. But i ...


In [None]:
topics_custom = model_custom_merged.topics_
probs_custom = model_custom_merged.probabilities_

In [None]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-2:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}
reviews_with_topics_custom['Topics_MMR'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_KB'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_KB_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_POS'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_POS_custom[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB,Topics_POS
0,"The ambience was good, food was quite good. ha...","[0, 1, 2]","[27.33, 14.87]","[biryani | chicken | food, place | ambience | ...","[buffet | restaurant | dishes, hangout | resta...","[biryani | good | chicken, place | good | food..."
1,Ambience is too good for a pleasant evening. S...,"[1, 0, 2]","[51.96, 19.36]","[place | ambience | hyderabad, biryani | chick...","[hangout | restaurant | place, buffet | restau...","[place | good | food, biryani | good | chicken..."
2,A must try.. great food great ambience. Thnx f...,"[0, 1, 2]","[25.83, 22.3]","[biryani | chicken | food, place | ambience | ...","[buffet | restaurant | dishes, hangout | resta...","[biryani | good | chicken, place | good | food..."
3,Soumen das and Arun was a great guy. Only beca...,"[2, 0, 1]","[29.91, 21.15]","[service | staff | experience, biryani | chick...","[service | served | customer, buffet | restaur...","[service | food | good, biryani | good | chick..."
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 1, 2]","[34.0, 8.96]","[biryani | chicken | food, place | ambience | ...","[buffet | restaurant | dishes, hangout | resta...","[biryani | good | chicken, place | good | food..."
...,...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[0, 1, 4]","[63.78, 8.22]","[biryani | chicken | food, place | ambience | ...","[buffet | restaurant | dishes, hangout | resta...","[biryani | good | chicken, place | good | food..."
9996,This place has never disappointed us.. The foo...,"[0, 4, 1]","[67.07, 4.57]","[biryani | chicken | food, chicken | shawarma ...","[buffet | restaurant | dishes, chicken | wings...","[biryani | good | chicken, chicken | shawarma ..."
9997,"Bad rating is mainly because of ""Chicken Bone ...","[0, 4, 1]","[19.09, 3.59]","[biryani | chicken | food, chicken | shawarma ...","[buffet | restaurant | dishes, chicken | wings...","[biryani | good | chicken, chicken | shawarma ..."
9998,I personally love and prefer Chinese Food. Had...,"[0, 4, 1]","[80.59, 3.4]","[biryani | chicken | food, chicken | shawarma ...","[buffet | restaurant | dishes, chicken | wings...","[biryani | good | chicken, chicken | shawarma ..."


In [None]:
model_custom_merged.visualize_hierarchy(hierarchical_topics=hierarchical_topics)

In [None]:
model_custom_merged.merge_topics(reviews_preproc, topics_to_merge=[[0,5,4,6],[7,3]])

In [None]:
topics_custom = model_custom_merged.topics_
probs_custom = model_custom_merged.probabilities_

In [None]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-2:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}
reviews_with_topics_custom['Topics_MMR'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_MMR_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_KB'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_KB_custom[topic] for topic in topics])
reviews_with_topics_custom['Topics_POS'] = reviews_with_topics_custom['Top_3_Topics'].apply(lambda topics: [topic_info_POS_custom[topic] for topic in topics])

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB,Topics_POS
0,"The ambience was good, food was quite good. ha...","[0, 1, 2]","[34.63, 14.87]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
1,Ambience is too good for a pleasant evening. S...,"[1, 0, 2]","[51.96, 24.97]","[place | ambience | hyderabad, chicken | birya...","[restaurant | place | hangout, buffet | restau...","[place | good | food, good | chicken | biryani..."
2,A must try.. great food great ambience. Thnx f...,"[0, 1, 2]","[32.72, 22.3]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
3,Soumen das and Arun was a great guy. Only beca...,"[2, 0, 1]","[29.91, 27.3]","[service | staff | experience, chicken | birya...","[service | served | customer, buffet | restaur...","[service | food | good, good | chicken | birya..."
4,Food is good. we ordered Kodi drumsticks and b...,"[0, 1, 2]","[43.51, 8.96]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
...,...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[0, 1, 2]","[75.76, 8.22]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
9996,This place has never disappointed us.. The foo...,"[0, 1, 3]","[73.95, 3.81]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
9997,"Bad rating is mainly because of ""Chicken Bone ...","[0, 1, 3]","[24.57, 2.68]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."
9998,I personally love and prefer Chinese Food. Had...,"[0, 1, 3]","[85.72, 2.89]","[chicken | biryani | food, place | ambience | ...","[buffet | restaurant | tasty, restaurant | pla...","[good | chicken | biryani, place | good | food..."


In [None]:
representation_model = model_custom_merged.representation_model

In [None]:
reviews_with_topics_custom.to_csv('reviews_with_topics_custom(mini).csv', index=False)

In [None]:
# Modify existing representation models or add new ones
representation_model["MMR"].diversity = 0.5  # Example: Change diversity of MMR


In [None]:
model_custom_merged.representation_model = representation_model

In [None]:
# Create a DataFrame with reviews and their top 3 assigned topics with probabilities
reviews_with_topics_custom = pd.DataFrame({
    'Review': reviews_preproc,
    'Top_3_Topics': [np.argsort(probs_custom[i])[-3:][::-1] for i in range(len(reviews_preproc))],
    'Top_3_Probabilities': [np.round(np.sort(probs_custom[i])[-3:][::-1] * 100, 2) for i in range(len(reviews_preproc))]
})

# Map the topic IDs to their custom names from topic_aspects_["MMR"]
#topic_info_MMR_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["MMR"].items()}
topic_info_KB_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["KeyBert"].items()}
#topic_info_POS_custom = {topic: " | ".join(list(zip(*values))[0][:1]) for topic, values in model_custom_merged.topic_aspects_["Pos"].items()}

# Apply the logic to display only the first topic when its probability is 100
def get_topics(row):
    if row['Top_3_Probabilities'][0] == 100:
        return [[topic_info_MMR_custom[row['Top_3_Topics'][0]].split(' | ')[0]], # Get the first word of the topic
                [topic_info_KB_custom[row['Top_3_Topics'][0]].split(' | ')[0]],  # Get the first word of the topic
                [topic_info_POS_custom[row['Top_3_Topics'][0]].split(' | ')[0]]]  # Get the first word of the topic
    else:
        return [[topic_info_MMR_custom[topic] for topic in row['Top_3_Topics']],
                [topic_info_KB_custom[topic] for topic in row['Top_3_Topics']],
                [topic_info_POS_custom[topic] for topic in row['Top_3_Topics']]]

reviews_with_topics_custom[['Topics_MMR', 'Topics_KB', 'Topics_POS']] = reviews_with_topics_custom.apply(get_topics, axis=1, result_type='expand')

# Display the first few rows of the new DataFrame
reviews_with_topics_custom

Unnamed: 0,Review,Top_3_Topics,Top_3_Probabilities,Topics_MMR,Topics_KB,Topics_POS
0,"The ambience was good, food was quite good. ha...","[3, 1, 4]","[12.87, 10.83, 7.23]","[place, service, chocolate]","[hangout, service, cakes]","[place, service, cake]"
1,Ambience is too good for a pleasant evening. S...,"[3, 4, 1]","[50.38, 5.47, 4.87]","[place, chocolate, service]","[hangout, cakes, service]","[place, cake, service]"
2,A must try.. great food great ambience. Thnx f...,"[3, 4, 1]","[20.3, 8.08, 5.78]","[place, chocolate, service]","[hangout, cakes, service]","[place, cake, service]"
3,Soumen das and Arun was a great guy. Only beca...,"[1, 3, 4]","[29.2, 7.12, 4.72]","[service, place, chocolate]","[service, hangout, cakes]","[service, place, cake]"
4,Food is good. we ordered Kodi drumsticks and b...,"[4, 3, 7]","[7.28, 6.39, 6.26]","[chocolate, place, buffet]","[cakes, hangout, buffet]","[cake, place, buffet]"
...,...,...,...,...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,"[13, 0, 6]","[26.99, 7.84, 7.66]","[chicken, mutton, paneer]","[buffet, biryani, punjabi]","[chicken, biryani, paneer]"
9996,This place has never disappointed us.. The foo...,"[8, 6, 0]","[50.48, 3.95, 3.69]","[chinese, paneer, mutton]","[noodles, punjabi, biryani]","[chinese, paneer, biryani]"
9997,"Bad rating is mainly because of ""Chicken Bone ...","[8, 6, 0]","[6.12, 3.17, 2.83]","[chinese, paneer, mutton]","[noodles, punjabi, biryani]","[chinese, paneer, biryani]"
9998,I personally love and prefer Chinese Food. Had...,"[8, 6, 0]","[68.19, 2.97, 2.78]","[chinese, paneer, mutton]","[noodles, punjabi, biryani]","[chinese, paneer, biryani]"


In [None]:
from sentence_transformers import SentenceTransformer

# Pre-calculate embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(reviews_preproc.tolist(), show_progress_bar=True)

Batches:   0%|          | 0/312 [00:00<?, ?it/s]

In [None]:
from umap import UMAP

umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine', random_state=42)
from hdbscan import HDBSCAN

hdbscan_model = HDBSCAN(min_cluster_size=30,metric='euclidean', cluster_selection_method='eom', prediction_data=True)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer_model = CountVectorizer(stop_words="english", min_df=2, ngram_range=(1, 2))

In [None]:
from bertopic.vectorizers import ClassTfidfTransformer

ctfidf_model = ClassTfidfTransformer()

In [None]:
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, PartOfSpeech

# KeyBERT
keybert_model = KeyBERTInspired()

# Part-of-Speech
pos_model = PartOfSpeech("en_core_web_sm")

# MMR
mmr_model = MaximalMarginalRelevance(diversity=0.2)

# All representation models
representation_model = {
    "KeyBERT": keybert_model,
    "MMR": mmr_model,
    "POS": pos_model
}

In [None]:
from bertopic import BERTopic

custom_model = BERTopic(

  # Pipeline models
  embedding_model=embedding_model,
  umap_model=umap_model,
  hdbscan_model=hdbscan_model,
  vectorizer_model=vectorizer_model,
  ctfidf_model=ctfidf_model,
  representation_model=representation_model,

  # Hyperparameters
  top_n_words=10,
  verbose=True
)

topics, probs = custom_model.fit_transform(reviews_preproc, embeddings)

2024-12-14 17:39:51,547 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-12-14 17:40:08,317 - BERTopic - Dimensionality - Completed ✓
2024-12-14 17:40:08,318 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-12-14 17:40:08,762 - BERTopic - Cluster - Completed ✓
2024-12-14 17:40:08,766 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-12-14 17:40:35,096 - BERTopic - Representation - Completed ✓


In [None]:
mrr_topic_labels = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in custom_model.topic_aspects_["MMR"].items()}
custom_model.set_topic_labels(mrr_topic_labels)

In [None]:
custom_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,MMR,POS,Representative_Docs
0,-1,4186,-1_ambience_great_amazing_starters,"[ambience, great, amazing, starters, really, n...","[chicken tikka, restaurant, good food, dishes,...","[ambience, starters, pizza, ambiance, lunch, v...","[ambience, great, amazing, starters, nice, fri...","[""Oh! Hyderabad"" brings amazing super exciting..."
1,0,577,0_biryani_chicken biryani_mutton biryani_biriyani,"[biryani, chicken biryani, mutton biryani, bir...","[biryani tasty, biryani food, hyderabadi birya...","[chicken biryani, mutton biryani, biryani good...","[biryani, biriyani, biryanis, mutton, good bir...",[Another place fo delicious Hyderabadi rum bir...
2,1,429,1_manager_rude_asked_table,"[manager, rude, asked, table, pathetic, waiter...","[serving, waiters, waiter, serve, service bad,...","[rude, waiter, customer, worst service, waiter...","[manager, rude, table, pathetic, waiter, worst...",[I visited this place with my wife on 16 March...
3,2,332,2_dj_dance_dance floor_floor,"[dj, dance, dance floor, floor, hangout, music...","[place party, places hangout, place amazing, f...","[dance, dance floor, hangout, place hangout, p...","[dj, dance, floor, hangout, music, rooftop, pa...","[This place has amazing setup, separated in mu..."
4,3,309,3_good good_good_combo good_nice good,"[good good, good, combo good, nice good, verry...","[good, good good, nice good, good nice, nice, ...","[good good, good, combo good, nice good, verry...","[good, combo, nice, , , , , , , ]","[good, good, good good]"
5,4,231,4_hyderabad_place hyderabad_places hyderabad_h...,"[hyderabad, place hyderabad, places hyderabad,...","[hyderabadi cuisine, restaurants hyderabad, hy...","[place hyderabad, places hyderabad, hyderabad ...","[hyderabad, hyderabadi, beer, beers, places, b...",[So this place is located near ikea above the ...
6,5,226,5_govind_excellent service_excellent_abs,"[govind, excellent service, excellent, abs, sh...","[excellent food, food excellent, good hospital...","[suraj, food excellent, visit soon, service go...","[excellent service, excellent, abs, suraj, exc...",[Excellent food and excellent service by bahad...
7,6,213,6_zomato_gold_zomato gold_order,"[zomato, gold, zomato gold, order, dominos, de...","[zomato restaurant, restaurant zomato, food zo...","[zomato, zomato gold, order, dominos, refund, ...","[zomato, gold, order, dominos, support, contac...","[delivery, poor customer support . Both me and..."
8,7,192,7_buffet_spread_hyatt_lunch buffet,"[buffet, spread, hyatt, lunch buffet, buffets,...","[buffet food, buffet menu, dinner buffet, plac...","[buffet, lunch buffet, buffets, buffet spread,...","[buffet, spread, buffets, carte, lunch, good b...",[I have had the breakfast buffet here quite a ...
9,8,166,8_cake_brownies_cupcakes_cakes,"[cake, brownies, cupcakes, cakes, red velvet, ...","[chocolate cake, bakery items, bakery products...","[brownies, cupcakes, cakes, cookies, chocolate...","[cake, brownies, cupcakes, cakes, red velvet, ...","[A true match for Theo' s from Mumbai, one of ..."


In [None]:
# Create a DataFrame with reviews and their assigned topics with probabilities, excluding topic -1
topic_info = custom_model.get_topic_info()
reviews_with_topics = pd.DataFrame({
    'Review': reviews_preproc,
    'Assigned_Topic': [topics[i] if topics[i] != -1 else (np.argsort(probs[i])[-2] if len(probs[i]) > 1 else np.argsort(probs[i])[-1]) for i in range(len(reviews_preproc))],
    'Assigned_Topic_Name': [topic_info.iloc[topics[i]]['CustomName'] if topics[i] != -1 else (topic_info.iloc[np.argsort(probs[i])[-2]]['CustomName'] if len(probs[i]) > 1 else topic_info.iloc[np.argsort(probs[i])[-1]]['CustomName']) for i in range(len(reviews_preproc))],
    'Topic_Probabilities': [np.round(probs[i] * 100, 2) if topics[i] != -1 else (np.round(probs[i][np.argsort(probs[i])[-2]] * 100, 2) if len(probs[i]) > 1 else np.round(probs[i][np.argsort(probs[i])[-1]] * 100, 2)) for i in range(len(reviews_preproc))]
}).dropna()

# Display the first few rows of the new DataFrame
reviews_with_topics.head()


TypeError: object of type 'numpy.float64' has no len()

Unnamed: 0,Topic,Count,Name,CustomName,Representation,KeyBERT,MMR,POS,Representative_Docs
0,-1,3387,-1_food_chicken_good_place,food | chicken | restaurant,"[food, chicken, good, place, ordered, taste, s...","[restaurant, buffet, good food, food good, dis...","[food, chicken, restaurant, order, rice, panee...","[food, chicken, good, place, taste, service, g...",[When you love food and especially north India...
1,0,1753,0_place_good_ambience_service,place | ambience | service,"[place, good, ambience, service, food, great, ...","[great place, visit place, best place, good pl...","[place, ambience, service, visit, drinks, frie...","[place, good, ambience, service, food, great, ...",[Great place to hangout . Good food + Good dri...
2,1,561,1_biryani_chicken_chicken biryani_taste,chicken biryani | biriyani | mutton biryani,"[biryani, chicken, chicken biryani, taste, bir...","[chicken biryani, biryani chicken, biryani tas...","[chicken biryani, biriyani, mutton biryani, re...","[biryani, chicken, taste, biriyani, mutton, go...","[biryani is good, Just the name of Biryani and..."
3,2,365,2_manager_service_asked_table,manager | table | rude,"[manager, service, asked, table, worst, rude, ...","[restaurant, serving, waiters, waiter, lunch, ...","[manager, table, rude, staff, waiter, customer...","[manager, service, table, worst, rude, staff, ...","[The worst place in the city, I was here for a..."
4,3,309,3_good good_good_nice good_good combo,good good | good | nice good,"[good good, good, nice good, good combo, combo...","[good, good good, nice good, good nice, nice, ...","[good good, good, nice good, good combo, combo...","[good, combo, nice, , , , , , , ]","[good, good, good good]"
5,4,233,4_hyderabad_best_place_food,hyderabad | place hyderabad | visit,"[hyderabad, best, place, food, place hyderabad...","[hyderabad food, restaurant hyderabad, food hy...","[hyderabad, place hyderabad, visit, places, hy...","[hyderabad, best, place, food, service, great,...",[I am a huge fan of regional cuisines . The di...
6,5,175,5_buffet_spread_lunch_starters,buffet | lunch | main course,"[buffet, spread, lunch, starters, main, main c...","[buffet food, buffet lunch, dinner buffet, lun...","[buffet, lunch, main course, lunch buffet, des...","[buffet, spread, lunch, starters, main, main c...",[Very good place to have buffet . Buffet sprea...
7,6,169,6_cake_cakes_brownies_cupcakes,cakes | brownies | cupcakes,"[cake, cakes, brownies, cupcakes, chocolate, r...","[cup cakes, cup cake, cakes, cake chocolate, c...","[cakes, brownies, cupcakes, red velvet, bakery...","[cake, cakes, brownies, cupcakes, chocolate, r...",[My boyfriend had gotten a Cheese cake for my ...
8,7,131,7_gachibowli_place_place gachibowli_food,gachibowli | place gachibowli | food,"[gachibowli, place, place gachibowli, food, go...","[gachibowli place, visit gachibowli, gachibowl...","[gachibowli, place gachibowli, food, visit, pl...","[gachibowli, place, food, good, ambience, best...",[I love this place in Gachibowli . I have been...
9,8,131,8_quantity_packing_food_food food,food food | food good | food cold,"[quantity, packing, food, food food, cold, tas...","[food quality, quality food, taste quality, qu...","[food food, food good, food cold, quantity foo...","[quantity, packing, food, cold, taste, good qu...",[Food is not that good . Quantity is more but ...


In [None]:
custom_model.visualize_topics()

In [None]:
custom_model_MRR = custom_model

In [None]:
# assign labels
keybert_topic_labels = {topic: " | ".join(list(zip(*values))[0][:3]) for topic, values in custom_model.topic_aspects_["KeyBERT"].items()}
custom_model.set_topic_labels(keybert_topic_labels)

In [None]:
from transformers import pipeline

# Summarization pipeline
summarizer = pipeline("summarization", model="t5-small")

topic_labels = {}
topic_info = custom_model.get_topic_info()

for index, row in topic_info.iterrows():
    topic = row['Topic']
    representative_docs = row['Representation']
    text = " ".join(representative_docs)
    summary = summarizer(text, max_length=2, min_length=1, do_sample=False)
    topic_labels[topic] = summary[0]["summary_text"]

print(topic_labels)


Device set to use cpu


{-1: 'food', 0: 'place', 1: 'bi', 2: 'manager', 3: 'good', 4: '', 5: 'buffet', 6: 'cake', 7: '', 8: 'quantity', 9: 'authentic', 10: '', 11: 'chicken', 12: '', 13: 'noodles', 14: 'super', 15: 'quantity', 16: 'delivery', 17: '', 18: 'delivery', 19: '', 20: 'pizza', 21: 'pan', 22: 'para', 23: 'chicken', 24: 'order', 25: 'good', 26: 'in', 27: 'mango', 28: 'superb', 29: 'mom', 30: 'pun', 31: 'man', 32: 'taste', 33: 'good', 34: 'bread', 35: 'shake', 36: 'coffee', 37: 'fast', 38: 'ma', 39: 'awesome', 40: 'service', 41: '', 42: 'tasty', 43: 'spicy', 44: 'nice', 45: 'excellent', 46: 'pi', 47: 'wa', 48: 'and', 49: 'wings', 50: '', 51: 'si', 52: 'wrap', 53: 'awesome'}


In [None]:
custom_model_MRR.get_topic_info().head()

Unnamed: 0,Topic,Count,Name,CustomName,Representation,KeyBERT,MMR,POS,Representative_Docs
0,-1,3387,-1_food_chicken_good_place,food | chicken | restaurant,"[food, chicken, good, place, ordered, taste, s...","[restaurant, buffet, good food, food good, dis...","[food, chicken, restaurant, order, rice, panee...","[food, chicken, good, place, taste, service, g...",[When you love food and especially north India...
1,0,1753,0_place_good_ambience_service,place | ambience | service,"[place, good, ambience, service, food, great, ...","[great place, visit place, best place, good pl...","[place, ambience, service, visit, drinks, frie...","[place, good, ambience, service, food, great, ...",[Great place to hangout . Good food + Good dri...
2,1,561,1_biryani_chicken_chicken biryani_taste,chicken biryani | biriyani | mutton biryani,"[biryani, chicken, chicken biryani, taste, bir...","[chicken biryani, biryani chicken, biryani tas...","[chicken biryani, biriyani, mutton biryani, re...","[biryani, chicken, taste, biriyani, mutton, go...","[biryani is good, Just the name of Biryani and..."
3,2,365,2_manager_service_asked_table,manager | table | rude,"[manager, service, asked, table, worst, rude, ...","[restaurant, serving, waiters, waiter, lunch, ...","[manager, table, rude, staff, waiter, customer...","[manager, service, table, worst, rude, staff, ...","[The worst place in the city, I was here for a..."
4,3,309,3_good good_good_nice good_good combo,good good | good | nice good,"[good good, good, nice good, good combo, combo...","[good, good good, nice good, good nice, nice, ...","[good good, good, nice good, good combo, combo...","[good, combo, nice, , , , , , , ]","[good, good, good good]"


In [None]:
custom_model.get_topic_info().head()

Unnamed: 0,Topic,Count,Name,CustomName,Representation,KeyBERT,MMR,POS,Representative_Docs
0,-1,3387,-1_food_chicken_good_place,food | chicken | restaurant,"[food, chicken, good, place, ordered, taste, s...","[restaurant, buffet, good food, food good, dis...","[food, chicken, restaurant, order, rice, panee...","[food, chicken, good, place, taste, service, g...",[When you love food and especially north India...
1,0,1753,0_place_good_ambience_service,place | ambience | service,"[place, good, ambience, service, food, great, ...","[great place, visit place, best place, good pl...","[place, ambience, service, visit, drinks, frie...","[place, good, ambience, service, food, great, ...",[Great place to hangout . Good food + Good dri...
2,1,561,1_biryani_chicken_chicken biryani_taste,chicken biryani | biriyani | mutton biryani,"[biryani, chicken, chicken biryani, taste, bir...","[chicken biryani, biryani chicken, biryani tas...","[chicken biryani, biriyani, mutton biryani, re...","[biryani, chicken, taste, biriyani, mutton, go...","[biryani is good, Just the name of Biryani and..."
3,2,365,2_manager_service_asked_table,manager | table | rude,"[manager, service, asked, table, worst, rude, ...","[restaurant, serving, waiters, waiter, lunch, ...","[manager, table, rude, staff, waiter, customer...","[manager, service, table, worst, rude, staff, ...","[The worst place in the city, I was here for a..."
4,3,309,3_good good_good_nice good_good combo,good good | good | nice good,"[good good, good, nice good, good combo, combo...","[good, good good, nice good, good nice, nice, ...","[good good, good, nice good, good combo, combo...","[good, combo, nice, , , , , , , ]","[good, good, good good]"


In [None]:
custom_model_df = pd.DataFrame(custom_model.get_topic_info())
custom_model_mrr_df = pd.DataFrame(custom_model_MRR.get_topic_info())

In [None]:
merged_df = custom_model_df.join(custom_model_mrr_df, lsuffix='_keybert', rsuffix='_mrr')

In [None]:
merged_df.head()

Unnamed: 0,Topic_keybert,Count_keybert,Name_keybert,CustomName_keybert,Representation_keybert,KeyBERT_keybert,MMR_keybert,POS_keybert,Representative_Docs_keybert,Topic_mrr,Count_mrr,Name_mrr,CustomName_mrr,Representation_mrr,KeyBERT_mrr,MMR_mrr,POS_mrr,Representative_Docs_mrr
0,-1,3387,-1_food_good_chicken_place,food | chicken | ambience,"[food, good, chicken, place, ordered, taste, s...","[restaurant, buffet, good food, food good, dis...","[food, chicken, ambience, restaurant, order, r...","[food, good, chicken, place, taste, service, g...",[When you love food and especially north India...,-1,3387,-1_food_good_chicken_place,food | chicken | ambience,"[food, good, chicken, place, ordered, taste, s...","[restaurant, buffet, good food, food good, dis...","[food, chicken, ambience, restaurant, order, r...","[food, good, chicken, place, taste, service, g...",[When you love food and especially north India...
1,0,1753,0_place_good_ambience_service,place | ambience | service,"[place, good, ambience, service, food, great, ...","[great place, best place, visit place, good pl...","[place, ambience, service, visit, drinks, frie...","[place, good, ambience, service, food, great, ...",[Komatose is such wonderful place to chilled o...,0,1753,0_place_good_ambience_service,place | ambience | service,"[place, good, ambience, service, food, great, ...","[great place, best place, visit place, good pl...","[place, ambience, service, visit, drinks, frie...","[place, good, ambience, service, food, great, ...",[Komatose is such wonderful place to chilled o...
2,1,561,1_biryani_chicken_chicken biryani_taste,chicken biryani | biriyani | mutton biryani,"[biryani, chicken, chicken biryani, taste, ord...","[ordered chicken biryani, chicken biryani, bir...","[chicken biryani, biriyani, mutton biryani, re...","[biryani, chicken, taste, biriyani, mutton, go...",[One of the new tastes of biryanis in hyd we o...,1,561,1_biryani_chicken_chicken biryani_taste,chicken biryani | biriyani | mutton biryani,"[biryani, chicken, chicken biryani, taste, ord...","[ordered chicken biryani, chicken biryani, bir...","[chicken biryani, biriyani, mutton biryani, re...","[biryani, chicken, taste, biriyani, mutton, go...",[One of the new tastes of biryanis in hyd we o...
3,2,365,2_manager_service_asked_table,manager | table | worst,"[manager, service, asked, table, worst, rude, ...","[restaurant, serving, waiters, waiter, serve, ...","[manager, table, worst, rude, staff, waiter, c...","[manager, service, table, worst, rude, staff, ...","[Visited this place for my team lunch, and the...",2,365,2_manager_service_asked_table,manager | table | worst,"[manager, service, asked, table, worst, rude, ...","[restaurant, serving, waiters, waiter, serve, ...","[manager, table, worst, rude, staff, waiter, c...","[manager, service, table, worst, rude, staff, ...","[Visited this place for my team lunch, and the..."
4,3,309,3_good good good_good good_good_nice good good,good good good | good good | nice good good,"[good good good, good good, good, nice good go...","[good, good good, nice good, good good good, g...","[good good good, good good, nice good good, ni...","[good, combo, nice, , , , , , , ]","[good, good, good good]",3,309,3_good good good_good good_good_nice good good,good good good | good good | nice good good,"[good good good, good good, good, nice good go...","[good, good good, nice good, good good good, g...","[good good good, good good, nice good good, ni...","[good, combo, nice, , , , , , , ]","[good, good, good good]"


In [None]:
merged_df = merged_df[['Topic_keybert', 'Count_keybert', 'Name_keybert', 'CustomName_keybert', 'CustomName_mrr','Representation_keybert', 'KeyBERT_keybert','MMR_keybert', 'POS_keybert', 'Representative_Docs_keybert']]