In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

In [2]:
file_path = 'scores_csv_file.csv'
df = pd.read_csv(file_path)

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,categoryName,website,url,reviewsCount,stars,text,latitude,longitude,_text,sentiment_score
0,2,The Optimist,Seafood restaurant,https://www.theoptimistrestaurant.com/,https://www.google.com/maps/place/The+Optimist...,3349,5,"One word amazing!! The red fish, halibut, fr...",33.779814,-84.410451,one word amazing red fish halibut fried rice b...,"('POSITIVE', 0.9867098927497864)"
1,3,The Optimist,Seafood restaurant,https://www.theoptimistrestaurant.com/,https://www.google.com/maps/place/The+Optimist...,3349,5,First time here and the food is great and the ...,33.779814,-84.410451,first time food great waiter excellent,"('POSITIVE', 0.9997603297233582)"
2,4,The Optimist,Seafood restaurant,https://www.theoptimistrestaurant.com/,https://www.google.com/maps/place/The+Optimist...,3349,5,I recently had the pleasure of dining at Optim...,33.779814,-84.410451,recently pleasure dining optimist atlanta ga l...,"('POSITIVE', 0.9986514449119568)"
3,5,The Optimist,Seafood restaurant,https://www.theoptimistrestaurant.com/,https://www.google.com/maps/place/The+Optimist...,3349,5,Beautiful atmosphere and delicious food. All o...,33.779814,-84.410451,beautiful atmosphere delicious food food ’ fla...,"('POSITIVE', 0.9995262622833252)"
4,6,The Optimist,Seafood restaurant,https://www.theoptimistrestaurant.com/,https://www.google.com/maps/place/The+Optimist...,3349,5,We had a wonderful dinner at the Optimist. Our...,33.779814,-84.410451,wonderful dinner optimist group 9 great meal o...,"('POSITIVE', 0.999870777130127)"


In [4]:
df['_text'].isnull().sum()

33

In [5]:
df = df.dropna(subset=['_text'])

In [6]:
def parse_sentiment(score_tuple_str):
    try:
        score_tuple = ast.literal_eval(score_tuple_str)
        if score_tuple[0] == 'POSITIVE':
            return score_tuple[1]
        elif score_tuple[0] == 'NEGATIVE':
            return -score_tuple[1]
    except:
        return np.nan

In [7]:
def display_topics(model, feature_names, no_top_words):
    topics = []
    for topic_idx, topic in enumerate(model.components_):
        topic_words = " ".join([feature_names[i] for i in topic.argsort()[:-no_top_words - 1:-1]])
        topics.append((topic_idx, topic_words))
    return topics

In [8]:
df['parsed_sentiment'] = df['sentiment_score'].apply(parse_sentiment)

positive_reviews = df[df['parsed_sentiment'] > 0]['_text']
neutral_negative_reviews = df[df['parsed_sentiment'] <= 0]['_text']

In [22]:
def lda_topics(text_data, n_topics=50, no_top_words=55):
    text_data = text_data.fillna('').tolist()
    cv = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
    dtm = cv.fit_transform(text_data)
    lda = LatentDirichletAllocation(n_components=n_topics, random_state=0)
    lda.fit(dtm)
    topic_words = display_topics(lda, cv.get_feature_names_out(), no_top_words)
    topics_dict = {idx: words for idx, words in topic_words}
    return lda, cv, topics_dict

In [23]:
lda_positive, cv_positive, positive_topics = lda_topics(positive_reviews)
lda_neutral_negative, cv_neutral_negative, neutral_negative_topics = lda_topics(neutral_negative_reviews)

In [24]:
positive_topics

{0: 'enjoyed meal patio experience outside atmosphere beautiful dining really comfortable seating wonderful dinner perfect visit nice great outdoor thoroughly pleasant make day delicious cup server setting bakery expectation sat enjoy inside truly way friend time restaurant attentive food weather interior met covered exceptionally people waiter exceeded sure overall evening place enjoyable recently entire quiet intimate',
 1: 'cheese potato steak pork mac bbq delicious rib cooked sweet good shrimp bean green fried perfectly got brisket sauce salad chicken amazing lobster beef tender baked ordered grilled sandwich meat definitely seasoned tomato corn filet grit salmon try mashed short smoked flavor rice pulled came ribeye melt plate tasty really loved dip collard perfect tried',
 2: 'food came drink tea 1010 waiter said told soul birthday table went say regular amazing mom ordered sweet place left ok boyfriend make walked daughter sister like ate sure good thing tasted persian waitress 

In [None]:
neutral_negative_topics

{0: 'card tip charged charge pay receipt credit cash fee food extra order said got paid added time payment check dollar took didnt price told total gratuity went gave 20 refund star asked use server paying menu instead ask change manager good register bank say restaurant left coupon going lunch place want owner cashier service tax',
 1: 'pizza time good crust like topping place cheese better taste slice food great ordered new really got ive make love way service dough pepperoni oven dont money quality pie customer burnt old know dish spicy sauce free used le terrible today order half try location didnt going horrible undercooked disappointed meat johnny ask sure guy',
 2: 'staff left disappointed food atlanta time husband table say restaurant wait hostess experience people waited told family said great like way home waitress didnt went come stopped kid longer seated asked owner visit favorite sorry outside lack georgia review running leave hour min inside opened good want away business

In [None]:
def transform_and_apply_lda(lda_model, count_vectorizer, reviews):
    transformed_reviews = count_vectorizer.transform(reviews)
    return pd.DataFrame(lda_model.transform(transformed_reviews))

doc_topic_positive = transform_and_apply_lda(lda_positive, cv_positive, positive_reviews)
dominant_topic_positive = doc_topic_positive.idxmax(axis=1)

In [None]:
doc_topic_neutral_negative = transform_and_apply_lda(lda_neutral_negative, cv_neutral_negative, neutral_negative_reviews)
dominant_topic_neutral_negative = doc_topic_neutral_negative.idxmax(axis=1)

In [None]:
pros_cons_df = pd.DataFrame({
    'Restaurant': df['title'],
    'Pros': dominant_topic_positive,
    'Cons': dominant_topic_neutral_negative
})

In [None]:
topic_counts_pros = pros_cons_df.groupby(['Restaurant', 'Pros']).size().unstack(fill_value=0)
topic_counts_cons = pros_cons_df.groupby(['Restaurant', 'Cons']).size().unstack(fill_value=0)

In [None]:
def get_top_topics(topic_counts):
    return topic_counts.apply(lambda x: x.nlargest(3).index.tolist() + [None]*(3-len(x.nlargest(3))), axis=1)

In [None]:
top_pros = get_top_topics(topic_counts_pros)
top_cons = get_top_topics(topic_counts_cons)

In [None]:
def map_topics_to_descriptions(topic_numbers, topics_dict):
    return [topics_dict.get(topic, "Unknown") for topic in topic_numbers]


In [None]:
positive_topic_names = {
    0: "Outdoor Dining Experience",
    1: "Meat and Barbecue Delights",
    2: "Mixed Dining Experiences",
    3: "Global Cuisine Sampling",
    4: "Sushi and Bar Ambiance",
    5: "Indian Cuisine and Spices",
    6: "Desserts and Casual Eats",
    7: "Quick Service and Takeout",
    8: "Customer Service Excellence",
    9: "Mexican Cuisine Favorites",
    10: "Varied Food Experiences",
    11: "Burger and Shake Enjoyment",
    12: "Convenience and Drive-Thru",
    13: "Fine Dining and Beverages",
    14: "Caribbean and Jamaican Flavors",
    15: "Cleanliness and Comfort",
    16: "Frequent Visits and Loyalty",
    17: "Exceptional Service and Atmosphere",
    18: "Burgers and Healthy Options",
    19: "Affordable and Authentic Eats",
    20: "Quality Food and Ambiance",
    21: "Casual Lunch and Family Dining",
    22: "Friendly and Efficient Staff",
    23: "Best Local and Ethnic Foods",
    24: "Wide Selection and Drinks",
    25: "Cozy and Welcoming Places",
    26: "Mixed Reviews and Experiences",
    27: "Waiting and Service Times",
    28: "Group Dining and Events",
    29: "Diverse Cuisine and Markets",
    30: "Convenience and Accessibility",
    31: "Cuban Cuisine and Coffee",
    32: "Breakfast and Brunch Specials",
    33: "Efficient Service in Busy Settings",
    34: "Happy Hour and Value Deals",
    35: "Family-friendly and Consistent Quality",
    36: "Sandwiches and Gluten-Free Options",
    37: "Asian Cuisine and Spicy Dishes",
    38: "Italian and Seafood Specialties",
    39: "Average Dining Experiences",
    40: "Remarkable Service and Recommendations",
    41: "Comfort Food and Diner Classics",
    42: "Pizza and Italian Favorites",
    43: "Mediterranean and Middle Eastern Cuisine",
    44: "Special Occasions and Fine Dining",
    45: "Fun and Lively Atmosphere",
    46: "Mexican Cuisine and Family Dining",
    47: "Great Food and Relaxed Ambiance",
    48: "Exceptional Service and Atmosphere",
    49: "Music and Entertainment Dining"
}

negative_topic_names = {
    0: "Billing and Payment Concerns",
    1: "Pizza Quality and Preferences",
    2: "Disappointing Restaurant Experiences",
    3: "Customer Service and Order Accuracy",
    4: "Dining Disappointments and Preferences",
    5: "Restaurant Reservation and Party Planning",
    6: "Food Quality and Order Mistakes",
    7: "Price and Portion Size Issues",
    8: "Delivery Service Challenges",
    9: "Spanish-Language Dining Reviews",
    10: "Customer and Employee Interactions",
    11: "Declining Food Quality",
    12: "Inconsistent Food and Service",
    13: "Unpleasant Dining Experiences",
    14: "Service Delays and Inattentiveness",
    15: "Sandwich and Bread Quality",
    16: "Operational Issues in Restaurants",
    17: "Customer Service and Ambiance",
    18: "Disappointing Food Quality",
    19: "Underwhelming Sushi and Bagels",
    20: "Poor Food and Service Quality",
    21: "Management and Service Problems",
    22: "Cleanliness and Hygiene Issues",
    23: "COVID-19 Compliance and Customer Service",
    24: "Changing Quality and Service",
    25: "Drive-Thru Service and Employee Conduct",
    26: "Mixed Restaurant Reviews",
    27: "Poorly Cooked and Flavored Food",
    28: "Service and Waiting Times",
    29: "Unsatisfactory Asian Cuisine",
    30: "Poor Customer Service and Management",
    31: "Steakhouse Disappointments",
    32: "Issues with Orders and Service",
    33: "Subpar Dining Experiences",
    34: "Operational Inconsistencies",
    35: "Substandard Food and Service",
    36: "Slow Service at Popular Chains",
    37: "Overpriced and Underwhelming Meals",
    38: "Food Quality Concerns",
    39: "Issues with Closing Times",
    40: "Dissatisfaction with Mexican Cuisine",
    41: "Long Waiting Times for Orders",
    42: "Customer Service and Management Issues",
    43: "Menu Misrepresentations",
    44: "General Restaurant Dissatisfaction",
    45: "Rude Service and Poor Attitude",
    46: "Unsatisfactory Taco Experiences",
    47: "Burger Quality and Cooking Issues",
    48: "Cold and Poorly Prepared Food",
    49: "Long Wait and Order Mistakes"
}

In [None]:
pros_df = pd.DataFrame({
    'Restaurant': top_pros.index,
    'Top 3 Pros Topics': top_pros.values
})
pros_df['Top 3 Pros Topics'] = pros_df['Top 3 Pros Topics'].apply(lambda topics: map_topics_to_descriptions(topics, positive_topic_names))
print(pros_df)

                                  Restaurant  \
0               3 Amigos Mexican Bar & Grill   
1                           5Church Buckhead   
2                                         AG   
3                 ANNUNZIATA'S Pizza & Pasta   
4                           AZN Sandwich Bar   
..                                       ...   
506         Yume Ramen Bar in Peachtree City   
507                        Zafron Restaurant   
508  Zaxby's Chicken Fingers & Buffalo Wings   
509                  il Giallo Osteria & Bar   
510                             la Madeleine   

                                     Top 3 Pros Topics  
0    [Remarkable Service and Recommendations, Great...  
1    [Caribbean and Jamaican Flavors, Friendly and ...  
2    [Asian Cuisine and Spicy Dishes, Best Local an...  
3    [Great Food and Relaxed Ambiance, Remarkable S...  
4    [Great Food and Relaxed Ambiance, Mixed Review...  
..                                                 ...  
506  [Great Food and Rel

In [None]:
cons_df = pd.DataFrame({
    'Restaurant': top_cons.index,
    'Top 3 Cons Topics': top_cons.values
})
cons_df['Top 3 Cons Topics'] = cons_df['Top 3 Cons Topics'].apply(lambda topics: map_topics_to_descriptions(topics, negative_topic_names))
print(cons_df)

                                  Restaurant  \
0               3 Amigos Mexican Bar & Grill   
1                           5Church Buckhead   
2                                         AG   
3                 ANNUNZIATA'S Pizza & Pasta   
4                           AZN Sandwich Bar   
..                                       ...   
504         Yume Ramen Bar in Peachtree City   
505                        Zafron Restaurant   
506  Zaxby's Chicken Fingers & Buffalo Wings   
507                  il Giallo Osteria & Bar   
508                             la Madeleine   

                                     Top 3 Cons Topics  
0    [Unsatisfactory Asian Cuisine, Long Waiting Ti...  
1    [Unsatisfactory Asian Cuisine, Mixed Restauran...  
2    [Service Delays and Inattentiveness, Mixed Res...  
3    [Long Waiting Times for Orders, Unsatisfactory...  
4    [Long Waiting Times for Orders, Pizza Quality ...  
..                                                 ...  
504  [Long Waiting Times

In [None]:
final_df = pd.merge(pros_df, cons_df, on='Restaurant', how='inner')
final_df.head()

Unnamed: 0,Restaurant,Top 3 Pros Topics,Top 3 Cons Topics
0,3 Amigos Mexican Bar & Grill,"[Remarkable Service and Recommendations, Great...","[Unsatisfactory Asian Cuisine, Long Waiting Ti..."
1,5Church Buckhead,"[Caribbean and Jamaican Flavors, Friendly and ...","[Unsatisfactory Asian Cuisine, Mixed Restauran..."
2,AG,"[Asian Cuisine and Spicy Dishes, Best Local an...","[Service Delays and Inattentiveness, Mixed Res..."
3,ANNUNZIATA'S Pizza & Pasta,"[Great Food and Relaxed Ambiance, Remarkable S...","[Long Waiting Times for Orders, Unsatisfactory..."
4,AZN Sandwich Bar,"[Great Food and Relaxed Ambiance, Mixed Review...","[Long Waiting Times for Orders, Pizza Quality ..."
