## Semantic Analysis on Soups Using Yelp Reviews

In [1]:
import pandas as pd
import spacy

In [2]:
soup_reviews = pd.read_csv('reviews_and_soups.csv', na_filter=True, na_values='[]')
soup_reviews.dropna(inplace = True)
soup_reviews.reset_index(drop=True, inplace=True)
soup_reviews

Unnamed: 0,review,soup_names_found
0,"Iconic Soup Kitchen from Seinfeld episode, no ...",['Chicken Bone Broth']
1,"""No soup for you!"" - Totally stumbled across ...","['Chicken Dumpling', 'Lobster Roll']"
2,"Being a Seinfeld fan, I've always wanted to tr...",['Italian Wedding']
3,"""No soup for you!""Found out about this place b...","['Lobster Bisque', 'Chicken Tortilla']"
4,My dad was a big fan of Seinfeld back in the d...,"['Lobster Bisque', 'Chicken Dumpling', 'Chicke..."
...,...,...
456,Went to the 55th st location and got the lobst...,"['Chicken Gumbo', 'Lobster Roll']"
457,Went here on a recent trip to NY after reading...,['Minestrone']
458,Lobster bisque lives up to expectations. Kinda...,['Lobster Bisque']
459,"There are a lot of good food out there, but th...",['Lobster Soup']


In [3]:
soup_reviews['soup_names_found'].dtype

dtype('O')

**Converting all the values in the soup_names_found column into lists so that whole the whole strings can be searched for when doing our semantic analysis**

In [4]:
import ast

soup_reviews['soup_names_found'] = soup_reviews['soup_names_found'].apply(ast.literal_eval)


In [5]:
for value in soup_reviews['soup_names_found']:
    print(type(value))

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'li

In [6]:
soup_reviews

Unnamed: 0,review,soup_names_found
0,"Iconic Soup Kitchen from Seinfeld episode, no ...",[Chicken Bone Broth]
1,"""No soup for you!"" - Totally stumbled across ...","[Chicken Dumpling, Lobster Roll]"
2,"Being a Seinfeld fan, I've always wanted to tr...",[Italian Wedding]
3,"""No soup for you!""Found out about this place b...","[Lobster Bisque, Chicken Tortilla]"
4,My dad was a big fan of Seinfeld back in the d...,"[Lobster Bisque, Chicken Dumpling, Chicken Tor..."
...,...,...
456,Went to the 55th st location and got the lobst...,"[Chicken Gumbo, Lobster Roll]"
457,Went here on a recent trip to NY after reading...,[Minestrone]
458,Lobster bisque lives up to expectations. Kinda...,[Lobster Bisque]
459,"There are a lot of good food out there, but th...",[Lobster Soup]


In [7]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

In [9]:
def analyze_sentiment(text):

    # Analyze the sentiment of the text using VADER
    sentiment_score = analyzer.polarity_scores(text)
    
    # Determine the overall sentiment based on the compound score
    if sentiment_score['compound'] >= 0.05:
        return "positive"
    elif sentiment_score['compound'] <= -0.05:
        return "negative"
    else:
        return "neutral"

In [10]:
def perform_sentiment_analysis(reviews, extracted_soups):

    sentiment_scores = {}
    for review, soups in zip(reviews, extracted_soups):
        soup_sentiments = {}
        for soup in soups:
            # Analyze the sentiment of the review containing the soup
            sentiment = analyze_sentiment(review)
            soup_sentiments[soup] = sentiment
        sentiment_scores[review] = soup_sentiments
    return sentiment_scores

In [11]:
# Perform sentiment analysis on extracted soups based on the context of the reviews
soup_sentiment_scores = perform_sentiment_analysis(soup_reviews['review'], soup_reviews['soup_names_found'])


In [20]:
# Create an empty list to store the combined soup names and sentiments
combined_data = []

# Iterate over each review and its corresponding sentiment dictionary
for review, sentiment_dict in soup_sentiment_scores.items():
    # Extract the soup names from the sentiment dictionary
    soups = list(sentiment_dict.keys())
    # Extract the sentiments from the sentiment dictionary
    sentiments = list(sentiment_dict.values())
    # Combine the soup names and sentiments into a single string
    combined_data.append({'review': review, 'soups_with_sentiments': ', '.join([f'{soup}: {sentiment}' for soup, sentiment in zip(soups, sentiments)])})

In [21]:
combined_df = pd.DataFrame(combined_data)

In [22]:
# Display the combined DataFrame
combined_df

Unnamed: 0,review,soups_with_sentiments
0,"Iconic Soup Kitchen from Seinfeld episode, no ...",Chicken Bone Broth: positive
1,"""No soup for you!"" - Totally stumbled across ...","Chicken Dumpling: positive, Lobster Roll: posi..."
2,"Being a Seinfeld fan, I've always wanted to tr...",Italian Wedding: positive
3,"""No soup for you!""Found out about this place b...","Lobster Bisque: positive, Chicken Tortilla: po..."
4,My dad was a big fan of Seinfeld back in the d...,"Lobster Bisque: positive, Chicken Dumpling: po..."
...,...,...
456,Went to the 55th st location and got the lobst...,"Chicken Gumbo: negative, Lobster Roll: negative"
457,Went here on a recent trip to NY after reading...,Minestrone: positive
458,Lobster bisque lives up to expectations. Kinda...,Lobster Bisque: neutral
459,"There are a lot of good food out there, but th...",Lobster Soup: positive


In [24]:
from collections import defaultdict

# Initialize a defaultdict to store the aggregated sentiments for each soup
soup_sentiment_counts = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})

# Iterate over each row in the combined DataFrame
for index, row in combined_df.iterrows():
    # Extract the review text and the soups with sentiments from the current row
    review_text = row['review']
    soups_with_sentiments = row['soups_with_sentiments']
    
    # Split the soups with sentiments string into individual soup-sentiment pairs
    soup_sentiment_pairs = [pair.split(': ') for pair in soups_with_sentiments.split(', ')]
    
    # Iterate over each soup-sentiment pair
    for soup, sentiment in soup_sentiment_pairs:
        # Update the sentiment counts for the current soup
        soup_sentiment_counts[soup.strip()][sentiment] += 1

In [25]:
# Convert the defaultdict to a regular dictionary
soup_sentiment_counts = dict(soup_sentiment_counts)

In [26]:
for soup, counts in soup_sentiment_counts.items():
    print(f"Soup: {soup}")
    print(f"Positive: {counts['positive']}, Negative: {counts['negative']}, Neutral: {counts['neutral']}")
    print()

Soup: Chicken Bone Broth
Positive: 2, Negative: 0, Neutral: 0

Soup: Chicken Dumpling
Positive: 5, Negative: 1, Neutral: 0

Soup: Lobster Roll
Positive: 139, Negative: 7, Neutral: 2

Soup: Italian Wedding
Positive: 14, Negative: 2, Neutral: 0

Soup: Lobster Bisque
Positive: 240, Negative: 10, Neutral: 4

Soup: Chicken Tortilla
Positive: 14, Negative: 0, Neutral: 0

Soup: Lentil
Positive: 19, Negative: 0, Neutral: 0

Soup: Jambalaya
Positive: 61, Negative: 2, Neutral: 0

Soup: Potato Bacon
Positive: 6, Negative: 0, Neutral: 0

Soup: Tomato Bisque
Positive: 4, Negative: 0, Neutral: 1

Soup: Butternut Squash
Positive: 12, Negative: 3, Neutral: 0

Soup: Vegetarian Soup
Positive: 1, Negative: 0, Neutral: 1

Soup: Chicken Corn Chowder
Positive: 5, Negative: 0, Neutral: 0

Soup: New England Clam Chowder
Positive: 14, Negative: 0, Neutral: 0

Soup: Buffalo Chicken
Positive: 1, Negative: 0, Neutral: 0

Soup: Cuban Black Bean
Positive: 3, Negative: 0, Neutral: 0

Soup: Chicken Noodle
Positive: 1

In [28]:
sentiment_counts = pd.DataFrame(soup_sentiment_counts)

In [29]:
sentiment_counts

Unnamed: 0,Chicken Bone Broth,Chicken Dumpling,Lobster Roll,Italian Wedding,Lobster Bisque,Chicken Tortilla,Lentil,Jambalaya,Potato Bacon,Tomato Bisque,...,Mushroom Barley,Mushroom Soup,Chicken Gumbo,Garden Vegetable,Chicken Soup,Chicken Barley,Chicken Enchilada,Chicken Chili,Broccoli and Cheese,Tomato Corn Chowder
positive,2,5,139,14,240,14,19,61,6,4,...,2,1,7,1,0,3,3,4,4,1
negative,0,1,7,2,10,0,0,2,0,0,...,0,0,1,0,1,0,0,0,0,0
neutral,0,0,2,0,4,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [30]:
sentiment_counts = sentiment_counts.transpose()

In [31]:
sentiment_counts

Unnamed: 0,positive,negative,neutral
Chicken Bone Broth,2,0,0
Chicken Dumpling,5,1,0
Lobster Roll,139,7,2
Italian Wedding,14,2,0
Lobster Bisque,240,10,4
Chicken Tortilla,14,0,0
Lentil,19,0,0
Jambalaya,61,2,0
Potato Bacon,6,0,0
Tomato Bisque,4,0,1


In [35]:
sentiment_counts.to_csv('soup_sentiment_counts.csv',index_label='menu_item')