In [1]:
# Set up
import os
import csv
import string

from pathlib import Path
from nltk.tokenize import word_tokenize

from textblob import TextBlob

In [7]:
# Organize text by cuisine
current_directory = Path.cwd()
text_folder_path = current_directory / "text"

# Translation table that deletes every ASCII punctuation character; built once
# here instead of once per file.
punctuation_remover = str.maketrans("", "", string.punctuation)

# Maps cuisine name -> {"Comments": "<cleaned text of every file, each followed by a space>"}
cuisine_comments = {}

# sorted() fixes the order in which files are concatenated; a raw directory
# listing comes back in arbitrary order, which made the combined text (and
# anything derived from it) vary between machines/runs.
for file_path in sorted(text_folder_path.iterdir()):
    # Skip sub-directories and hidden entries (e.g. .ipynb_checkpoints or
    # .DS_Store): they are not comment files and would make the read below fail.
    if file_path.name.startswith('.') or not file_path.is_file():
        continue

    # The cuisine is whatever precedes the first underscore in the file name
    cuisine_name = file_path.name.split('_')[0]

    comments = file_path.read_text(encoding='utf-8')

    # Normalize: lowercase, drop punctuation, trim surrounding whitespace
    comments = comments.lower().translate(punctuation_remover).strip()

    # Append to the cuisine's running text, creating the entry on first sight
    entry = cuisine_comments.setdefault(cuisine_name, {"Comments": ""})
    entry["Comments"] += comments + " "
    

In [17]:
# Create dictionary of subjectivity and polarity
# Citations: https://towardsdatascience.com/text-analysis-basics-in-python-443282942ec5
def create_subject_polar(cuisine_comments):
    """Score each cuisine's comment text with TextBlob.

    Args:
        cuisine_comments: Mapping of cuisine name to a dict whose
            "Comments" value is the text to analyse.

    Returns:
        A new dict mapping each cuisine name to
        {"Subjectivity": <float>, "Polarity": <float>}, with both values
        rounded to 5 decimal places. Key order follows the input mapping.
    """
    def _score(text):
        # One blob per cuisine; both figures come from its sentiment result.
        sentiment = TextBlob(text).sentiment
        return {"Subjectivity": round(sentiment.subjectivity, 5),
                "Polarity": round(sentiment.polarity, 5)}

    return {cuisine: _score(entry["Comments"])
            for cuisine, entry in cuisine_comments.items()}

# Score every cuisine's combined comments: cuisine -> {"Subjectivity": ..., "Polarity": ...}
sentiment_analysis = create_subject_polar(cuisine_comments)

In [21]:
def _extreme(pick, metric):
    """Return (cuisine, score) for the cuisine that `pick` (max or min) selects on `metric`."""
    cuisine = pick(sentiment_analysis, key=lambda name: sentiment_analysis[name][metric])
    return cuisine, sentiment_analysis[cuisine][metric]


# Most and least subjective cuisines
max_subject_cuisine, max_subject_score = _extreme(max, "Subjectivity")
min_subject_cuisine, min_subject_score = _extreme(min, "Subjectivity")

print(f"{max_subject_cuisine}: {max_subject_score}",
      f"{min_subject_cuisine}: {min_subject_score}")

# Most and least positive cuisines (polarity)
max_polar_cuisine, max_polar_score = _extreme(max, "Polarity")
min_polar_cuisine, min_polar_score = _extreme(min, "Polarity")

print(f"{max_polar_cuisine}: {max_polar_score}",
      f"{min_polar_cuisine}: {min_polar_score}")

Belgian: 0.57889 Colombian: 0.49665
Argentinian: 0.27126 Indonesian: 0.17954


In [16]:
# Export the per-cuisine scores to sentiment_analysis.csv in the working directory
csv_file_path = current_directory / "sentiment_analysis.csv"
fieldnames = ['Cuisine', 'Subjectivity', 'Polarity']

with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()

    # One row per cuisine, in the dictionary's iteration order
    writer.writerows(
        {'Cuisine': cuisine,
         'Subjectivity': scores['Subjectivity'],
         'Polarity': scores['Polarity']}
        for cuisine, scores in sentiment_analysis.items()
    )