In [12]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from vader import Vader
from normalization import Normalizer
from reviewCleaner import cleaner
# Apply seaborn's theme so the matplotlib figures drawn below pick it up.
sns.set_theme()
# Single shared analyzer instance; the cells below call
# vader.sentiment_analysis(...) on it for every review.
vader = Vader()


In [13]:
# Brands to analyse; each key is also the stem of ./assets/reviews/<brand>.csv.
# The cells visible here only iterate over the keys -- the 0 values are
# placeholders that are never read.
brands = dict.fromkeys(
    (
        'apple',
        'google',
        'huawei',
        'motorola',
        'nokia',
        'samsung',
        'sony',
        'xiaomi',
    ),
    0,
)

# Lower-case keywords looked up in the review text by plain substring match.
aspects = [
    'phone', 'screen', 'battery', 'camera',
    'charger', 'charge', 'service', 'product',
    'device', 'experience', 'price', 'sound',
]

# Row labels of the per-brand score table, in the order the values are stored.
rulings = ['favorability', 'strength', 'uniqueness', 'obim']


In [None]:
# For every brand: score each aspect (favorability, strength, uniqueness and
# their product, "obim"), plot the three component scores as a 3-D scatter and
# write the score table to ./<brand>_info.csv.
for brand in brands:
    # Lower-case and strip the text exactly as the distribution and overview
    # cells do. The aspect keywords are lower-case, so matching against raw
    # text would miss capitalised mentions and make favorability disagree
    # between cells.
    all_reviews = pd.read_csv(f'./assets/reviews/{brand}.csv')['Review'].map(
        lambda r: r.lower().strip())
    normalizer = Normalizer(brand, aspects)
    normalizer.normalize()
    # One column per aspect is added below; rows follow the order of `rulings`.
    df = pd.DataFrame(index=rulings)

    fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': '3d'})

    for aspect in aspects:
        # Plain substring test, so 'charge' also selects reviews that only
        # mention 'charger'.
        reviews = all_reviews[all_reviews.apply(lambda r: aspect in r)]
        # Mean of (last element of the analysis result + 1) / 2 over the
        # matching reviews. Presumably that element is VADER's compound score
        # in [-1, 1], which this rescales to [0, 1] -- confirm in vader.py.
        # With no matching reviews the mean is NaN.
        favorability = reviews.apply(
            lambda r: (vader.sentiment_analysis(r)[-1] + 1) / 2).mean()
        strength = normalizer.strength[aspect]
        uniqueness = normalizer.uniqueness[aspect]
        obim = favorability * strength * uniqueness
        df[aspect] = [favorability, strength, uniqueness, obim]

        # Coordinates are passed as (x, y, z) and must follow the axis labels
        # set below: x = favorability, y = strength, z = uniqueness. The
        # previous order put uniqueness on the "Strength" axis and vice versa.
        ax.scatter(favorability, strength, uniqueness, label=aspect)

    ax.set_title(brand, fontsize=20)

    ax.set_xlabel('Favorability', fontsize=16)
    ax.set_ylabel('Strength', fontsize=16)
    ax.set_zlabel('Uniqueness', fontsize=16)

    ax.locator_params(nbins=6)
    ax.legend(fontsize=14)
    # The 2x2 overview cell further down also saves to `brand`; writing the
    # 3-D figure to its own file keeps it from being overwritten on Run All.
    fig.savefig(f'{brand}_3d', dpi=600)

    # Aspects as rows, rulings as columns.
    df.transpose().to_csv(f'./{brand}_info.csv')


In [16]:
def count_ratings(ratings):
    """Count how often each distinct rating occurs.

    Args:
        ratings: Iterable of hashable, mutually orderable rating values.

    Returns:
        A dict mapping each distinct rating to its number of occurrences,
        with keys inserted in ascending order.
    """
    tally = {}
    for value in ratings:
        if value in tally:
            tally[value] += 1
        else:
            tally[value] = 1

    # Rebuild the dict so iteration order is ascending by rating.
    return {key: tally[key] for key in sorted(tally)}

def count_sentiments(reviews):
    """Tally reviews by the sentiment label the shared analyzer assigns.

    Args:
        reviews: Iterable of review texts, each passed to
            ``vader.sentiment_analysis``.

    Returns:
        A dict of label -> count. 'negative', 'neutral' and 'positive' are
        always present (starting at 0); any other label the analyzer returns
        is added on first sight.
    """
    counts = dict.fromkeys(('negative', 'neutral', 'positive'), 0)
    for text in reviews:
        # The second-to-last element of the analysis result is used as the
        # label -- presumably the overall polarity; confirm in vader.py.
        label = vader.sentiment_analysis(text)[-2]
        counts[label] = counts.get(label, 0) + 1

    return counts


In [28]:
# Tabulate, per brand, how many reviews carry each star rating and each
# sentiment label, and write one row per brand to distribution_results.csv.
records = []
for brand in brands:
    df = pd.read_csv(f'./assets/reviews/{brand}.csv')
    all_reviews = df['Review'].map(lambda r: r.lower().strip())
    ratings = df['Rating']

    records.append({**count_ratings(ratings), **count_sentiments(all_reviews)})

# Build the table in one step from all records. Assigning Series one by one
# to an initially empty frame fixes the index to the first brand's keys, so a
# rating level or label absent for that brand was silently dropped for every
# later brand. Here the columns are the union of all keys in first-seen order,
# and a level a brand never received is recorded as 0 instead of NaN.
by_brand = pd.DataFrame(records, index=list(brands)).fillna(0).astype(int)

# `result` keeps its previous orientation: brands as columns, counts as rows.
result = by_brand.transpose()

by_brand.to_csv('distribution_results.csv')


In [None]:
# For every brand, draw a 2x2 overview figure (rating distribution, aspect
# favorability, sentiment distribution, sentiment-vs-rating polarity) and save
# it under the brand's name.
for brand in brands:
    print(f'Analyzing {brand}...')
    df = pd.read_csv(f'./assets/reviews/{brand}.csv')
    all_reviews = df['Review'].map(lambda r: r.lower().strip())
    ratings = df['Rating']

    fig, axes = plt.subplots(2, 2, figsize=(20, 20))
    fig.suptitle(brand, fontsize=26)

    # * 1st plot - ratings distribution
    rc = count_ratings(ratings)
    axes[0, 0].bar(list(rc.keys()), list(rc.values()))
    axes[0, 0].set_title('Rating Distribution', fontsize=16)
    axes[0, 0].set_xlabel('ratings', fontsize=16)
    axes[0, 0].set_ylabel('count', fontsize=16)

    # * 2nd plot - favorability of aspects
    # Score every review once instead of once per aspect it mentions. The
    # score is (last element of the analysis result + 1) / 2 -- presumably
    # VADER's compound value rescaled from [-1, 1] to [0, 1]; confirm in
    # vader.py.
    scores = all_reviews.apply(
        lambda r: (vader.sentiment_analysis(r)[-1] + 1) / 2)
    favorability = {}
    for aspect in aspects:
        # Plain substring test, so 'charge' also selects 'charger' reviews.
        mentions = all_reviews.apply(lambda r: aspect in r)
        # NaN when no review mentions the aspect.
        favorability[aspect] = scores[mentions].mean()

    axes[0, 1].bar(list(favorability.keys()), list(favorability.values()))
    axes[0, 1].set_title('Favorability of Aspects', fontsize=16)
    axes[0, 1].set_xlabel('aspects', fontsize=16)
    axes[0, 1].set_ylabel('compound', fontsize=16)

    # * 3rd plot - sentiment distribution
    # Same tally the distribution cell uses, instead of a second copy of it.
    polarity = count_sentiments(all_reviews)

    axes[1, 0].bar(list(polarity.keys()), list(polarity.values()))
    axes[1, 0].set_title('Sentiment Distribution', fontsize=16)
    axes[1, 0].set_xlabel('polarity', fontsize=16)
    axes[1, 0].set_ylabel('count', fontsize=16)

    # * 4th plot - Sentiment Polarity vs. Ratings Polarity
    # Bucket star ratings as 1-2 -> negative, 3 -> neutral, 4-5 -> positive.
    # Assumes the Rating column holds the numbers 1..5. A level no review
    # received counts as 0; without the default, `.get` returned None and the
    # sum raised TypeError.
    rating_polarity = [
        rc.get(1, 0) + rc.get(2, 0),
        rc.get(3, 0),
        rc.get(4, 0) + rc.get(5, 0),
    ]
    # Fix the three labels explicitly so both bar groups always have the same
    # length and order, even if the analyzer emitted an extra label.
    labels = ['negative', 'neutral', 'positive']
    sentiment_polarity = [polarity[label] for label in labels]

    width = 0.25
    left = np.arange(len(labels))
    right = left + width
    axes[1, 1].bar(left, sentiment_polarity, width, label="# of Sentiments")
    axes[1, 1].bar(right, rating_polarity, width, label="# of Ratings")
    # Centre each tick between the two bars of its group.
    axes[1, 1].set_xticks([(width / 2) + i for i in range(len(labels))], labels)
    axes[1, 1].legend()
    axes[1, 1].set_title('Sentiment Polarity vs. Ratings Polarity', fontsize=16)
    axes[1, 1].set_xlabel('polarity', fontsize=16)
    axes[1, 1].set_ylabel('count', fontsize=16)

    fig.savefig(brand, dpi=600)
