# Load Packages & Data

In [1]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import Counter
from itertools import islice

In [2]:
ios = pd.read_csv("../data/ios_data.csv").drop("Unnamed: 0", axis = 1)
ios.head()

Unnamed: 0,review,title,rating,music_app,platform
0,This is by far the best music app I have ever ...,This is the best for me....,5,Amazon,iOS
1,I really like this app but I have tried an tri...,The refreshing is annoying,4,Amazon,iOS
2,"This app is great, i've been using it for a co...",Glitchy songs,4,Amazon,iOS
3,Not a bad music app. Selection is good could b...,Needs Garlic,3,Amazon,iOS
4,"This is one of the most used app on my phone, ...","Love the Alexa integration, Hate the constant ...",2,Amazon,iOS


In [3]:
android = pd.read_csv("../data/android_data.csv").drop("Unnamed: 0", axis = 1)
android.head()

Unnamed: 0,review,rating,music_app,platform
0,I love Amazon music app,5,Amazon,android
1,It moves between devices easily and offers a g...,5,Amazon,android
2,Like it all the music I love!,5,Amazon,android
3,"Amazon always delivers, great quality and endl...",5,Amazon,android
4,Love it!,5,Amazon,android


In [4]:
#split out by app and os so we can get a more in depth analysis
ios_amazon = ios[ios['music_app'] == 'Amazon']
ios_apple = ios[ios['music_app'] == 'Apple']
ios_spotify = ios[ios['music_app'] == 'Spotify']

android_amazon = android[android['music_app'] == 'Amazon']
android_apple = android[android['music_app'] == 'Apple']
android_spotify = android[android['music_app'] == 'Spotify']

# Text Proprocessing

In [5]:
def remove_malformed_chars(dataframes, col):
    for dataset in dataframes:
        dataset[col] = dataset[col].str.replace(r"([^A-Za-z1-9 ])", " ")
    return dataset[col]

In [6]:
remove_malformed_chars([ios_amazon, ios_apple, ios_spotify, android_amazon, android_apple, android_spotify], 'review')

  dataset[col] = dataset[col].str.replace(r"([^A-Za-z1-9 ])", " ")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset[col] = dataset[col].str.replace(r"([^A-Za-z1-9 ])", " ")


31198    Best application for music   This application ...
31199    This is a good app if you have premium however...
31200    I found all the songs i was looking for and so...
31201                                         Help Thank u
31202    Used to have lyrics  then they didn t  now the...
                               ...                        
51392                                                     
51393                                   I hate only ads XD
51394    I can t search and play some songs other than ...
51395                                      Just works well
51396                                              Awesome
Name: review, Length: 20199, dtype: object

In [7]:
remove_malformed_chars([ios_amazon, ios_apple, ios_spotify], 'title')

  dataset[col] = dataset[col].str.replace(r"([^A-Za-z1-9 ])", " ")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset[col] = dataset[col].str.replace(r"([^A-Za-z1-9 ])", " ")


10654                                      Beat app but   
10655                                    It keeps stopping
10656    bluetooth wired connection to car  library format
10657                 Calm down with the long commercials 
10658             not bad music source but could be better
                               ...                        
20649                                         Good  but   
20650                             It needs a little fixing
20651                               ADDS ARE SO REPETITIVE
20652                                  3  second community
20653                             Constant crashing   bugs
Name: title, Length: 10000, dtype: object

In [8]:
#determine stopwords beyond default list
def stopword_update_analysis(dataset, column):
    stopword_list=list(stopwords.words('english'))
    
    #make temp list of lowercase words since this will be done while stemming so I want the most accurate list
    data_lower = pd.Series(dataset[column]).str.lower()
    data_lower = data_lower.dropna()
    
    words = Counter()
    for line in data_lower:
        tokens = nltk.word_tokenize(line)
        for token in tokens:
                words[token] += 1

    words = words.most_common()
    top50full = list(islice(words, 50))
    top50 = []
    for topword in top50full:
        top50.append(topword[0])
        
    nonstopwords = []
    for topword in top50:
        if topword not in stopword_list:
            nonstopwords.append(topword)
    return nonstopwords

In [9]:
stopword_update_analysis(ios_apple, 'title')

['music',
 'apple',
 'app',
 'good',
 'great',
 'songs',
 'needs',
 'update',
 'terrible',
 'ios',
 '15',
 'better',
 'playing',
 'service',
 'spotify',
 'love',
 'please',
 'ui',
 'play',
 'song',
 'playlist',
 'request',
 'bad',
 'could',
 'best',
 'need',
 'buggy']

In [10]:
stopword_update_analysis(ios_apple, 'review')

['music',
 'apple',
 'app',
 'songs',
 'like',
 'song',
 'spotify',
 'play',
 'playlist',
 'library',
 'playlists',
 'time']

In [11]:
stopword_update_analysis(android_apple, 'review')

['app',
 'music',
 'apple',
 'songs',
 'play',
 'good',
 'android',
 'song',
 'great',
 'update',
 'even',
 'like',
 'love',
 'library',
 'download']

In [12]:
stopword_update_analysis(ios_spotify, 'title')

['app',
 'great',
 'good',
 'love',
 'music',
 'update',
 'songs',
 'new',
 'ads',
 'one',
 'best',
 'amazing',
 'please',
 'premium',
 'fix',
 'problem',
 'needs',
 'like',
 'many',
 'better',
 'playlist',
 'shuffle',
 'song',
 'issues']

In [13]:
stopword_update_analysis(ios_spotify, 'review')

['app',
 'songs',
 'music',
 'song',
 'like',
 'playlist',
 'listen',
 'play',
 'want',
 'get',
 'love',
 'would',
 'premium',
 'one']

In [14]:
stopword_update_analysis(android_spotify, 'review')

['app',
 'music',
 'good',
 'spotify',
 'songs',
 'love',
 'song',
 'ads',
 'great',
 'like',
 'best',
 'play',
 'listen',
 'premium',
 'even',
 'nice',
 'use']

In [15]:
stopword_update_analysis(ios_amazon, 'title')

['music',
 'app',
 'great',
 'good',
 'love',
 'better',
 'selection',
 'best',
 'songs',
 'service',
 'prime',
 'could',
 'awesome',
 'use',
 'needs',
 'amazing',
 'like',
 'pretty',
 'easy',
 'unlimited',
 'one']

In [16]:
stopword_update_analysis(ios_amazon, 'title')

['music',
 'app',
 'great',
 'good',
 'love',
 'better',
 'selection',
 'best',
 'songs',
 'service',
 'prime',
 'could',
 'awesome',
 'use',
 'needs',
 'amazing',
 'like',
 'pretty',
 'easy',
 'unlimited',
 'one']

In [17]:
stopword_update_analysis(ios_amazon, 'review')

['music',
 'app',
 'songs',
 'song',
 'like',
 'love',
 'listen',
 'great',
 'play',
 'would',
 'use',
 'prime']

In [18]:
stopword_update_analysis(android_amazon, 'review')

['music',
 'app',
 'songs',
 'amazon',
 'good',
 'great',
 'play',
 'love',
 'song',
 'like',
 'use',
 'even',
 'update']

In [19]:
stopword_list = stopwords.words('english')
stopword_list.extend(['music', 'songs', 'song', 'like', 'app', 'application', 'great','good','even', 'get','use', 'please', 'would', 'apple', 'ios', 'android', 'spotify', 'amazon'])

# Word Frequency Analysis

## Define Functions

In [20]:
def stemmer(dataset, col):
    stemmer = PorterStemmer()
    cleaned_reviews = []
    
    data = dataset[col]
      
    for review in data:
        if review is not None:
            words = nltk.word_tokenize(review)
            new_words = []
            for word in words:
                if word in stopword_list:
                    continue
                else: 
                    word=stemmer.stem(word)
                    new_words.append(word)
            cleaned_review = " ".join(new_words)
            cleaned_reviews.append((cleaned_review))
    return cleaned_reviews

In [21]:
def vectorizer(ngramrange, dataset, col, min_df= .5):
    vectorizer = TfidfVectorizer(ngram_range=ngramrange, stop_words=stopword_list)
    X = vectorizer.fit_transform(dataset[col])
    terms = vectorizer.get_feature_names()
    tf_idf1 = pd.DataFrame(X.toarray().transpose(), index=terms)

    tf_idf1 = tf_idf1.sum(axis=1)
    scores = pd.DataFrame(tf_idf1, columns=["score"]).sort_values('score',ascending = False)
    return scores

In [22]:
#separate negative reviews
#negative reviews
ios_amazon_negative = ios_amazon[ios_amazon['rating'] <= 2]
ios_apple_negative = ios_apple[ios_apple['rating'] <= 2]
ios_spotify_negative = ios_spotify[ios_spotify['rating'] <= 2]

android_amazon_negative = android_amazon[android_amazon['rating'] <= 2]
android_apple_negative = android_apple[android_apple['rating'] <= 2]
android_spotify_negative = android_spotify[android_spotify['rating'] <= 2]

In [23]:
#seperate positive reviews
ios_amazon_positive = ios_amazon[ios_amazon['rating'] >= 4]
ios_apple_positive = ios_apple[ios_apple['rating'] >= 4]
ios_spotify_positive = ios_spotify[ios_spotify['rating'] >= 4]

android_amazon_positive = android_amazon[android_amazon['rating'] >= 4]
android_apple_positive = android_apple[android_apple['rating'] >= 4]
android_spotify_positive = android_spotify[android_spotify['rating'] >= 4]

## Apple Music Analysis

### Negative Reviews

In [24]:
stemmer(ios_apple_negative, 'title')
vectorizer((3,3),ios_apple_negative, 'title').head(15)



Unnamed: 0,score
worst ux player,1.0
poor playlist ability,1.0
offline playback broken,1.0
deleted years ago,1.0
performance animation unstable,1.0
decent service bad,1.0
plagued perfomance issues,1.0
play without permission,1.0
constant requested time,1.0
poorest ever seen,1.0


In [25]:
stemmer(ios_apple_negative, 'review')
vectorizer((3,3),ios_apple_negative, 'review').head(15)



Unnamed: 0,score
keeps crashing try,1.158603
able share playlists,1.0
glitches starts pauses,1.0
keeps downloading playlists,1.0
skips way many,1.0
slow loading time,1.0
stop playing randomly,1.0
stoped working fix,1.0
best upgrade crappy,1.0
fix paying customer,1.0


In [42]:
stemmer(android_apple_negative, 'review')
vectorizer((3,3),android_apple_negative, 'review').head(15)



Unnamed: 0,score
since last update,15.631611
something went wrong,8.029838
listen already purchased,7.870835
listen already paid,7.837425
pay listen already,7.446306
add new library,6.95943
crashes every time,6.35715
every time try,5.399294
since new update,5.352848
new update sucks,5.296272


Some recurring complaints about Apple Music's platform include:
- glitches and bugs making the app slow, crash, or have poor performance overall 
- updates that make the platform performance worse
- issues with playlists

### Positive Reviews

In [27]:
stemmer(ios_apple_positive, 'title')
vectorizer((3,3),ios_apple_positive, 'title').head(15)



Unnamed: 0,score
best streaming service,1.096668
service less ui,1.0
small problem read,1.0
joe ridley jr,1.0
needs preference options,1.0
player could improvements,1.0
one thing ok,1.0
gotta say something,1.0
gets job done,1.0
mines little bugged,1.0


In [28]:
stemmer(ios_apple_positive, 'review')
vectorizer((3,3),ios_apple_positive, 'review').head(15)



Unnamed: 0,score
fix account work,1.0
stream yung kyra,1.0
still favorite dsp,1.0
best ever mean,1.0
best iphone period,1.0
best listen ads,1.0
forever lil tjay,1.0
audio dolby atmos,0.921791
spatial audio dolby,0.921791
best streaming service,0.800598


In [29]:
stemmer(android_apple_positive, 'review')
vectorizer((3,3),android_apple_positive, 'review').head(15)



Unnamed: 0,score
love love love,11.392583
best ever used,6.40893
love new update,5.741727
add sleep timer,5.719239
best streaming service,4.410998
one best apps,3.687554
works really well,3.545535
awesome sound quality,3.262561
love able listen,3.081286
share instagram story,2.848665


Some recurring positive feedback about Apple Music's platform is:
- No Ads
- Spatial audio (dolby atmos)/sound quality
- iPhone homescreen widget capabilities

## Amazon Music Analysis

### Negative Reviews 

In [30]:
stemmer(ios_amazon_negative, 'title')
vectorizer((3,3),ios_amazon_negative, 'title').head(15)



Unnamed: 0,score
new update horrible,2.670533
worse every update,2.12132
gets worse every,2.12132
shuffle work well,2.0
could much better,2.0
crashes every time,1.033538
poor user interface,1.033538
iphone 12 pro,1.002509
12 pro max,1.002509
new format terrible,1.0


In [31]:
stemmer(ios_amazon_negative, 'review')
vectorizer((3,3),ios_amazon_negative, 'review').head(15)



Unnamed: 0,score
every single time,1.404192
since last update,1.297942
every time try,1.253835
already paying prime,1.103422
already pay prime,1.046805
every time open,1.011318
switch offline mode,0.950956
almost every time,0.879442
matter many times,0.856165
content longer available,0.844514


In [32]:
stemmer(android_amazon_negative, 'review')
vectorizer((3,3),android_amazon_negative, 'review').head(15)



Unnamed: 0,score
since last update,8.057387
every time open,3.269406
every time try,2.926727
stops playing time,2.671066
every single time,2.560456
since new update,2.337146
takes forever load,2.216512
crashes every time,2.179308
doesnt work anymore,2.156184
since recent update,2.087647


Some recurring negative feedback about Amazon Music's platform is:
- Poor updates
- Bad user interface
- Platform can be slow, gitchy, or crash
- Offline mode doesn't work as expected

### Positive Reviews

In [33]:
stemmer(ios_amazon_positive, 'title')
vectorizer((3,3),ios_amazon_positive, 'title').head(15)



Unnamed: 0,score
love love love,15.477768
best streaming service,7.284349
worth every penny,6.440742
much better itunes,6.295689
much better pandora,5.654145
best thing ever,4.322506
one favorite apps,4.0
listen every day,4.0
except one thing,3.730607
best service ever,3.0


In [34]:
stemmer(ios_amazon_positive, 'review')
vectorizer((3,3),ios_amazon_positive, 'review').head(15)



Unnamed: 0,score
included prime membership,8.22668
download listen offline,5.268644
already paying prime,5.032458
give five stars,4.908589
ask alexa play,4.353552
love able listen,4.346606
already pay prime,4.241386
without using data,4.161625
one device time,3.950251
make playlists listen,3.231802


In [35]:
stemmer(android_amazon_positive, 'review')
vectorizer((3,3),android_amazon_positive, 'review').head(15)



Unnamed: 0,score
love love love,6.320855
hd ultra hd,3.561663
easy find looking,2.678303
excellent sound quality,2.673547
superb sound quality,2.546609
way better youtube,2.546609
high quality sound,2.356607
best ever used,2.164332
love every day,2.154104
easy create playlists,2.030959


Some recurring positive feedback about Amazon Music's platform is:
- included in prime membership
- office capability is nice to have
- good sound quality
- works with alexa

## Spotify Analysis

### Negative Reviews

In [36]:
stemmer(ios_spotify_negative, 'title')
vectorizer((3,3),ios_spotify_negative, 'title').head(15)



Unnamed: 0,score
new update ruined,6.5022
new update trash,5.0
keeps getting worse,4.0
new update awful,4.0
latest update terrible,3.0
needs lot work,2.673303
way many ads,2.673303
minutes ad free,2.058437
hate new update,2.0
absolutely hate update,2.0


In [37]:
stemmer(ios_spotify_negative, 'review')
vectorizer((3,3),ios_spotify_negative, 'review').head(15)



Unnamed: 0,score
minutes ad free,6.517079
ad free listening,2.336064
skips per hour,2.294834
watch short video,2.008735
every single time,1.784305
min ad free,1.77502
watch video minutes,1.763378
minutes add free,1.743575
minutes uninterrupted listening,1.621853
choose want listen,1.612155


In [38]:
stemmer(android_spotify_negative, 'review')
vectorizer((3,3),android_spotify_negative, 'review').head(15)



Unnamed: 0,score
ads ads ads,6.934288
something went wrong,5.469116
much ads irritating,5.0
way many ads,4.207357
minutes ad free,4.136071
bring back car,3.989147
since last update,3.857175
back car view,3.507699
removed car view,3.369336
tooo many ads,3.07064


Some recurring negative feedback about Spotify's platform is:
- Poor updates
- Ads
- Not being able to choose what to listen to
- No car view/mode

### Positive Reviews

In [39]:
stemmer(ios_spotify_positive, 'title')
vectorizer((3,3),ios_spotify_positive, 'title').head(15)



Unnamed: 0,score
love one thing,8.271063
way many ads,6.455955
hate new update,6.145164
love one problem,6.0
love love love,5.0
except one thing,4.188332
best streaming service,4.158801
best thing ever,4.0
new update horrible,4.0
one small problem,4.0


In [40]:
stemmer(ios_spotify_positive, 'review')
vectorizer((3,3),ios_spotify_positive, 'review').head(20)



Unnamed: 0,score
minutes ad free,12.182929
skips per hour,9.639739
minutes uninterrupted listening,4.853266
watch video minutes,4.240508
ad free listening,4.051657
every time try,3.935418
watch short video,3.737825
one best apps,3.67005
give five stars,3.429797
want listen playlist,3.186195


In [41]:
stemmer(android_spotify_positive, 'review')
vectorizer((3,3),android_spotify_positive, 'review').head(15)



Unnamed: 0,score
best ever used,10.53425
finally added lyrics,8.25068
love love love,7.621479
best experience ever,7.0
new lyrics feature,6.601673
best streaming service,6.393591
best streaming platform,6.025654
one best apps,5.785165
best streaming ever,5.726398
love new update,4.868738


Some recurring positive feedback about Spotify's platform is:
- Allowed to skip a set number of songs per hour
- Lyric viewing capabilities
- Sound quality