# Starting from the Google Play Store dataset

In [1]:
# Caricamento librerie d'interesse
import pandas as pd
import numpy as np
import re
from collections import Counter

In [2]:
#Lettura dataset googleplaystore
playstore = pd.read_csv('googleplaystore.csv')
playstore.head(2)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up


In [3]:
#Lettura Last Updated
date_re = re.compile('(?P<month>\w+)\s(?P<day>\d+)\,\s(?P<year>\d+)')
month_diz = {'January': 1,'February':2,'March':3,'April':4,'May':5,'June':6,'July':7,'August':8,'September':9,'October':10,
         'November':11,'December':12}

def parse_date(data):
    dm = date_re.match(data)
    if dm:
        day = int(dm.group('day'))
        if (day < 10):
            day = '0' + str(day)   
        month = month_diz[dm.group('month')]   
        year = dm.group('year')
        return (str(day)+str(month)+str(year))
    else:
        return(np.nan)
    
playstore['Last_updated_parsed'] = pd.to_datetime(playstore['Last Updated'].apply(parse_date),format='%d%m%Y')
playstore[['Last Updated','Last_updated_parsed']].head()

Unnamed: 0,Last Updated,Last_updated_parsed
0,"January 7, 2018",2018-01-07
1,"January 15, 2018",2018-01-15
2,"August 1, 2018",2018-08-01
3,"June 8, 2018",2018-06-08
4,"June 20, 2018",2018-06-20


In [4]:
#Lettura dataset googleplaystore_user_reviews.csv
playstore_review = pd.read_csv('googleplaystore_user_reviews.csv')
playstore_review.head(2)

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462


# Convert the app sizes to a number

In [5]:
#Ispezioni possibili moltiplicatori
print(playstore['Size'].isnull().any()) # No missing

def moltiplicatori_size(size):
    if (re.search('[a-zA-Z]', size)):
        size = re.sub('[\d\W]', '', size) #elimino tutto tranne lettere
    return(size)

list_molt = list(playstore['Size'].apply(moltiplicatori_size))

print(Counter(list_molt))
#Moltiplicatori sono k e M il '' si ha perchè un valore è pari a 1,000+ (niente moltiplicatore quindi)

playstore[playstore['Size'] == '1,000+'] # Valori traslati di una variabile -->INDEX = 10472

False
Counter({'M': 8829, 'Varieswithdevice': 1695, 'k': 316, '1,000+': 1})


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Last_updated_parsed
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,,NaT


In [6]:
re_size = re.compile('^(?P<num>\d+\.?\d*)(?P<moltiplicatore>\w*)$')

def size_tonumber(app_size):
    r = re_size.match(app_size)
    if r:
        molt = r.group('moltiplicatore')
        if (molt == 'M'):
            molt = 1000000
        elif (molt == 'k'):
            molt = 1000
        app_size = float(r.group('num'))*molt
    return(app_size)

playstore['Size_num'] = playstore['Size'].apply(size_tonumber)
playstore[['Size','Size_num']].head()

Unnamed: 0,Size,Size_num
0,19M,19000000.0
1,14M,14000000.0
2,8.7M,8700000.0
3,25M,25000000.0
4,2.8M,2800000.0


# Convert the number of installs to a number

In [7]:
#Ispezione possibili valori attributo Installs
print(playstore['Installs'].isnull().any()) # No missing
print(playstore.groupby('Installs').size())

#presenza modalità 'Free'
playstore[playstore['Installs'] == 'Free'] # Valori traslati di una variabile, stesso record sopra

False
Installs
0                    1
0+                  14
1+                  67
1,000+             907
1,000,000+        1579
1,000,000,000+      58
10+                386
10,000+           1054
10,000,000+       1252
100+               719
100,000+          1169
100,000,000+       409
5+                  82
5,000+             477
5,000,000+         752
50+                205
50,000+            479
50,000,000+        289
500+               330
500,000+           539
500,000,000+        72
Free                 1
dtype: int64


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Last_updated_parsed,Size_num
10472,Life Made WI-Fi Touchscreen Photo Frame,1.9,19.0,3.0M,"1,000+",Free,0,Everyone,,"February 11, 2018",1.0.19,4.0 and up,,NaT,"1,000+"


In [8]:
def installs_tonumber(serie):
    if (re.search('\d',serie)):
        serie = int(re.sub('\D', '', serie)) #elimino tutti i caratteri esclusi i numerici
    return(serie)
        
playstore['Installs_num'] = playstore['Installs'].apply(installs_tonumber)
playstore[['Installs','Installs_num']].head()

Unnamed: 0,Installs,Installs_num
0,"10,000+",10000
1,"500,000+",500000
2,"5,000,000+",5000000
3,"50,000,000+",50000000
4,"100,000+",100000


# Transform “Varies with device” into a missing value

In [9]:
def missing(valore):
    if (valore == 'Varies with device'):
        valore = np.nan
    return(valore)

variabili = list(playstore.columns)
for variabile in variabili:
    playstore[variabile] = playstore[variabile].apply(missing)
    #print('Numero di missing value nella variabile ' + str(variabile) + ': ' + str(playstore[variabile].isnull().sum()))

# Convert Current Ver and Android Ver into a dotted number (e.g. 4.0.3 or 4.2)

In [10]:
#ispezione Android Ver
print(playstore['Android Ver'].isnull().any()) # Si missing

playstore.groupby('Android Ver').size()
#La presenza del trattino indica due versioni, l'assenza indica una sola versione a cui segue una stringa

True


Android Ver
1.0 and up          2
1.5 and up         20
1.6 and up        116
2.0 and up         32
2.0.1 and up        7
2.1 and up        134
2.2 - 7.1.1         1
2.2 and up        244
2.3 and up        652
2.3.3 and up      281
3.0 and up        241
3.1 and up         10
3.2 and up         36
4.0 and up       1375
4.0.3 - 7.1.1       2
4.0.3 and up     1501
4.1 - 7.1.1         1
4.1 and up       2451
4.2 and up        394
4.3 and up        243
4.4 and up        980
4.4W and up        12
5.0 - 6.0           1
5.0 - 7.1.1         1
5.0 - 8.0           2
5.0 and up        601
5.1 and up         24
6.0 and up         60
7.0 - 7.1.1         1
7.0 and up         42
7.1 and up          3
8.0 and up          6
dtype: int64

In [11]:
vers_re = re.compile('^(?P<vers>\d\.\d\.{0,1}\d{0,1})')

def version(ver):
    if ver is not np.nan:
        temp = ver.split('-')
        if len(temp)>1: #In caso di trattino e quindi due versioni
            ver = str(temp[1].strip()) #Rimuovo spazi iniziali e finali del secondo elemento della lista
        else: 
            ver = vers_re.match(ver).group('vers')
    return(ver)

playstore['Android_Ver_Clean'] = playstore['Android Ver'].apply(version)
print(playstore.groupby('Android_Ver_Clean').size())
playstore[['Android Ver','Android_Ver_Clean']].head()

Android_Ver_Clean
1.0         2
1.5        20
1.6       116
2.0        32
2.0.1       7
2.1       134
2.2       244
2.3       652
2.3.3     281
3.0       241
3.1        10
3.2        36
4.0      1375
4.0.3    1501
4.1      2451
4.2       394
4.3       243
4.4       992
5.0       601
5.1        24
6.0        61
7.0        42
7.1         3
7.1.1       6
8.0         8
dtype: int64


Unnamed: 0,Android Ver,Android_Ver_Clean
0,4.0.3 and up,4.0.3
1,4.0.3 and up,4.0.3
2,4.0.3 and up,4.0.3
3,4.2 and up,4.2
4,4.4 and up,4.4


In [12]:
print(playstore['Current Ver'].isnull().any()) # Si missing
playstore.groupby('Current Ver').size()

True


Current Ver
0.0.0.2                      1
0.0.1                       15
0.0.10                       1
0.0.2                        4
0.0.3                        2
0.0.4                        3
0.0.42                       1
0.0.44                       1
0.0.5                        3
0.0.52                       1
0.0.53                       1
0.0.6                        1
0.0.7                        3
0.0.73                       1
0.0.80                       1
0.0.9                        5
0.1                         13
0.1.0                        2
0.1.1                        3
0.1.10.0                     1
0.1.100944346                2
0.1.11                       1
0.1.187945513                1
0.1.2                        1
0.1.219                      1
0.1.22                       1
0.1.27                       1
0.1.5                        1
0.1.6                        1
0.1.7                        1
                            ..
v150                       

In [13]:
def current_ver_clean(ver):
    if ver is not np.nan:
        ver = re.sub('[^\d\.]', '', ver) #elimino tutte le lettere e i caratteri che non sono punti
        if (len(ver)>1 and ver[0] == '.'): #rimuovere il punto iniziale se presente
             ver = ver[1:]
        if ((ver == '') or (ver == '.')): #tali casi si presentano quando il valore è una parola
            return(np.nan)              
    return(ver)

playstore['Current_Ver_Clean'] = playstore['Current Ver'].apply(current_ver_clean)

print(playstore.groupby('Current_Ver_Clean').size())
playstore[['Current Ver','Current_Ver_Clean']].head()

Current_Ver_Clean
0.0.0.2              1
0.0.1               15
0.0.10               1
0.0.2                4
0.0.3                2
0.0.4                3
0.0.42               1
0.0.44               1
0.0.5                3
0.0.52               1
0.0.53               1
0.0.6                1
0.0.7                3
0.0.73               1
0.0.80               1
0.0.9                5
0.1                 13
0.1.0                2
0.1.1                3
0.1.10.0             1
0.1.100944346        2
0.1.11               1
0.1.187945513        1
0.1.2                1
0.1.219              1
0.1.22               1
0.1.27               1
0.1.5                1
0.1.6                1
0.1.7                1
                    ..
9.2.4                1
9.3.1                3
9.3.3                1
9.3.52               1
9.3.5776             1
9.3.6                1
9.4.0                1
9.4.1.3              1
9.4.2                1
9.4.7.6              1
9.40.3               2
9.5.0           

Unnamed: 0,Current Ver,Current_Ver_Clean
0,1.0.0,1.0.0
1,2.0.0,2.0.0
2,1.2.4,1.2.4
3,,
4,1.1,1.1


# Remove the duplicates

In [14]:
#Da questo punto in poi conviene eliminare il record numero 10472 in quanto traslato
playstore = playstore.drop(10472)
#Per ogni app, viene/vengono selezionato/i i record che presentano meno recensioni in quanto meno recenti
playstore['Reviews'] = playstore['Reviews'].astype(int)
duplicati = playstore.sort_values(by=['App','Reviews'],ascending=True)
duplicati_cut = duplicati[duplicati.duplicated(subset=['App'], keep='last')]
duplicati_cut

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Last_updated_parsed,Size_num,Installs_num,Android_Ver_Clean,Current_Ver_Clean
1393,10 Best Foods for You,HEALTH_AND_FITNESS,4.0,2490,3.8M,"500,000+",Free,0,Everyone 10+,Health & Fitness,"February 17, 2017",1.9,2.3.3 and up,2017-02-17,3.8e+06,500000,2.3.3,1.9
2322,1800 Contacts - Lens Store,MEDICAL,4.7,23160,26M,"1,000,000+",Free,0,Everyone,Medical,"July 27, 2018",7.4.1,5.0 and up,2018-07-27,2.6e+07,1000000,5.0,7.4.1
2256,2017 EMRA Antibiotic Guide,MEDICAL,4.4,12,3.8M,"1,000+",Paid,$16.99,Everyone,Medical,"January 27, 2017",1.0.5,4.0.3 and up,2017-01-27,3.8e+06,1000,4.0.3,1.0.5
1337,21-Day Meditation Experience,HEALTH_AND_FITNESS,4.4,11506,15M,"100,000+",Free,0,Everyone,Health & Fitness,"August 2, 2018",3.0.0,4.1 and up,2018-08-02,1.5e+07,100000,4.1,3.0.0
5415,365Scores - Live Scores,SPORTS,4.6,666246,25M,"10,000,000+",Free,0,Everyone,Sports,"July 29, 2018",5.5.9,4.1 and up,2018-07-29,2.5e+07,10000000,4.1,5.5.9
2522,420 BZ Budeze Delivery,MEDICAL,5.0,2,11M,100+,Free,0,Mature 17+,Medical,"June 6, 2018",1.0.1,4.1 and up,2018-06-06,1.1e+07,100,4.1,1.0.1
3953,8 Ball Pool,SPORTS,4.5,14184910,52M,"100,000,000+",Free,0,Everyone,Sports,"July 31, 2018",4.0.0,4.0.3 and up,2018-07-31,5.2e+07,100000000,4.0.3,4.0.0
1675,8 Ball Pool,GAME,4.5,14198297,52M,"100,000,000+",Free,0,Everyone,Sports,"July 31, 2018",4.0.0,4.0.3 and up,2018-07-31,5.2e+07,100000000,4.0.3,4.0.0
1703,8 Ball Pool,GAME,4.5,14198602,52M,"100,000,000+",Free,0,Everyone,Sports,"July 31, 2018",4.0.0,4.0.3 and up,2018-07-31,5.2e+07,100000000,4.0.3,4.0.0
1755,8 Ball Pool,GAME,4.5,14200344,52M,"100,000,000+",Free,0,Everyone,Sports,"July 31, 2018",4.0.0,4.0.3 and up,2018-07-31,5.2e+07,100000000,4.0.3,4.0.0


In [15]:
playstore = playstore.drop(duplicati_cut.index)

# For each category, compute the number of apps

In [16]:
playstore.groupby('Category').size()

Category
ART_AND_DESIGN           61
AUTO_AND_VEHICLES        85
BEAUTY                   53
BOOKS_AND_REFERENCE     222
BUSINESS                420
COMICS                   56
COMMUNICATION           315
DATING                  170
EDUCATION               105
ENTERTAINMENT            86
EVENTS                   64
FAMILY                 1878
FINANCE                 345
FOOD_AND_DRINK          112
GAME                    945
HEALTH_AND_FITNESS      288
HOUSE_AND_HOME           73
LIBRARIES_AND_DEMO       84
LIFESTYLE               369
MAPS_AND_NAVIGATION     131
MEDICAL                 395
NEWS_AND_MAGAZINES      254
PARENTING                60
PERSONALIZATION         376
PHOTOGRAPHY             281
PRODUCTIVITY            374
SHOPPING                202
SOCIAL                  239
SPORTS                  325
TOOLS                   829
TRAVEL_AND_LOCAL        219
VIDEO_PLAYERS           164
WEATHER                  79
dtype: int64

# For each category, compute the average rating

In [17]:
playstore.groupby('Category')['Rating'].mean()

Category
ART_AND_DESIGN         4.359322
AUTO_AND_VEHICLES      4.190411
BEAUTY                 4.278571
BOOKS_AND_REFERENCE    4.344970
BUSINESS               4.098479
COMICS                 4.181481
COMMUNICATION          4.121484
DATING                 3.980451
EDUCATION              4.349038
ENTERTAINMENT          4.129070
EVENTS                 4.435556
FAMILY                 4.184150
FINANCE                4.115563
FOOD_AND_DRINK         4.171277
GAME                   4.244432
HEALTH_AND_FITNESS     4.243033
HOUSE_AND_HOME         4.140984
LIBRARIES_AND_DEMO     4.178125
LIFESTYLE              4.093355
MAPS_AND_NAVIGATION    4.036441
MEDICAL                4.165862
NEWS_AND_MAGAZINES     4.121569
PARENTING              4.300000
PERSONALIZATION        4.332215
PHOTOGRAPHY            4.155894
PRODUCTIVITY           4.183389
SHOPPING               4.230556
SOCIAL                 4.247291
SPORTS                 4.216154
TOOLS                  4.040278
TRAVEL_AND_LOCAL       4.069519

# Create two dataframes: one for the genres and one bridging apps and genders. So that, for instance, the app Pixel Draw - Number Art Coloring Book appears twice in the bridging table, once for Art & Design, once for Creativity

In [18]:
lista_app = []
lista_genres = []

for index,row in playstore.iterrows():
    l_generi = row['Genres'].split(';')
    for genre in l_generi:
        lista_genres.append(genre.strip())
        lista_app.append(row['App'])

app_genre = pd.DataFrame({'App' : lista_app, 'Genres_one' : lista_genres})
app_genre.head()

Unnamed: 0,App,Genres_one
0,Photo Editor & Candy Camera & Grid & ScrapBook,Art & Design
1,"U Launcher Lite – FREE Live Cool Themes, Hide ...",Art & Design
2,Sketch - Draw & Paint,Art & Design
3,Pixel Draw - Number Art Coloring Book,Art & Design
4,Pixel Draw - Number Art Coloring Book,Creativity


In [19]:
genre_1 = []
genre_2 = []

for index,row in playstore.iterrows():
    l_generi = row['Genres'].split(';')
    genre_1.append(l_generi[0])
    if len(l_generi)<2:
        genre_2.append(np.nan)
    else:
        genre_2.append(l_generi[1])
        
df_2 = pd.DataFrame({'App' : playstore['App'], 'Genre_1' : genre_1, 'Genre_2' : genre_2})
df_2.head()

Unnamed: 0,App,Genre_1,Genre_2
0,Photo Editor & Candy Camera & Grid & ScrapBook,Art & Design,
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",Art & Design,
3,Sketch - Draw & Paint,Art & Design,
4,Pixel Draw - Number Art Coloring Book,Art & Design,Creativity
5,Paper flowers instructions,Art & Design,


In [20]:
df_genres = pd.DataFrame({'Genres' : list(set(lista_genres))})
df_genres.head()

Unnamed: 0,Genres
0,Communication
1,Action & Adventure
2,Beauty
3,Social
4,Photography


# For each genre, create a new column of the original dataframe. The new columns must have boolean values (True if the app has a given genre)

In [21]:
l_false = [False] * playstore.shape[0] #Lista di False
lista_generi = list(set(lista_genres)) #Lista con generi univoci

for genere in lista_generi:
    playstore[genere] = l_false #Inserisco in Playstore un attributo per ogni genere i cui valori sono tutti False
    
for index,row in playstore.iterrows():
    appoggio = row['Genres'].split(';')
    for genre in appoggio:
        playstore.at[index, genre.strip()] = True #Setto a True tutti i valori degli attributi riguardanti il genere/generi dell'app

playstore

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,...,Strategy,Action,Card,Productivity,Books & Reference,Sports,Casino,Video Players & Editors,Pretend Play,Comics
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,...,False,False,False,False,False,False,False,False,False,False
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,...,False,False,False,False,False,False,False,False,False,False
5,Paper flowers instructions,ART_AND_DESIGN,4.4,167,5.6M,"50,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
6,Smoke Effect Photo Maker - Smoke Editor,ART_AND_DESIGN,3.8,178,19M,"50,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
7,Infinite Painter,ART_AND_DESIGN,4.1,36815,29M,"1,000,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
8,Garden Coloring Book,ART_AND_DESIGN,4.4,13791,33M,"1,000,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False
9,Kids Paint Free - Drawing Fun,ART_AND_DESIGN,4.7,121,3.1M,"10,000+",Free,0,Everyone,Art & Design;Creativity,...,False,False,False,False,False,False,False,False,False,False
10,Text on Photo - Fonteee,ART_AND_DESIGN,4.4,13880,28M,"1,000,000+",Free,0,Everyone,Art & Design,...,False,False,False,False,False,False,False,False,False,False


# For each genre, compute the average rating. What is the genre with highest average?

In [22]:
dff = pd.merge(app_genre, playstore, on='App')
dff.groupby('Genres_one')['Rating'].mean()

Genres_one
Action                     4.247697
Action & Adventure         4.288542
Adventure                  4.191026
Arcade                     4.277838
Art & Design               4.350000
Auto & Vehicles            4.190411
Beauty                     4.278571
Board                      4.291228
Books & Reference          4.343275
Brain Games                4.358065
Business                   4.098479
Card                       4.082609
Casino                     4.286486
Casual                     4.141014
Comics                     4.181481
Communication              4.121790
Creativity                 4.306250
Dating                     3.980451
Education                  4.290290
Educational                4.100000
Entertainment              4.093028
Events                     4.435556
Finance                    4.115563
Food & Drink               4.171277
Health & Fitness           4.243496
House & Home               4.140984
Libraries & Demo           4.178125
Lifestyle        

In [23]:
#Rating medio massimo
dff.groupby('Genres_one')['Rating'].mean().idxmax()

'Events'

# For each app, compute the approximate income, obtain as a product of number of installs and price.

In [24]:
#ispezione price
print(playstore.Price.isnull().any())#no missing
playstore.groupby('Price').size()
#quindi il valore o è 0 o ha come valuta il $

False


Price
$0.99       145
$1.00         3
$1.04         1
$1.20         1
$1.26         1
$1.29         1
$1.49        46
$1.50         1
$1.59         1
$1.61         1
$1.70         2
$1.75         1
$1.76         1
$1.96         1
$1.97         1
$1.99        73
$10.00        2
$10.99        2
$109.99       1
$11.99        3
$12.99        3
$13.99        2
$14.00        1
$14.99        9
$15.46        1
$15.99        1
$154.99       1
$16.99        2
$17.99        2
$18.99        1
           ... 
$389.99       1
$39.99        2
$394.99       1
$399.99      12
$4.29         1
$4.49         9
$4.59         1
$4.60         1
$4.77         1
$4.80         1
$4.84         1
$4.85         1
$4.99        70
$400.00       1
$46.99        1
$5.00         1
$5.49         5
$5.99        26
$6.49         5
$6.99        10
$7.49         2
$7.99         7
$74.99        1
$79.99        1
$8.49         2
$8.99         5
$89.99        1
$9.00         1
$9.99        19
0          8905
Length: 92, dtype:

In [25]:
playstore['Price_number'] = playstore['Price'].apply(lambda x: float(x.strip('$'))) #Rimuovo eventuale valuta
playstore[playstore['Price'] != '0'][['Price','Price_number']].head()#verifica se prezzo è trasformato correttamento (0 escluso)

Unnamed: 0,Price,Price_number
290,$4.99,4.99
291,$4.99,4.99
427,$3.99,3.99
478,$1.49,1.49
479,$2.99,2.99


In [26]:
playstore['Income'] = playstore['Price_number'] * playstore['Installs_num']
playstore[playstore['Price'] != '0'][['Price','Installs','Income']].head()

Unnamed: 0,Price,Installs,Income
290,$4.99,"100,000+",499000.0
291,$4.99,"100,000+",499000.0
427,$3.99,"100,000+",399000.0
478,$1.49,50+,74.5
479,$2.99,100+,299.0


# For each app, compute its minimum and maximum Sentiment_polarity

In [27]:
sent_max = pd.DataFrame(playstore_review.groupby('App')['Sentiment_Polarity'].max()).reset_index()
sent_min = pd.DataFrame(playstore_review.groupby('App')['Sentiment_Polarity'].min()).reset_index()
sent = pd.merge(sent_max,sent_min, on='App')
sent.rename(columns={'Sentiment_Polarity_x':'Sentiment_Polarity_max','Sentiment_Polarity_y':'Sentiment_Polarity_min'}).head()

Unnamed: 0,App,Sentiment_Polarity_max,Sentiment_Polarity_min
0,10 Best Foods for You,1.0,-0.8
1,104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室,0.91,-0.1125
2,11st,1.0,-1.0
3,1800 Contacts - Lens Store,0.838542,-0.3
4,1LINE – One Line with One Touch,1.0,-0.825
