In [1]:
# Importing the pandas module
import pandas as pd

In [2]:
# Read datasets/apps.csv 
apps = pd.read_csv("datasets/apps.csv")

In [24]:
# QUESTION 1
# Data Cleaning
# List of characters to remove
chars_to_remove = ['+', ',']
# Replace each character with an empty string
for char in chars_to_remove:
    apps['Installs'] = apps['Installs'].apply(lambda x: x.replace(char, ''))
# Convert to int
apps['Installs'] = apps['Installs'].astype(int)

AttributeError: 'int' object has no attribute 'replace'

In [17]:
# QUESTION 2
# Group by app category and apply the aggregate functions
app_category_info = apps.groupby('Category').agg({'App': 'count', 'Price': 'mean', 'Rating': 'mean'})
app_category_info.head()

Unnamed: 0_level_0,App,Price,Rating
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ART_AND_DESIGN,64,0.093281,4.357377
AUTO_AND_VEHICLES,85,0.158471,4.190411
BEAUTY,53,0.0,4.278571
BOOKS_AND_REFERENCE,222,0.539505,4.34497
BUSINESS,420,0.417357,4.098479


In [18]:
# Rename the columns for easier understanding
app_category_info = app_category_info.rename(columns={"App": "Number of apps", "Price": "Average price", "Rating": "Average rating"})
app_category_info.head()

Unnamed: 0_level_0,Number of apps,Average price,Average rating
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ART_AND_DESIGN,64,0.093281,4.357377
AUTO_AND_VEHICLES,85,0.158471,4.190411
BEAUTY,53,0.0,4.278571
BOOKS_AND_REFERENCE,222,0.539505,4.34497
BUSINESS,420,0.417357,4.098479


In [19]:
# QUESTION 3
# Read datasets/user_reviews.csv
reviews = pd.read_csv('datasets/user_reviews.csv')
reviews.head()

Unnamed: 0,App,Review,Sentiment Category,Sentiment Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25
2,10 Best Foods for You,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4
4,10 Best Foods for You,Best idea us,Positive,1.0


In [20]:
# Select finance apps
finance_apps = apps[apps['Category'] == 'FINANCE']
# Select free finance apps
free_finance_apps = finance_apps[finance_apps['Type'] == 'Free']
# We can also combine the two conditions in a single line of code using the & operator
# free_finance_apps = apps[(apps['Category'] == 'FINANCE') & (apps['Type'] == 'Free')]
free_finance_apps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
837,K PLUS,FINANCE,4.4,124424,,10000000,Free,0.0,"June 26, 2018"
838,ING Banking,FINANCE,4.4,39041,,1000000,Free,0.0,"August 3, 2018"
839,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018"
840,The postal bank,FINANCE,3.7,36718,,5000000,Free,0.0,"July 16, 2018"
841,KTB Netbank,FINANCE,3.8,42644,19.0,5000000,Free,0.0,"June 28, 2018"


In [21]:
# Join the dataframes
merged_df = pd.merge(finance_apps, reviews, on = "App", how = "inner")
# The default value of "how" argument is "inner", so we can skip specifying it. 
# But it is a good practice to specify the type of your join for better code readability.
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated,Review,Sentiment Category,Sentiment Score
0,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018","Forget paying app, designed make fail payments...",Negative,-0.5
1,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018","It's working expected, talking best bank Mexic...",Positive,0.4
2,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018",It has many problems with Android 8.1. You can...,Positive,0.25
3,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018","I changed my phone to a Xiaomi Redmi Note 5, t...",Positive,0.175
4,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018",In her eagerness to make her look pretty with ...,Negative,-0.158333


In [22]:
# Find the average sentiment score for each app
app_sentiment_score = merged_df.groupby('App').agg({'Sentiment Score' :'mean'})
app_sentiment_score.head()

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
A+ Mobile,0.329592
ACE Elite,0.252171
Acorns - Invest Spare Change,0.046667
Amex Mobile,0.175666
Associated Credit Union Mobile,0.388093


In [23]:
# Sort the average sentiment score from highest to lowest (ie - in decreasing order)
user_feedback = app_sentiment_score.sort_values(by = 'Sentiment Score', ascending = False)
user_feedback.head()

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
BBVA Spain,0.515086
Associated Credit Union Mobile,0.388093
BankMobile Vibe App,0.353455
A+ Mobile,0.329592
Current debit card and app made for teens,0.327258


In [11]:
# select first 10
top_10_user_feedback = user_feedback[:10]
top_10_user_feedback

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
BBVA Spain,0.515086
Associated Credit Union Mobile,0.388093
BankMobile Vibe App,0.353455
A+ Mobile,0.329592
Current debit card and app made for teens,0.327258
BZWBK24 mobile,0.326883
"Even - organize your money, get paid early",0.283929
Credit Karma,0.270052
Fortune City - A Finance App,0.266966
Branch,0.26423
