In [None]:
# Importing the necessary libraries
import pandas as pd
import os

In [2]:
# Current working directory of the kernel; the dataset paths below are built from it
pwd = os.getcwd()

In [None]:
# Load both CSV files from the "datasets" folder under the working directory.
# os.path.join assembles the paths with the platform's separator instead of
# hand-concatenating "/" onto the directory string (which also yields a doubled
# separator when the working directory is the filesystem root).
apps = pd.read_csv(os.path.join(pwd, "datasets", "apps.csv"))
user_reviews = pd.read_csv(os.path.join(pwd, "datasets", "user_reviews.csv"))
# Preview the first rows of the apps table
apps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,"10,000+",Free,0.0,"January 7, 2018"
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,"500,000+",Free,0.0,"January 15, 2018"
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,"5,000,000+",Free,0.0,"August 1, 2018"
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,"50,000,000+",Free,0.0,"June 8, 2018"
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,"100,000+",Free,0.0,"June 20, 2018"


In [None]:
# Inspect the dtype pandas inferred for every column of the "apps" dataset
apps.dtypes

App              object
Category         object
Rating          float64
Reviews           int64
Size            float64
Installs         object
Type             object
Price           float64
Last Updated     object
dtype: object

In [None]:
# Convert "Installs" from strings such as "10,000+" into plain integers.
# - astype(str) first keeps this cell re-runnable: once the column has already
#   been converted to integers, a bare .str accessor would raise AttributeError
#   on a second execution.
# - regex=False makes "," and "+" literal characters, so the result does not
#   depend on the installed pandas version's default for `regex`
#   ("+" is a quantifier when interpreted as a regular expression).
apps["Installs"] = (
    apps["Installs"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .str.replace("+", "", regex=False)
    .astype(int)
)
apps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,"January 7, 2018"
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,"January 15, 2018"
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,"August 1, 2018"
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,"June 8, 2018"
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,"June 20, 2018"


In [None]:
# Per-category summary: number of apps, mean price and mean rating.
# Named aggregation assigns the final column labels directly, so no separate
# rename step is needed.
app_category_info = (
    apps.groupby("Category")
    .agg(
        **{
            "Number of apps": ("App", "count"),
            "Average price": ("Price", "mean"),
            "Average rating": ("Rating", "mean"),
        }
    )
    .reset_index()
)
app_category_info


Unnamed: 0,Category,Number of apps,Average price,Average rating
0,ART_AND_DESIGN,64,0.093281,4.357377
1,AUTO_AND_VEHICLES,85,0.158471,4.190411
2,BEAUTY,53,0.0,4.278571
3,BOOKS_AND_REFERENCE,222,0.539505,4.34497
4,BUSINESS,420,0.417357,4.098479
5,COMICS,56,0.0,4.181481
6,COMMUNICATION,315,0.263937,4.121484
7,DATING,171,0.160468,3.970149
8,EDUCATION,119,0.150924,4.364407
9,ENTERTAINMENT,102,0.078235,4.135294


In [None]:
# Preview the first five rows of the user_reviews dataset
user_reviews.head()

Unnamed: 0,App,Review,Sentiment Category,Sentiment Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25
2,10 Best Foods for You,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4
4,10 Best Foods for You,Best idea us,Positive,1.0


In [None]:
# Keep only the rows describing free apps in the FINANCE category
free_finance = apps.loc[
    (apps["Type"] == "Free") & (apps["Category"] == "FINANCE")
]
free_finance

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
837,K PLUS,FINANCE,4.4,124424,,10000000,Free,0.0,"June 26, 2018"
838,ING Banking,FINANCE,4.4,39041,,1000000,Free,0.0,"August 3, 2018"
839,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018"
840,The postal bank,FINANCE,3.7,36718,,5000000,Free,0.0,"July 16, 2018"
841,KTB Netbank,FINANCE,3.8,42644,19.0,5000000,Free,0.0,"June 28, 2018"
...,...,...,...,...,...,...,...,...,...
9540,BankNordik,FINANCE,3.9,28,15.0,5000,Free,0.0,"August 8, 2018"
9564,FP Markets,FINANCE,,1,2.0,100,Free,0.0,"January 30, 2018"
9565,FP Boss,FINANCE,,1,5.8,1,Free,0.0,"July 27, 2018"
9572,FP FCU,FINANCE,3.6,48,26.0,5000,Free,0.0,"April 5, 2018"


In [None]:
# Attach the user reviews to the free finance apps by app name.
# A left join keeps every finance app, including those without any review.
finance = pd.merge(free_finance, user_reviews, how="left", on="App")
finance.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated,Review,Sentiment Category,Sentiment Score
0,K PLUS,FINANCE,4.4,124424,,10000000,Free,0.0,"June 26, 2018",,,
1,ING Banking,FINANCE,4.4,39041,,1000000,Free,0.0,"August 3, 2018",,,
2,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018","Forget paying app, designed make fail payments...",Negative,-0.5
3,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018","It's working expected, talking best bank Mexic...",Positive,0.4
4,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018",It has many problems with Android 8.1. You can...,Positive,0.25


In [None]:
# Mean sentiment score per app, ranked from highest to lowest
finance_score = (
    finance
    .groupby("App")["Sentiment Score"]
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
finance_score

Unnamed: 0,App,Sentiment Score
0,BBVA Spain,0.515086
1,Associated Credit Union Mobile,0.388093
2,BankMobile Vibe App,0.353455
3,A+ Mobile,0.329592
4,Current debit card and app made for teens,0.327258
...,...,...
323,loans.com.au Smart Money,
324,m-Faisaa,
325,mon guide au bank populaire -infos et instruct...,
326,my face,


In [None]:
# Ten best-scoring apps: first two columns of the ranking, first ten rows
top_10_user_feedback = finance_score.iloc[:, :2].head(10)
top_10_user_feedback

Unnamed: 0,App,Sentiment Score
0,BBVA Spain,0.515086
1,Associated Credit Union Mobile,0.388093
2,BankMobile Vibe App,0.353455
3,A+ Mobile,0.329592
4,Current debit card and app made for teens,0.327258
5,BZWBK24 mobile,0.326883
6,"Even - organize your money, get paid early",0.283929
7,Credit Karma,0.270052
8,Fortune City - A Finance App,0.266966
9,Branch,0.26423
