In [210]:
# Plotly offline mode
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
init_notebook_mode()
# Star import kept ahead of the IPython imports so that, on a name clash,
# the explicitly imported IPython names still win.
from plotly.graph_objs import *
# IPython
from IPython.display import Markdown, HTML
from IPython.display import display, Image
# Pandas
import pandas as pd
try:
    # pandas >= 1.0 exposes json_normalize at the top level
    from pandas import json_normalize
except ImportError:
    # older pandas releases only provide it under pandas.io.json
    from pandas.io.json import json_normalize
# Python standard library
import io
import json

In [129]:
# Opens and normalizes the JSON file.
# Then, it changes the type of the date column to datetime.
def normalizeDataFrame(file_name, date_column='listing.rDate'):
    """Load a JSON file of records into a flat DataFrame with a parsed date column.

    Nested objects are flattened by ``json_normalize`` (a nested
    ``{"listing": {"rDate": ...}}`` becomes the column ``listing.rDate``),
    then ``date_column`` is converted with ``pd.to_datetime``.

    Parameters
    ----------
    file_name : str
        Path of the JSON file to read.
    date_column : str, optional
        Name of the flattened column that holds the dates.
        Defaults to ``'listing.rDate'``.

    Returns
    -------
    pandas.DataFrame
        The normalized records, with ``date_column`` converted to datetime.

    Raises
    ------
    KeyError
        If ``date_column`` is not a column of the normalized data.
    """
    # 1- Open and normalize the JSON file.
    # The encoding is given explicitly so decoding does not depend on the
    # platform's locale default; io.open accepts it on Python 2 and 3 alike.
    with io.open(file_name, encoding='utf-8') as content:
        data = json.load(content)
    df = json_normalize(data)
    # 2- Change the type of the date column to datetime.
    df[date_column] = pd.to_datetime(df[date_column])
    return df

In [130]:
# Load every app version released in 2014, show a heading,
# and end the cell with the number of rows loaded.
df14 = normalizeDataFrame('../data/apps-by-year/released_in_2014.json')
display(HTML('<h1>App versions released in 2014: </h1>'))
df14.shape[0]

56851

In [131]:
# Preview the first five rows of the 2014 frame.
df14.head(n=5)

Unnamed: 0,id,listing.rDate,listing.rating,packageName,versionCode,versionName
0,NoamStudios.Games.MegaTicTacToeFree-18,2014-09-01,4.0,NoamStudios.Games.MegaTicTacToeFree,18,2.5
1,Air.Lite-2,2014-06-17,4.2,Air.Lite,2,1.1
2,Ibtikar.Applications.Ibaloot-1200009,2014-06-19,3.995383,Ibtikar.Applications.Ibaloot,1200009,4.3
3,Jobkorea.AlbamonMJ-54,2014-01-22,3.869911,Jobkorea.AlbamonMJ,54,2.4.2
4,PocketGest.PocketGest-25,2014-10-07,3.131579,PocketGest.PocketGest,25,2.9.6


In [132]:
# Show every 2014 version of one package (it has more than one release).
df14.loc[df14['packageName'].eq('PocketGest.PocketGest')]

Unnamed: 0,id,listing.rDate,listing.rating,packageName,versionCode,versionName
4,PocketGest.PocketGest-25,2014-10-07,3.131579,PocketGest.PocketGest,25,2.9.6
19,PocketGest.PocketGest-21,2014-09-10,3.027778,PocketGest.PocketGest,21,2.9.2


In [133]:
# Group apps by package name and obtain the earliest version.
# Rows are sorted by release date and only the first (earliest) row of each
# package is kept, as one intact record indexed by package name.
# GroupBy.first() is deliberately not used: it returns the first non-null
# value of every column separately, so a gap in the earliest row would be
# filled with a value taken from a later version of the same app.
df14_first = (
    df14.dropna(subset=['packageName'])  # groupby also ignores rows without a key
        .sort_values(by='listing.rDate', ascending=True, kind='mergesort')  # stable sort
        .drop_duplicates(subset='packageName', keep='first')
        .set_index('packageName')
        .sort_index()  # same sorted index order that groupby produced
)
df14_first.head()

Unnamed: 0_level_0,id,listing.rDate,listing.rating,versionCode,versionName
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Air.Lite,Air.Lite-2,2014-06-17,4.2,2,1.1
Alfasoft.Widgets.SweetYorkshireAnalogClock,Alfasoft.Widgets.SweetYorkshireAnalogClock-2,2014-01-18,3.9,2,1.1
AutomateIt.mainPackage,AutomateIt.mainPackage-84,2014-01-14,4.281773,84,3.0.84
B4A.Agroid_software,B4A.Agroid_software-2010,2014-03-20,3.909091,2010,2.0.10
Busan.Subway.f7key.Cauly,Busan.Subway.f7key.Cauly-78,2014-01-08,4.253991,78,1.5.4


In [134]:
# Sanity check: rating recorded for this package's earliest 2014 release,
# read with a single row/column .loc lookup.
df14_first.loc['PocketGest.PocketGest', 'listing.rating']

3.0277777000000001

In [135]:
# Number of rows in the per-package 2014 frame (one row per package name).
df14_first.shape[0]

44730

In [136]:
# Load every app version released in 2015, show a heading,
# and end the cell with the number of rows loaded.
df15 = normalizeDataFrame('../data/apps-by-year/released_in_2015.json')
display(HTML('<h1>App versions released in 2015: </h1>'))
df15.shape[0]

122942

In [137]:
# Preview the first five rows of the 2015 frame.
df15.head(n=5)

Unnamed: 0,id,listing.rDate,listing.rating,packageName,versionCode,versionName
0,Copeland.XRef-7,2015-10-07,3.2,Copeland.XRef,7,3.1.2
1,OCTech.Mobile.Applications.OBDLink-25,2015-09-30,3.8,OCTech.Mobile.Applications.OBDLink,25,3.1.2
2,BacteriologiaFree.Doctor-5,2015-08-24,4.1,BacteriologiaFree.Doctor,5,5.0
3,a.kakao.iconnect.footprint_talk_cat-1,2015-03-28,4.0,a.kakao.iconnect.footprint_talk_cat,1,4.0
4,FriendsInTheCity.Softgames-17,2015-11-13,2.4,FriendsInTheCity.Softgames,17,3.2


In [138]:
# Group apps by package name and obtain the earliest version.
# Rows are sorted by release date and only the first (earliest) row of each
# package is kept, as one intact record indexed by package name.
# GroupBy.first() is deliberately not used: it returns the first non-null
# value of every column separately, so a gap in the earliest row would be
# filled with a value taken from a later version of the same app.
df15_first = (
    df15.dropna(subset=['packageName'])  # groupby also ignores rows without a key
        .sort_values(by='listing.rDate', ascending=True, kind='mergesort')  # stable sort
        .drop_duplicates(subset='packageName', keep='first')
        .set_index('packageName')
        .sort_index()  # same sorted index order that groupby produced
)
df15_first.head()

Unnamed: 0_level_0,id,listing.rDate,listing.rating,versionCode,versionName
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AdamsDrugs.PocketRx,AdamsDrugs.PocketRx-400,2015-10-08,4.0,400,4.00
Android.WirelessPassword,Android.WirelessPassword-20,2015-08-16,3.5,20,2.3
ArgosHALLYM.Mobile,ArgosHALLYM.Mobile-13,2015-07-17,4.9,13,2.3
ArgosMTU.Mobile,ArgosMTU.Mobile-20,2015-08-26,4.6,20,3.0
AutomateIt.mainPackage,AutomateIt.mainPackage-157,2015-10-29,4.2,157,3.0.157


In [139]:
# Number of rows in the per-package 2015 frame (one row per package name).
df15_first.shape[0]

93873

In [140]:
# Inner join of the 2014 and 2015 frames on their packageName index:
# only packages present in both years are kept, and the overlapping
# column names receive the _14 / _15 suffixes.
df_merged_14_15 = df14_first.merge(
    df15_first,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_14', '_15'),
)
df_merged_14_15.head(n=5)

Unnamed: 0_level_0,id_14,listing.rDate_14,listing.rating_14,versionCode_14,versionName_14,id_15,listing.rDate_15,listing.rating_15,versionCode_15,versionName_15
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AutomateIt.mainPackage,AutomateIt.mainPackage-84,2014-01-14,4.281773,84,3.0.84,AutomateIt.mainPackage-157,2015-10-29,4.2,157,3.0.157
CN.MyPrivateMessages,CN.MyPrivateMessages-122,2014-05-27,4.438504,122,2.7.2,CN.MyPrivateMessages-143,2015-10-04,4.3,143,2.7.6.14
Com.sktelecom.minit,Com.sktelecom.minit-35,2014-06-22,3.849466,35,@string/app_version,Com.sktelecom.minit-43,2015-11-12,3.7,43,@string/app_version
ExtremeDevelopers.NeedforDrift,ExtremeDevelopers.NeedforDrift-151,2014-09-16,4.204959,151,1.51,ExtremeDevelopers.NeedforDrift-156,2015-12-08,4.2,156,1.56
InternetRadio.all,InternetRadio.all-109,2014-06-09,4.231142,109,1.5.2.6781,InternetRadio.all-3855,2015-09-25,4.2,3855,3.16.1.3855


In [141]:
# Number of packages found in both the 2014 and the 2015 frame.
df_merged_14_15.shape[0]

6050

In [142]:
# Rating change per package: 2015 rating minus 2014 rating,
# so a negative value means the rating went down.
df_merged_14_15['ratingDiff'] = df_merged_14_15['listing.rating_15'].sub(
    df_merged_14_15['listing.rating_14']
)
df_merged_14_15.head(n=5)

Unnamed: 0_level_0,id_14,listing.rDate_14,listing.rating_14,versionCode_14,versionName_14,id_15,listing.rDate_15,listing.rating_15,versionCode_15,versionName_15,ratingDiff
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AutomateIt.mainPackage,AutomateIt.mainPackage-84,2014-01-14,4.281773,84,3.0.84,AutomateIt.mainPackage-157,2015-10-29,4.2,157,3.0.157,-0.081773
CN.MyPrivateMessages,CN.MyPrivateMessages-122,2014-05-27,4.438504,122,2.7.2,CN.MyPrivateMessages-143,2015-10-04,4.3,143,2.7.6.14,-0.138504
Com.sktelecom.minit,Com.sktelecom.minit-35,2014-06-22,3.849466,35,@string/app_version,Com.sktelecom.minit-43,2015-11-12,3.7,43,@string/app_version,-0.149466
ExtremeDevelopers.NeedforDrift,ExtremeDevelopers.NeedforDrift-151,2014-09-16,4.204959,151,1.51,ExtremeDevelopers.NeedforDrift-156,2015-12-08,4.2,156,1.56,-0.004959
InternetRadio.all,InternetRadio.all-109,2014-06-09,4.231142,109,1.5.2.6781,InternetRadio.all-3855,2015-09-25,4.2,3855,3.16.1.3855,-0.031142


In [209]:
# Mean of the ratingDiff column over all joined 2014/2015 packages.
df_merged_14_15.loc[:, 'ratingDiff'].mean()

-0.053695572975206635

In [143]:
# Split the joined 2014/2015 frame by the sign of the rating change.
rating_down = df_merged_14_15.loc[df_merged_14_15['ratingDiff'].lt(0)]
rating_up = df_merged_14_15.loc[df_merged_14_15['ratingDiff'].gt(0)]
rating_remain = df_merged_14_15.loc[df_merged_14_15['ratingDiff'].eq(0)]

In [174]:
total = df_merged_14_15.shape[0]
# One count per line: all joined apps, then rating down, up and unchanged.
print('\n'.join(str(size) for size in (
    total,
    len(rating_down),
    len(rating_up),
    len(rating_remain),
)))

6050
3988
1802
260


In [177]:
# Markdown heading with the number of compared apps, shown with
# thousands separators.
total = df_merged_14_15.shape[0]
display(Markdown(''.join([
    "## Rating differences between 2014 and 2015 for ",
    "{:,}".format(total),
    " apps",
])))

## Rating differences between 2014 and 2015 for 6,050 apps

In [178]:
# Pie chart with the share of apps whose rating rose, fell or stayed the same
# between 2014 and 2015. It reads rating_up, rating_down, rating_remain and
# total as left by the preceding cells.
# Multiplying by 100.0 first forces float division, so the percentages are
# not truncated to 0 when the notebook runs on a Python 2 kernel.
fig_14_15 = {
    'data': [{'labels': ['Rating up', 'Rating down', 'No changes'],
              'values': [100.0 * len(rating_up) / total,
                         100.0 * len(rating_down) / total,
                         100.0 * len(rating_remain) / total],
              'type': 'pie',
              'marker': {'colors': ['rgb(44, 160, 44)', 'rgb(214, 39, 40)', 'rgb(145, 145, 143)']}
             }],
    'layout': {'title': 'App Rating Changes between 2014 and 2015'}
     }

iplot(fig_14_15)

In [200]:
# Load every app version released in 2016, show a heading,
# and end the cell with the number of rows loaded.
df16 = normalizeDataFrame('../data/apps-by-year/released_in_2016.json')
display(HTML('<h1>App versions released in 2016: </h1>'))
df16.shape[0]

40861

In [201]:
# Group apps by package name and obtain the earliest version.
# Rows are sorted by release date and only the first (earliest) row of each
# package is kept, as one intact record indexed by package name.
# GroupBy.first() is deliberately not used: it returns the first non-null
# value of every column separately, so a gap in the earliest row would be
# filled with a value taken from a later version of the same app.
df16_first = (
    df16.dropna(subset=['packageName'])  # groupby also ignores rows without a key
        .sort_values(by='listing.rDate', ascending=True, kind='mergesort')  # stable sort
        .drop_duplicates(subset='packageName', keep='first')
        .set_index('packageName')
        .sort_index()  # same sorted index order that groupby produced
)
df16_first.head()

Unnamed: 0_level_0,id,listing.rDate,listing.rating,versionCode,versionName
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AutomateIt.mainPackage,AutomateIt.mainPackage-173,2016-02-21,4.2,173,3.0.173
BlueFRom.Consult,BlueFRom.Consult-6,2016-01-29,3.4,6,1.0.6
Face.Sorter,Face.Sorter-94,2016-01-18,3.3,94,9.4
Gecko.Droid.PhysicsHelper,Gecko.Droid.PhysicsHelper-89,2016-01-04,4.1,89,3.0.89-GP
GeoWikiMobile.GeoWikiMobile,GeoWikiMobile.GeoWikiMobile-21,2016-03-14,3.7,21,3.2


In [202]:
# Inner join of the 2015 and 2016 frames on their packageName index:
# only packages present in both years are kept, and the overlapping
# column names receive the _15 / _16 suffixes.
df_merged_15_16 = df15_first.merge(
    df16_first,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('_15', '_16'),
)
df_merged_15_16.head(n=5)

Unnamed: 0_level_0,id_15,listing.rDate_15,listing.rating_15,versionCode_15,versionName_15,id_16,listing.rDate_16,listing.rating_16,versionCode_16,versionName_16
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AutomateIt.mainPackage,AutomateIt.mainPackage-157,2015-10-29,4.2,157,3.0.157,AutomateIt.mainPackage-173,2016-02-21,4.2,173,3.0.173
Face.Sorter,Face.Sorter-70,2015-11-09,3.3,70,7.0,Face.Sorter-94,2016-01-18,3.3,94,9.4
GeoWikiMobile.GeoWikiMobile,GeoWikiMobile.GeoWikiMobile-16,2015-09-23,3.4,16,3.0.1,GeoWikiMobile.GeoWikiMobile-21,2016-03-14,3.7,21,3.2
InternetRadio.all,InternetRadio.all-3855,2015-09-25,4.2,3855,3.16.1.3855,InternetRadio.all-6298,2016-03-18,4.2,6298,3.35.0.6298
MyING.be,MyING.be-21,2015-09-08,4.1,21,7.3.0,MyING.be-22,2016-01-18,4.1,22,7.4.0


In [203]:
# Rating change per package: 2016 rating minus 2015 rating,
# so a negative value means the rating went down.
df_merged_15_16['ratingDiff'] = df_merged_15_16['listing.rating_16'].sub(
    df_merged_15_16['listing.rating_15']
)
df_merged_15_16.head(n=5)

Unnamed: 0_level_0,id_15,listing.rDate_15,listing.rating_15,versionCode_15,versionName_15,id_16,listing.rDate_16,listing.rating_16,versionCode_16,versionName_16,ratingDiff
packageName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AutomateIt.mainPackage,AutomateIt.mainPackage-157,2015-10-29,4.2,157,3.0.157,AutomateIt.mainPackage-173,2016-02-21,4.2,173,3.0.173,0.0
Face.Sorter,Face.Sorter-70,2015-11-09,3.3,70,7.0,Face.Sorter-94,2016-01-18,3.3,94,9.4,0.0
GeoWikiMobile.GeoWikiMobile,GeoWikiMobile.GeoWikiMobile-16,2015-09-23,3.4,16,3.0.1,GeoWikiMobile.GeoWikiMobile-21,2016-03-14,3.7,21,3.2,0.3
InternetRadio.all,InternetRadio.all-3855,2015-09-25,4.2,3855,3.16.1.3855,InternetRadio.all-6298,2016-03-18,4.2,6298,3.35.0.6298,0.0
MyING.be,MyING.be-21,2015-09-08,4.1,21,7.3.0,MyING.be-22,2016-01-18,4.1,22,7.4.0,0.0


In [204]:
# Number of packages found in both the 2015 and the 2016 frame.
df_merged_15_16.shape[0]

16681

In [208]:
# Mean of the ratingDiff column over all joined 2015/2016 packages.
df_merged_15_16.loc[:, 'ratingDiff'].mean()

0.030226005635153767

In [205]:
# Split the joined 2015/2016 frame by the sign of the rating change.
# These names are rebound here, replacing the 2014/2015 subsets.
rating_down = df_merged_15_16.loc[df_merged_15_16['ratingDiff'].lt(0)]
rating_up = df_merged_15_16.loc[df_merged_15_16['ratingDiff'].gt(0)]
rating_remain = df_merged_15_16.loc[df_merged_15_16['ratingDiff'].eq(0)]

In [206]:
# Markdown heading with the number of compared apps, shown with
# thousands separators.
total = df_merged_15_16.shape[0]
display(Markdown(''.join([
    "## Rating differences between 2015 and 2016 for ",
    "{:,}".format(total),
    " apps",
])))

## Rating differences between 2015 and 2016 for 16,681 apps

In [207]:
# Pie chart with the share of apps whose rating rose, fell or stayed the same
# between 2015 and 2016. It reads rating_up, rating_down, rating_remain and
# total as left by the preceding cells.
# Multiplying by 100.0 first forces float division, so the percentages are
# not truncated to 0 when the notebook runs on a Python 2 kernel.
fig_15_16 = {
    'data': [{'labels': ['Rating up', 'Rating down', 'No changes'],
              'values': [100.0 * len(rating_up) / total,
                         100.0 * len(rating_down) / total,
                         100.0 * len(rating_remain) / total],
              'type': 'pie',
              'marker': {'colors': ['rgb(44, 160, 44)', 'rgb(214, 39, 40)', 'rgb(145, 145, 143)']}
             }],
    'layout': {'title': 'App Rating Changes between 2015 and 2016'}
     }

iplot(fig_15_16)