In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly
# Initialise Plotly's offline notebook mode before any figure is drawn below.
plotly.offline.init_notebook_mode(connected = True)

In [None]:
# Read the Play Store games CSV from the Kaggle input directory into a
# DataFrame, then preview its first five rows.
game_data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/top-play-store-games/android-games.csv'
)
game_data.head(5)

In [None]:
# (rows, columns) of the loaded dataset.
game_data.shape

In [None]:
# Column names, dtypes and non-null counts.
game_data.info()

In [None]:
# Summary statistics of the columns pandas includes by default.
game_data.describe()

In [None]:
# Number of missing values in each column.
game_data.isna().sum()

In [None]:
# Share of rows per value of the `paid` column, drawn as a pie chart.
px.pie(
    game_data,
    names='paid',
    labels={'paid': 'Paid'},
    title='Distribution of paid and free apps',
)

*99.6% of the apps present in the data are free of cost and 0.405% are paid.*

In [None]:
# Share of rows per value of the `category` column, drawn as a pie chart.
px.pie(
    game_data,
    names='category',
    labels={'category': 'Genre'},
    title='Distribution of Game Genres',
)

*The card genre has the largest share of games (7.05%).*

***Does this mean that free games are more popular?***

In [None]:
# Mean 30-day and 60-day growth, with one column per value of `paid`.
# The aggregation is named by string: passing the np.mean callable to pandas'
# groupby-based aggregations is deprecated and emits a FutureWarning on recent
# pandas versions, while 'mean' gives the same result without the warning.
pd.pivot_table(
    game_data,
    values=['growth (30 days)', 'growth (60 days)'],
    columns='paid',
    aggfunc='mean',
)

*It is indeed true that free apps are more popular than paid apps, as their growth rate over both 30 and 60 days is higher than that of paid apps.*

***Do free apps guarantee better gaming quality resulting in better ratings according to the users?***

In [None]:
# Mean of `average rating`, with one column per value of `paid`.
# 'mean' is passed as a string because handing the np.mean callable to pandas'
# groupby-based aggregations is deprecated and emits a FutureWarning on recent
# pandas versions.
pd.pivot_table(
    game_data,
    values='average rating',
    columns='paid',
    aggfunc='mean',
)

*Paid apps have a higher average rating than free apps, which suggests that although paid apps are downloaded less, they offer better gaming quality than free apps.*

In [None]:
# Convert the textual install counts (a number followed by a 'k' or 'M' unit,
# e.g. '500.0 k') into a float column expressed in millions.
# Each value is split into its numeric part and its unit so that every
# thousands figure is converted, not only two hard-coded literals.
installs_parts = game_data['installs'].str.strip().str.extract(
    r'^(?P<number>\d+(?:\.\d+)?)\s*(?P<unit>[kKmM])$'
)

# How many of each unit make up one million; dividing (rather than multiplying
# by 0.001) keeps results such as 100 k == 0.1 exactly equal to float('0.1').
units_per_million = installs_parts['unit'].str.upper().map({'K': 1000.0, 'M': 1.0})
installs_in_millions = installs_parts['number'].astype(float) / units_per_million

# Fail loudly on a format that was not understood instead of silently storing NaN.
unparsed_installs = game_data.loc[
    installs_in_millions.isna() & game_data['installs'].notna(), 'installs'
]
if not unparsed_installs.empty:
    raise ValueError(
        f'Unrecognised install counts: {sorted(unparsed_installs.unique())}'
    )

game_data['installs (in M)'] = installs_in_millions

In [None]:
# How many rows fall on each distinct value of the numeric installs column.
game_data['installs (in M)'].value_counts()

In [None]:
# Remove the raw text `installs` column.
# The frame is rebound (no in-place mutation) and errors='ignore' is used so
# that re-running this cell after the column is already gone does not raise
# KeyError.
game_data = game_data.drop(columns='installs', errors='ignore')

In [None]:
# Mean installs (in millions), with one column per distinct `price`.
# 'mean' is passed as a string because handing the np.mean callable to pandas'
# groupby-based aggregations is deprecated and emits a FutureWarning on recent
# pandas versions.
pd.pivot_table(
    game_data,
    values='installs (in M)',
    columns='price',
    aggfunc='mean',
)

*No surprises: the free apps have been installed more (29 M on average). However, we cannot ignore the fact that, among the paid apps, the app with the highest price (7.49) has as many or more installations than the apps with a lower price. So, to conclude, the price of an app doesn't affect its installations to a large extent if it is a good-quality app, whether free or paid.*

In [None]:
# Correct the formatting of the category values by removing the 'GAME' marker.
# regex=False makes the literal match explicit regardless of the pandas
# version's default, and .str.strip() removes any whitespace the removed
# marker leaves at either end of the genre name.
game_data['category'] = (
    game_data['category']
    .str.replace('GAME', '', regex=False)
    .str.strip()
)

***Top Genres according to highest growth in period of 30 days***

In [None]:
# Largest 30-day growth value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['growth (30 days)']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', 'Growth in 30 days'], font=dict(color='black', size=11)),
    cells=dict(values=[top.index, top], fill_color='lavender'),
)
fig = go.Figure(data=[table])
fig.show()

*Word and Action games have the highest growth rate in period of 30 days.*

***Top Genres according to highest growth in period of 60 days***

In [None]:
# Largest 60-day growth value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['growth (60 days)']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', 'Growth in 60 days'], font=dict(color='black', size=11)),
    cells=dict(values=[top.index, top], fill_color='lavender'),
)
fig = go.Figure(data=[table])
fig.show()

*Educational and music games have the highest growth rate in period of 60 days.*

***Top Genres according to highest growth after period of 30 days till 60 days***

In [None]:
# Per-row difference between the 60-day and the 30-day growth figures,
# stored as a new `diff` column on the frame.
game_data['diff'] = game_data['growth (60 days)'].sub(game_data['growth (30 days)'])

# Largest difference recorded within each genre, highest first.
top = (
    game_data.groupby('category')['diff']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Genres', 'Growth after period of 30 days till 60 days'],
        font=dict(color='black', size=11),
    ),
    cells=dict(values=[top.index, top], fill_color='lavender'),
)
fig = go.Figure(data=[table])
fig.show()

*Educational games have seen the fastest growth after the first 30 days, as all institutions are adopting online study nowadays.*

***Top Genres according to average ratings***

In [None]:
# Highest `average rating` recorded within each genre, best first, limited to
# the first 20 genres of that ranking.
top = (
    game_data.groupby('category')['average rating']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# The rows are genres (the grouping key is `category`), so the first column is
# labelled 'Genres' rather than 'Game'.
table = go.Table(
    header=dict(values=['Genres', 'Average Rating'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='violet',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top Genres according to total ratings***

In [None]:
# Largest `total ratings` value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['total ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', 'Total Ratings'], font=dict(color='black', size=11)),
    cells=dict(values=[top.index, top], fill_color='orange'),
)
fig = go.Figure(data=[table])
fig.show()

***Top Genres with best ratings***

In [None]:
# Largest `5 star ratings` value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['5 star ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', '5 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='green',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top Genres with worst ratings***

In [None]:
# Largest `1 star ratings` value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['1 star ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', '1 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='red',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top Genres with average ratings***

In [None]:
# Largest `3 star ratings` value recorded within each genre, highest first.
top = (
    game_data.groupby('category')['3 star ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Genres', '3 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='yellow',
        font=dict(color='black', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Which genre has the most number of downloads?***

In [None]:
# Mean installs (in millions) per genre, highest first.
top = (
    game_data.groupby('category')['installs (in M)']
    .mean()
    .sort_values(ascending=False)
)

# Header text is black, as in the other tables of this notebook: no header
# fill is set here, and white text on Plotly's default (white) header
# background would leave the column titles unreadable.
table = go.Table(
    header=dict(
        values=['Genres', 'Number of Downloads(in Million)'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='plum',
        font=dict(color='black', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Arcade (69.31 M) and Casual (67.12 M) games have, on average, the most downloads.*

***Top 20 games with the most 5 star ratings***

In [None]:
# The 20 titles with the largest `5 star ratings` value, highest first.
top = (
    game_data.groupby('title')['5 star ratings']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Name', '5 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='blue',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with the most 1 star ratings***

In [None]:
# The 20 titles with the largest `1 star ratings` value, highest first.
top = (
    game_data.groupby('title')['1 star ratings']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Name', '1 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='tomato',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with the most 3 star ratings.***

In [None]:
# The 20 titles with the largest `3 star ratings` value, highest first.
top = (
    game_data.groupby('title')['3 star ratings']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Name', '3 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='khaki',
        font=dict(color='black', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 Downloaded games***

In [None]:
# The 20 titles with the largest installs figure (in millions), highest first.
top = (
    game_data.groupby('title')['installs (in M)']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Name', 'Number of Downloads(in Million)'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='aquamarine',
        font=dict(color='black', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 Least Downloaded games***

In [None]:
# The 20 titles with the smallest installs figure (in millions), lowest first.
top = (
    game_data.groupby('title')['installs (in M)']
    .max()
    .sort_values()  # ascending order is the default
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Game', 'Number of Downloads in Million'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        line_color='white',
        fill_color='brown',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with highest growth rate in 30 days.***

In [None]:
# The 20 titles with the largest 30-day growth value, highest first.
top = (
    game_data.groupby('title')['growth (30 days)']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Growth in 30 days'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='blueviolet',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with highest growth rate in 60 days.***

In [None]:
# The 20 titles with the largest 60-day growth value, highest first.
top = (
    game_data.groupby('title')['growth (60 days)']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Growth in 60 days'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='blueviolet',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with highest growth rate after 30 days***

In [None]:
# The 20 titles with the largest `diff` value (60-day minus 30-day growth,
# computed in an earlier cell), highest first.
top = (
    game_data.groupby('title')['diff']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Game', 'Growth after period of 30 days'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        fill_color='blueviolet',
        font=dict(color='white', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top 20 games with highest average ratings***

In [None]:
# The 20 titles with the highest `average rating`, best first.
top = (
    game_data.groupby('title')['average rating']
    .max()
    .sort_values(ascending=False)
    .head(20)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Average Rating'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='aqua',
        font=dict(color='black', size=11),
    ),
)
fig = go.Figure(data=[table])
fig.show()

In [None]:
# Keep only the rows whose `paid` flag is set (the column is used directly as
# a boolean row mask).
is_paid = game_data['paid']
paid_game_data = game_data.loc[is_paid]

In [None]:
# Count the rows per value of `paid` in the filtered frame.
paid_game_data['paid'].value_counts()

***Which paid game has most number of ratings?***

In [None]:
# Paid titles ranked by their `total ratings`, highest first.
top = (
    paid_game_data.groupby('title')['total ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Total Ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Minecraft has most number of ratings amongst other paid games.*

***Which paid game has the highest 5 star ratings?***

In [None]:
# Paid titles ranked by their `5 star ratings`, highest first.
top = (
    paid_game_data.groupby('title')['5 star ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', '5 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Minecraft has the most 5 star ratings among the 7 paid games in the dataset.*

***Which paid game has the most one star rating?***

In [None]:
# Paid titles ranked by their `1 star ratings`, highest first.
top = (
    paid_game_data.groupby('title')['1 star ratings']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', '1 star ratings'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Minecraft also has the most 1 star ratings among the paid games.*

***Which paid game has most number of downloads?***

In [None]:
# Paid titles ranked by their installs (in millions), highest first.
top = (
    paid_game_data.groupby('title')['installs (in M)']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Game', 'Number of Downloads(in Million)'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Hitman Sniper and Minecraft have most number of downloads(10 M).*

***Which is the most expensive game?***

In [None]:
# Paid titles ranked by their `price`, most expensive first.
top = (
    paid_game_data.groupby('title')['price']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Price (in dollars)'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Minecraft is the most expensive amongst other paid games(7.49 dollars).*

***Which paid game has the highest average rating?***

In [None]:
# Paid titles ranked by their `average rating`, best first.
top = (
    paid_game_data.groupby('title')['average rating']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Average rating'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*Terraria has the highest average rating amongst other paid games.*

***Top Paid games having highest growth rate in 30 days.***

In [None]:
# Paid titles ranked by their 30-day growth, highest first.
top = (
    paid_game_data.groupby('title')['growth (30 days)']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Game', 'Growth in period of 30 days'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top Paid games having highest growth rate in 60 days.***

In [None]:
# Paid titles ranked by their 60-day growth, highest first.
top = (
    paid_game_data.groupby('title')['growth (60 days)']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(
        values=['Game', 'Growth in period of 60 days'],
        font=dict(color='black', size=11),
    ),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

***Top Paid games having highest growth rate after 30 days till 60 days.***

In [None]:
# Paid titles ranked by their `diff` value (60-day minus 30-day growth,
# computed in an earlier cell), highest first.
top = (
    paid_game_data.groupby('title')['diff']
    .max()
    .sort_values(ascending=False)
)

# Show the ranking as a two-column Plotly table.
table = go.Table(
    header=dict(values=['Game', 'Growth after 30 days'], font=dict(color='black', size=11)),
    cells=dict(
        values=[top.index, top],
        fill_color='black',
        font=dict(color='white', size=11),
        height=50,
    ),
)
fig = go.Figure(data=[table])
fig.show()

*If you like it then upvote and share it.*

*Do provide your valuable feedback.*