In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# Install a blanket "ignore" filter: every warning raised after this point is
# suppressed for the rest of the session.
# NOTE(review): this also hides deprecation and index-alignment warnings coming
# from the libraries used in this notebook — consider narrowing the filter
# (e.g. by category) once the notebook runs cleanly.
warnings.filterwarnings("ignore")

In [None]:
# Load the games CSV into `raw_df` (path is relative to the notebook's
# working directory) and preview the first rows.
raw_df= pd.read_csv('../input/top-play-store-games/android-games.csv')
raw_df.head()

In [None]:
# Dimensions of the raw frame as a (rows, columns) tuple.
raw_df.shape

In [None]:
# Print the per-column dtype and non-null count summary of the raw frame.
raw_df.info()

In [None]:
# Descriptive statistics of the raw frame, rounded to two decimals for display.
raw_df.describe().round(2)

In [None]:
# Number of missing values in each column.
raw_df.isna().sum()

In [None]:
# Per-column overview: name, dtype and the distinct values, with a ruler
# line separating one column's report from the next.
for column_name in raw_df.columns:
    column = raw_df[column_name]
    print(column_name, column.dtype, sep='\n')
    print('Unique values are: ', column.unique())
    print('-' * 100)

In [None]:
# Work on a copy so the frame loaded from disk (`raw_df`) is never modified
# by the column additions made later on `df`.
df= raw_df.copy()

In [None]:
sns.set_style('whitegrid')

# Horizontal count plot of rows per category, most frequent category on top.
fig, ax = plt.subplots(figsize=(13, 10))
sns.countplot(y='category', data=df, order=df['category'].value_counts().index,
              palette='GnBu_r', ax=ax)
ax.set_title('Category', fontsize=20)

# Annotate each bar with its count and its share of all rows in df.
total_rows = len(df)
for bar in ax.patches:
    # The bars are horizontal, so the bar *width* carries the count.
    count = bar.get_width()
    share = count / total_rows * 100

    # The patch width is a float; '{:.0f}' prints it as a whole number
    # ("120" rather than "120.0").
    ax.text(bar.get_x() + count, bar.get_y() + 0.7,
            '{:.0f}\n({:.1f}%)'.format(count, share))

In [None]:
# Count plot of rows per distinct `price` value, most frequent value first.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(x='price', data=df, order=df['price'].value_counts().index,
              palette='GnBu_r', ax=ax)
ax.set_title('Price', fontsize=20)

# Annotate each bar with its count and its share of all rows in df.
total_rows = len(df)
for bar in ax.patches:
    # Vertical bars: the bar height carries the count.
    count = bar.get_height()
    share = count / total_rows * 100

    # The patch height is a float; '{:.0f}' prints it as a whole number
    # ("93" rather than "93.0"). The label is centred above the bar.
    ax.text(bar.get_x() + bar.get_width() / 2, count + 10,
            '{:.0f}\n({:.1f}%)'.format(count, share), ha='center')

In [None]:
# Count plot of rows per `installs` bucket, drawn in the fixed
# largest-to-smallest order listed below.
# NOTE(review): shares are computed against all rows of df; any `installs`
# value that is not in this list gets no bar — confirm the list is exhaustive.
fig, ax = plt.subplots(figsize=(10, 10))
sns.countplot(x='installs', data=df,
              order=['1000.0 M', '500.0 M', '100.0 M', '50.0 M', '10.0 M', '5.0 M', '1.0 M', '500.0 k', '100.0 k'],
              palette='GnBu_r', ax=ax)
ax.set_title('Installs', fontsize=20)

# Annotate each bar with its count and its share of all rows in df.
total_rows = len(df)
for bar in ax.patches:
    # Vertical bars: the bar height carries the count.
    count = bar.get_height()

    # A bucket named in `order` but absent from the data may produce a bar
    # without a finite height (seaborn-version dependent); there is nothing
    # meaningful to label in that case.
    if not np.isfinite(count):
        continue

    share = count / total_rows * 100

    # '{:.0f}' prints the float-valued count as a whole number.
    ax.text(bar.get_x() + bar.get_width() / 2, count + 5,
            '{:.0f}\n({:.1f}%)'.format(count, share), ha='center')

In [None]:
# Bar chart of the ten titles with the highest `total ratings`.
fig, ax = plt.subplots(figsize=(10, 10))

top10_titles = df.sort_values('total ratings', ascending=False).head(10)['title']
sns.barplot(x='title', y='total ratings', data=df, order=top10_titles,
            palette='GnBu_r', ax=ax)
ax.set_title('Total Ratings', fontsize=20)
ax.tick_params(axis='x', labelrotation=90)

# Grand total across the whole dataset, computed once up front with the
# vectorized Series.sum() instead of re-running the builtin sum() per bar.
ratings_total = df['total ratings'].sum()

# Annotate each bar with its value in millions and its share of all ratings.
for bar in ax.patches:
    value = bar.get_height()
    share = value / ratings_total * 100

    # The label sits 700k above the bar top, centred horizontally.
    ax.text(bar.get_x() + bar.get_width() / 2, value + 700000,
            '{:.1f}M\n({:.1f}%)'.format(value / 1000000, share), ha='center')
    

In [None]:
# 4- and 5-star ratings count as "good"; 1- to 3-star ratings count as "bad".
# Add the absolute totals and their shares as new columns on df.

# Absolute number of good / bad ratings per game
df['good ratings'] = df['4 star ratings'].add(df['5 star ratings'])
df['bad ratings'] = df['1 star ratings'].add(df['2 star ratings']).add(df['3 star ratings'])

# Share of good / bad ratings. Despite the "percent" column names these are
# fractions of the star-rating total (not multiplied by 100).
star_ratings_total = df['good ratings'].add(df['bad ratings'])
df['good ratings percent'] = df['good ratings'].div(star_ratings_total)
df['bad ratings percent'] = df['bad ratings'].div(star_ratings_total)

df.head(2)

In [None]:
# Restrict to games with more than 1 million total ratings, then list the ten
# with the highest share of good ratings.
df_1m = df.loc[df['total ratings'].gt(1000000)]

(
    df_1m
    .sort_values('good ratings percent', ascending=False)
    .head(10)
    .loc[:, ['title', 'good ratings percent', 'total ratings']]
)

In [None]:
# One horizontal bar chart per category: the ten games (among those with more
# than 1 million ratings) that have the highest share of good ratings.

# Missing categories are dropped because the title below calls .upper() on
# the category value.
categories = df_1m['category'].dropna().unique()

fig = plt.figure(figsize=(15, 150))

# The number of stacked panels follows the data instead of a fixed row count,
# so the grid never has too few (error) or too many (empty space) rows.
for panel, category in enumerate(categories, start=1):
    ax = fig.add_subplot(len(categories), 1, panel)

    # The mask is built from df_1m itself. A mask built from the full df has a
    # different index, and pandas would have to reindex it before filtering.
    top10 = (
        df_1m[df_1m['category'] == category]
        .sort_values('good ratings percent', ascending=False)
        .head(10)[['title', 'good ratings percent', 'total ratings']]
    )

    sns.barplot(y='title', x='good ratings percent', data=top10, palette='GnBu_r', ax=ax)
    ax.set_title('Top 10 {} good ratings percent with more than 1 milion of ratings'.format(category.upper()), fontsize=20)
    ax.tick_params(axis='x', labelrotation=75)
    ax.set_xlabel('')
    ax.set_ylabel('')

    # Label each bar with its good-rating share as a percentage. The bars are
    # horizontal, so the bar width is the plotted fraction.
    for bar in ax.patches:
        fraction = bar.get_width()

        # Place the label slightly past the bar end (2.85% of its length).
        ax.text(bar.get_x() + fraction * 1.0285, bar.get_y() + 0.5,
                '{:.1f}%'.format(fraction * 100), ha='center')

In [None]:
# The ten games (among those with more than 1 million ratings) with the
# highest share of bad ratings; the table shows their good-rating share.
(
    df_1m
    .sort_values('bad ratings percent', ascending=False)
    .head(10)
    .loc[:, ['title', 'good ratings percent', 'total ratings']]
)