![](https://mir-s3-cdn-cf.behance.net/project_modules/fs/88511075350285.5c49f21260c3e.jpg)

In [None]:
# --- Numeric / tabular libraries ---------------------------------
import numpy as np 
import pandas as pd
# --- Plotly: imported and switched to notebook mode ---------------
import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)
import seaborn as sns

# --- Matplotlib, rendered inline in the notebook ------------------
import matplotlib.pyplot as plt
%matplotlib inline

# Silence every warning for the rest of the session (this also hides
# deprecation notices raised by the plotting libraries).
import warnings
warnings.filterwarnings("ignore")

# Show all columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
#################################################################
# Load the dataset; `top` is the frame every later cell works on.
top = pd.read_csv('../input/top-play-store-games/android-games.csv')

# Basic information

In [None]:
# Preview the first rows of the loaded frame.
top.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
top.info()

# Preprocessing

In [None]:
# Remove the extra leading word from every category label.
# NOTE: the column is overwritten in place, so re-running this cell
# strips one more word each time.
def _drop_first_word(label):
    """Return `label` without its first whitespace-separated word."""
    remaining_words = label.split()[1:]
    return ' '.join(remaining_words)

top['category'] = top['category'].apply(_drop_first_word)

# Converting all values to millions
def installs(x):
    """Convert an install-count label such as '500.0 M' or '100.0 k' to millions.

    Parameters
    ----------
    x : str or float
        Label ending in 'M' (millions) or 'k'/'K' (thousands). Whitespace
        between the number and the suffix is optional. A value that is
        already numeric is returned unchanged, so the conversion can be
        applied again to an already converted column.

    Returns
    -------
    float
        Number of installs expressed in millions.

    Raises
    ------
    ValueError
        If the label is empty, has an unknown suffix, or a non-numeric body.
    """
    # Already converted (e.g. the cell was re-run): nothing to do.
    if isinstance(x, (int, float)) and not isinstance(x, bool):
        return float(x)

    label = str(x).strip()
    if not label:
        raise ValueError('empty installs label')

    # Slice off only the suffix character; float() tolerates the optional
    # separating space, so both '500.0 M' and '500M' parse correctly.
    number, suffix = float(label[:-1]), label[-1]
    if suffix == 'M':
        return number
    if suffix in ('k', 'K'):
        return number / 1000
    raise ValueError(f'unrecognised installs suffix in {x!r}')
        
# Replace the textual install labels with their value in millions.
top['installs'] = top['installs'].map(installs)

# EDA

**WordCloud**

In [None]:
from wordcloud import WordCloud
from PIL import Image
import random

# Join all titles into one space-separated string and drop the punctuation
# that would otherwise stick to the words. Joining the strings directly
# (instead of going through str(list(...))) keeps repr() artefacts such as
# added double quotes or backslash escapes out of the word-cloud text.
text = ' '.join(top['title'].astype(str)).translate(str.maketrans('', '', ",[]'.:"))

def blue_color_func(word, font_size, position, orientation, random_state = None, **kwargs):
    """Colour callback for the word cloud: fixed blue hue, random lightness.

    Parameters
    ----------
    word, font_size, position, orientation
        Supplied by the caller for each word; not used here.
    random_state : object with a ``randint(a, b)`` method, optional
        Generator used to draw the lightness. When omitted, the global
        ``random`` module is used, so results differ between runs.
    **kwargs
        Any further keyword arguments are accepted and ignored.

    Returns
    -------
    str
        An ``hsl(209, 60%, L%)`` string with L drawn from 25..40.
    """
    # Honour the caller's generator so seeded recolouring is reproducible.
    rng = random if random_state is None else random_state
    return "hsl(209, 60%%, %f%%)" % rng.randint(25, 40)

# Load the image used as the shape mask of the cloud. The context manager
# closes the underlying file once the pixels have been copied to an array.
with Image.open('../input/google-logo/google-logo-perfekt.jpg') as logo:
    mask = np.array(logo)

# NOTE: this changes the default figure size for every later figure that
# does not set its own size.
plt.rcParams['figure.figsize'] = (20, 20)

# Build the cloud from the title text, limited to the 100 most frequent words.
wordcloud = WordCloud(background_color = 'white', width = 1400,  height = 1400, max_words = 100, mask = mask).generate(text)

# Repaint the words in shades of blue.
wordcloud.recolor(color_func = blue_color_func)

plt.imshow(wordcloud, interpolation = 'bilinear')
plt.axis('off')
plt.show()

In [None]:
# Horizontal count plot of free vs paid games with explanatory annotations.
plt.figure(figsize = (15, 9))
sns.set_style("white")
plt.title('Count of paid and free top games', fontname = 'monospace', fontsize = 35, color = '#32384D', x = 0.5, y = 1.05)

# Compute the shares from the data so the annotations always agree with
# the bars instead of relying on hand-typed percentages.
free_share = (top['paid'] == False).mean()
paid_share = (top['paid'] == True).mean()

def _as_percent(share):
    """Format a fraction as a percentage with one decimal and a decimal comma."""
    return f'{share * 100:.1f}%'.replace('.', ',')

a = sns.countplot(y = top['paid'], palette = (['#217CA3', '#E29930']))
a.set_yticklabels(['Free', 'Paid'])
# Thin separator between the two bars.
plt.axhline(0.5, 0, 0.951, color = '#211F30')
plt.xticks([])
plt.yticks(fontname = 'monospace', fontsize = 18)
plt.ylabel('')
plt.xlabel('')

# Share labels; the x/y positions are hand-tuned for this dataset.
a.text(750, 0.05, _as_percent(free_share), fontname = 'monospace', fontsize = 40, color = 'white')
a.text(40, 1.05, _as_percent(paid_share), fontname = 'monospace', fontsize = 40, color = '#32384D')
a.text(350, 0.7, '''                        Why so?''', 
       fontname = 'monospace', fontsize = 18, color = '#32384D')
a.text(400, 1.3, '''                         

1. People don't want to pay, and for children this is a big
problem in general.

2. Developers earn more on free games and it is more 
profitable for them to earn on advertising and additional paid content.

3. Most paid games can be completed in a few hours, good free games can 
be played for years.''', fontname = 'monospace', fontsize = 14, color = '#32384D')

# Keep only a slightly thicker left spine.
a.spines['left'].set_linewidth(1.5)
for w in ['right', 'top', 'bottom']:
    a.spines[w].set_visible(False)

plt.show()

In [None]:
def _five_best_rated(condition):
    """Return the five rows of `top` matching `condition` with the highest
    'average rating', ordered from lowest to highest rating."""
    matching = top.query(condition)
    return matching.sort_values('average rating').tail(5)

# Three leaderboards: games with >=10M installs, >=100M installs, and paid games.
top_games_10m = _five_best_rated("installs >= 10")
top_games_100m = _five_best_rated("installs >= 100")
top_games_paid = _five_best_rated("paid == True")

# Stack them in that order with a fresh 0..n-1 index.
top_games = pd.concat([top_games_10m, top_games_100m, top_games_paid], ignore_index = True)


In [None]:
# Horizontal bar chart of the best-rated games, one coloured section per group
# (>=10M installs, >=100M installs, paid), in the order they were concatenated.
plt.figure(figsize = (8, 15))
sns.set_style("white")
plt.title('TOP games with the best rating', fontname = 'monospace', fontsize = 35, color = '#32384D', x = 0.7, y = 1.1)

# Colour each bar by the group it belongs to. Deriving the colour from the
# group sizes (instead of testing a positional column for the exact values
# 10 / 100) keeps a 50M game in the ">=10M" section blue and prevents paid
# games from picking up another section's colour.
group_sizes = [('#217CA3', len(top_games_10m)),
               ('#E29930', len(top_games_100m)),
               ('#32384D', len(top_games_paid))]
clrs = [colour for colour, size in group_sizes for _ in range(size)]

a = sns.barplot(data = top_games, x = 'average rating', y = 'title', palette = clrs)

# Separator lines between the sections, placed after the last bar of each group.
first_break = len(top_games_10m) - 0.5
second_break = first_break + len(top_games_100m)
plt.axhline(first_break, 0, 1, color = '#211F30')
plt.axhline(second_break, 0, 1, color = '#211F30')
plt.xticks([])
plt.yticks([])
plt.ylabel('')
plt.xlabel('')

# Rating value just to the right of every bar.
for p in a.patches:
    width = p.get_width()
    plt.text(0.05 + width, p.get_y() + 0.6*p.get_height(), f'{width: 0.2f}',
             ha = 'left', va = 'center', fontname = 'monospace', color = '#32384D')

# Category name inside the right end of every bar. Bars 1 and 2 use a larger
# inset (hand-tuned offsets from the original layout).
cat = top_games['category'].to_list()
for i, (bar, label) in enumerate(zip(a.patches, cat)):
    inset = 0.5 if i in (1, 2) else 0.3
    plt.text(bar.get_width() - inset, bar.get_y() + 0.6*bar.get_height(), label, ha = 'center', va = 'center',
             fontname = 'monospace', fontsize = 10, color = 'white')

# Game title inside the left end of every bar (the y tick labels are hidden).
titles = top_games['title'].to_list()
for i, game_title in enumerate(titles):
    a.text(0.1, i+0.1, game_title, fontname = 'monospace', fontsize = 12, color = 'white')

# Keep only a slightly thicker left spine.
a.spines['left'].set_linewidth(1.5)
for w in ['right', 'top', 'bottom']:
    a.spines[w].set_visible(False)

# Section captions to the right of the bars; positions are hand-tuned for
# five bars per section.
a.text(6.5, 2.5, '''TOP-5 games 
with more than 
10M installs''', ha = 'center', fontname = 'monospace', fontsize = 14, color = '#217CA3')

a.text(6.5, 7.5, '''TOP-5 games 
with more than 
100M installs''', ha = 'center', fontname = 'monospace', fontsize = 14, color = '#E29930')

a.text(6.5, 12.5, '''TOP-5 paid 
games''', ha = 'center', fontname = 'monospace', fontsize = 14, color = '#32384D')

plt.show()

In [None]:
def _growth_panel(position, column, title, spine_width, notes, legend = False):
    """Draw one KDE panel comparing growth for two install tiers.

    Parameters
    ----------
    position : int
        Three-digit subplot position passed to ``plt.subplot``.
    column : str
        Growth column of `top` to plot; rows with a value of 100 or more
        in this column are excluded.
    title : str
        Panel title.
    spine_width : float
        Line width of the bottom spine.
    notes : iterable of (x, y, text, colour)
        Captions written onto the panel (the median captions are fixed
        strings, not computed here).
    legend : bool, optional
        Whether to draw the legend on this panel.

    Returns
    -------
    The axes the densities were drawn on.
    """
    plt.subplot(position)
    sns.set_style('white')
    plt.title(title, size = 23, y = 1.03, fontname = 'monospace', x = 0.935)
    plt.grid(color = 'gray', linestyle = ':', axis = 'x', alpha = 0.8, zorder = 0,  dashes = (1,7))

    # Two tiers: 10 <= installs < 100 and installs >= 100 (installs are in millions).
    mid_tier = top.query(f"installs < 100 & installs >= 10 & `{column}` < 100")[column]
    top_tier = top.query(f"installs >= 100 & `{column}` < 100")[column]
    axis = sns.kdeplot(mid_tier, color = '#32384D', shade = True, label = 'Games with <100M installs', alpha = 0.7)
    sns.kdeplot(top_tier, color = '#E29930', shade = True, label = 'Games with 100M< installs', alpha = 0.7)

    plt.ylabel('')
    plt.xlabel('')
    plt.xticks(fontname = 'monospace')
    plt.yticks([])
    if legend:
        plt.legend(frameon = False, loc = 'upper left', bbox_to_anchor = (0.01, 1.4))

    for x_pos, y_pos, caption, colour in notes:
        axis.text(x_pos, y_pos, caption, ha = 'center', fontname = 'monospace', fontsize = 14, color = colour)

    # Keep only the bottom spine.
    axis.spines['bottom'].set_linewidth(spine_width)
    for side in ['right', 'top', 'left']:
        axis.spines[side].set_visible(False)
    return axis


fig = plt.figure(figsize = (12, 10))

# Upper panel: growth over 30 days.
_growth_panel(211, 'growth (30 days)', '30 days', 1.5,
              [(31.5, 0.07, 'median 0.5%', '#32384D'),
               (31.5, 0.03, 'median 0.8%', '#E29930')])

# Lower panel: growth over 60 days; it also carries the shared legend.
a = _growth_panel(212, 'growth (60 days)', '60 days', 1.2,
                  [(92, 0.03, 'median 1.0%', '#32384D'),
                   (92, 0.015, 'median 1.7%', '#E29930')],
                  legend = True)

fig.tight_layout(pad = 3)
plt.figtext(0.3, 0.98, 'Dynamics of installs', fontsize = 30, fontname = 'monospace')

plt.show()

# End