In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**1. Import the Required Libraries**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

**2. Load the dataset and preview its first rows**

In [None]:
# Read the games CSV into a DataFrame and preview its first rows.
dataset_csv = "../input/top-play-store-games/android-games.csv"
data = pd.read_csv(dataset_csv)
data.head()

**3. List the columns in the dataset**

In [None]:
# Show the column labels as a one-column table named "Columns".
column_labels = data.columns
pd.DataFrame({"Columns": column_labels})

In [None]:
# Report the frame's dimensions (the tuple returned by DataFrame.shape).
print("Data shape: ", data.shape)

**4. Dataset Info**

In [None]:
# Print the frame's summary via DataFrame.info().
data.info()

In [None]:
# Descriptive statistics from DataFrame.describe().
data.describe()

**5. Check whether the dataset is clean (missing values)**

In [None]:
# Count the missing entries in every column.
missing_mask = data.isnull()
missing_mask.sum()

In [None]:
# Frequency of each distinct value in the `installs` column.
data['installs'].value_counts()

**6. Check whether there is any relationship between `paid` and `price`**

In [None]:
# Frequency of each distinct value in the `price` column.
data['price'].value_counts()

In [None]:
# Frequency of each distinct value in the `paid` column.
data['paid'].value_counts()

**7. Drop the `price` column, as most of the games are free**

In [None]:
# Remove the `price` column without mutating the frame in place.
# errors='ignore' keeps the cell re-runnable: a second run, when the column
# is already gone, is a no-op instead of raising KeyError.
data = data.drop(columns='price', errors='ignore')

In [None]:
# Print the frame's summary again, now that the `price` column has been dropped.
data.info()

**8. Game Categories**

In [None]:
# Relative frequency of each category (normalize=True yields proportions, not counts).
data['category'].value_counts(normalize=True)

In [None]:
# Bar chart of the number of games in each category.
# The column is passed by keyword: recent seaborn releases treat the first
# positional argument of countplot as `data`, not as the x variable.
plt.figure(figsize=(30, 5))
sns.countplot(x='category', data=data)
plt.title("Number of Games by category")

**9. Total Ratings**

In [None]:
# Descriptive statistics for the `total ratings` column.
data['total ratings'].describe()

In [None]:
# Give the ratings column an identifier-friendly name (no space).
column_renames = {'total ratings': 'Total_ratings'}
data = data.rename(columns=column_renames)

In [None]:
# Distribution of total ratings.
# displot is a figure-level function that creates its own figure, so a
# preceding plt.figure(...) only produced a stray empty figure and its
# figsize never applied. Use displot's `height` / `aspect` to resize.
sns.displot(data=data, x='Total_ratings')
plt.title("Total Ratings of the Games")

**10. Convert installs to numerical values (in millions)**

In [None]:
def in_thousand(inst):
    """Re-express an install count given in thousands as millions.

    A string such as ``'500.0 k'`` becomes ``'0.5 M'``. Any value that is not
    a string ending in ``k`` with a numeric prefix is returned unchanged
    (e.g. ``'10.0 M'``, or a missing value).

    Parameters
    ----------
    inst : object
        One raw cell of the ``installs`` column.

    Returns
    -------
    object
        The "<number> M" string for thousand-suffixed input, otherwise
        ``inst`` itself.
    """
    # Non-string cells (e.g. NaN) pass through untouched.
    if not isinstance(inst, str):
        return inst

    text = inst.strip()
    if not text.lower().endswith('k'):
        return inst

    try:
        thousands = float(text[:-1])
    except ValueError:
        # Ends with "k" but has no numeric prefix; leave it as it was.
        return inst

    return f"{thousands / 1000} M"
# Convert the textual `installs` column to a float number of millions and
# rename it to `installs_in_million`.
# The guard makes the cell safe to re-run: once the column has been renamed
# there is no `installs` column left to convert.
if 'installs' in data.columns:
    installs_millions = (
        data['installs']
        .apply(in_thousand)                  # "… k" values -> "… M"
        .str.replace('M', '', regex=False)   # literal replace: drop the unit suffix
        .str.strip()
        .astype('float')
    )
    data = data.assign(installs=installs_millions).rename(
        columns={'installs': 'installs_in_million'}
    )

data['installs_in_million'].value_counts()

In [None]:
# Descriptive statistics for installs. The method must be called; without
# the parentheses the cell only displays the bound method object.
data['installs_in_million'].describe()

In [None]:
# Distribution of installs (in millions).
# displot is a figure-level function that creates its own figure, so a
# preceding plt.figure(...) only produced a stray empty figure and its
# figsize never applied. Use displot's `height` / `aspect` to resize.
sns.displot(data=data, x='installs_in_million')
plt.title("Number of Game Install in Millions")

**11. Paid and free games**

In [None]:
# Relative frequency of each value in the `paid` column (proportions, not counts).
data['paid'].value_counts(normalize=True)

In [None]:
# Pie chart of paid vs. free games.
paid_free = data['paid'].value_counts()

# Derive each slice's label from the value it actually counts, rather than
# assuming value_counts() returns the free group first and that both groups
# exist. Comparing the string form handles a boolean flag as well as
# "True"/"False" text.
label = ['Paid' if str(flag).strip().lower() == 'true' else 'Free'
         for flag in paid_free.index]

fig = px.pie(paid_free, values=paid_free.values, names=label,
             title='Paid & Free Games')
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

**12. Average Total Ratings by Category**

In [None]:
# Mean of Total_ratings within each category.
ratings_column = data['Total_ratings']
total_ratings_by_category = ratings_column.groupby(data['category']).mean()
total_ratings_by_category

In [None]:
# Bar chart of the per-category mean ratings; the x axis uses the
# 'total descending' category order.
fig = px.bar(
    total_ratings_by_category,
    x=total_ratings_by_category.index,
    y=total_ratings_by_category.values,
    labels={'y': 'Total_Ratings'},
).update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

**13. Average Number of Game Installations by Game Category**

In [None]:
# Mean of installs_in_million within each category.
installs_column = data['installs_in_million']
install_by_category = installs_column.groupby(data['category']).mean()
install_by_category

In [None]:
# Bar chart of the per-category mean installs; the x axis uses the
# 'total descending' category order.
fig = px.bar(
    install_by_category,
    x=install_by_category.index,
    y=install_by_category.values,
    labels={'y': 'Install in Millions'},
).update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

In [None]:
# Mean of the `growth (30 days)` column within each category.
growth_30_column = data['growth (30 days)']
growth_by_category_30 = growth_30_column.groupby(data['category']).mean()
growth_by_category_30

In [None]:
# Bar chart of the per-category mean 30-day growth; the x axis uses the
# 'total descending' category order.
fig = px.bar(
    growth_by_category_30,
    x=growth_by_category_30.index,
    y=growth_by_category_30,
    labels={'y': 'Growth in 30 days'},
).update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

In [None]:
# Mean of the `growth (60 days)` column within each category.
growth_60_column = data['growth (60 days)']
growth_by_category_60 = growth_60_column.groupby(data['category']).mean()
growth_by_category_60

In [None]:
# Bar chart of the per-category mean 60-day growth; the x axis uses the
# 'total descending' category order.
fig = px.bar(
    growth_by_category_60,
    x=growth_by_category_60.index,
    y=growth_by_category_60,
    labels={'y': 'Growth in 60 days'},
).update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()

**14. Top 3 Ranked Games by Category**

In [None]:
# Keep the rows whose rank is below 4, restricted to the columns of interest.
ranked_columns = ['rank', 'title', 'category', 'Total_ratings',
                  'installs_in_million', '5 star ratings']
is_top_three = data['rank'] < 4
top_ranked_games = data.loc[is_top_three, ranked_columns]
top_ranked_games

**15. Top 3 Games by Category and Their Total Ratings**

In [None]:
# Scatter of each top-ranked game's total ratings, coloured by category.
# hover_data takes column names from the plotted frame; passing a DataFrame
# slice only worked because iterating a DataFrame yields its column labels.
fig = px.scatter(top_ranked_games, y='title', x='Total_ratings',
                 hover_data=['category', 'rank'], color='category',
                 title="Top 3 Games by Their Total Ratings")
fig.show()

In [None]:
# First twenty rows after sorting by installs_in_million, largest first.
by_installs_desc = data.sort_values(by='installs_in_million', ascending=False)
top_20 = by_installs_desc.iloc[:20]
top_20

In [None]:
# Bar chart of installs for the top-20 games, coloured by category.
# hover_data takes column names from the plotted frame; passing a DataFrame
# slice only worked because iterating a DataFrame yields its column labels.
fig = px.bar(top_20, x='title', y='installs_in_million',
             hover_data=['5 star ratings'], color='category')
fig.update_layout(xaxis={'categoryorder': 'total descending'})
fig.show()