Importing Libraries 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import datetime as dt
from datetime import datetime, time
from IPython.display import display


Load Dataset

In [2]:
# Read the raw Play Store export into a DataFrame, then preview its first
# five rows as the cell output.
apps_df = pd.read_csv(filepath_or_buffer="Play Store Data.csv")
apps_df.head(n=5)

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite ‚Äì FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


Data Cleaning

In [3]:
# Clean Installs: values look like "10,000+", so drop the "+" and the
# thousands separators before converting. Anything that still is not a
# number becomes NaN.
apps_df['Installs'] = pd.to_numeric(
    apps_df['Installs'].astype(str).str.replace('[+,]', '', regex=True),
    errors='coerce',
)

# Clean Reviews and Rating: coerce to numeric, invalid entries become NaN.
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')
apps_df['Rating'] = pd.to_numeric(apps_df['Rating'], errors='coerce')

# Clean Size (MB): sizes carry an "M" (megabytes) or "k" (kilobytes) suffix.
# The suffix is stripped for parsing, and kilobyte rows are then rescaled to
# megabytes -- without the rescale "201k" would be stored as 201 MB.
KB_PER_MB = 1024
size_text = apps_df['Size'].astype(str)
size_is_kb = size_text.str.endswith('k')
size_number = pd.to_numeric(
    size_text
    .str.replace('M', '', regex=False)
    .str.replace('k', '', regex=False),
    errors='coerce',
)
apps_df['Size_MB'] = size_number.where(~size_is_kb, size_number / KB_PER_MB)

# Create clean dataset for plotting: keep only rows where every plotted
# metric parsed successfully. `.copy()` makes plot_df an independent frame so
# later edits to it neither warn nor touch apps_df.
plot_df = apps_df.dropna(subset=['Installs', 'Rating', 'Reviews', 'Size_MB']).copy()

plot_df.shape

(7729, 14)

Sentiment Subjectivity (Python NLP)

In [4]:
# Install TextBlob into the environment of the running kernel (`%pip`, unlike
# `!pip`, targets the kernel's own environment). It is imported in the cells
# that follow to score app names.
%pip install textblob

Note: you may need to restart the kernel to use updated packages.


In [5]:
from textblob import TextBlob

In [6]:
from textblob import TextBlob


def _name_subjectivity(app_name):
    """Return TextBlob's sentiment subjectivity score for one app name."""
    return TextBlob(app_name).sentiment.subjectivity


# Score every app title; names are cast to str first so that missing or
# non-text entries can still be passed to TextBlob.
app_names = apps_df['App'].astype(str)
apps_df['Sentiment_Subjectivity'] = app_names.apply(_name_subjectivity)

Category Translation

In [7]:
# Display names for selected categories: Beauty -> Hindi, Business -> Tamil,
# Dating -> German.
# The keys must match the dataset's category labels exactly; those are
# upper-case (e.g. "ART_AND_DESIGN"), so title-case keys such as "Beauty"
# would never match and the column would be left untranslated.
# The Hindi and Tamil strings are stored as proper Unicode text (they had been
# garbled by a wrong-encoding round trip).
translations = {
    "BEAUTY": "सुंदरता",
    "BUSINESS": "வணிகம்",
    "DATING": "Verabredung"
}

# Categories without an entry in `translations` keep their original label.
apps_df['Translated_Category'] = apps_df['Category'].replace(translations)

Apply All Business Filters

In [8]:
# Categories to keep. Labels in the dataset are upper-case (e.g.
# "ART_AND_DESIGN"), so the names are listed upper-case and the column is
# upper-cased before matching; a title-case list such as ['Game', 'Beauty']
# matches nothing and silently yields an empty frame.
target_categories = [
    'GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION',
    'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENTS',
]
in_target_category = apps_df['Category'].astype(str).str.upper().isin(target_categories)

# All conditions must hold at once. Rows with NaN in a numeric column fail
# the corresponding comparison and are therefore dropped.
apps_df_filtered = apps_df[
    (apps_df['Rating'] > 3.5) &
    (apps_df['Installs'] > 50000) &
    (apps_df['Reviews'] > 500) &
    # Case-insensitive match on the letter "s"; missing names count as no match.
    (apps_df['App'].str.contains("S", case=False, na=False)) &
    (apps_df['Sentiment_Subjectivity'] > 0.5) &
    in_target_category
].copy()  # independent frame, safe to modify without touching apps_df

Time Based Visibility (5 PM – 7 PM IST)

In [9]:
from datetime import datetime
import pytz

# Visibility window in IST: from 17:00 (inclusive) up to 19:00 (exclusive),
# i.e. 5 PM - 7 PM. The lower bound was previously 1, which left the chart
# visible from 1 AM onwards instead of from 5 PM.
VISIBLE_FROM_HOUR = 17
VISIBLE_UNTIL_HOUR = 19


def is_within_window(moment, start_hour=VISIBLE_FROM_HOUR, end_hour=VISIBLE_UNTIL_HOUR):
    """Return True when `moment.hour` lies in [start_hour, end_hour).

    Parameters
    ----------
    moment : object with an integer `hour` attribute (e.g. datetime.time)
    start_hour : first hour of the window, inclusive
    end_hour : hour at which the window closes, exclusive
    """
    return start_hour <= moment.hour < end_hour


# Current wall-clock time in India Standard Time.
ist = pytz.timezone('Asia/Kolkata')
now = datetime.now(ist).time()

show_chart = is_within_window(now)

show_chart

True

In [19]:
import plotly.express as px

# Highlight the GAME category in pink; categories without an entry here fall
# back to Plotly's default colour sequence.
color_map = {
    "GAME": "deeppink"
}

# Plot a random subset of the cleaned rows. A fixed seed makes the figure
# identical on every run, and the size is capped at the frame length because
# DataFrame.sample raises when asked for more rows than exist.
SAMPLE_SIZE = 800
SAMPLE_SEED = 42
plot_sample = plot_df.sample(
    n=min(SAMPLE_SIZE, len(plot_df)),
    random_state=SAMPLE_SEED,
)

# Bubble chart: x = app size in MB, y = rating, bubble area = install count.
fig = px.scatter(
    plot_sample,
    x="Size_MB",
    y="Rating",
    size="Installs",
    color="Category",
    hover_name="App",
    size_max=60,
    opacity=0.75,
    color_discrete_map=color_map,
    title="App Size vs Rating Bubble Chart"
)

fig.update_layout(template="plotly_white", title_x=0.5)

# Honour the time-based visibility flag computed in the earlier cell: the
# figure is always built (so `fig` stays defined) but only displayed while the
# window is open.
if show_chart:
    fig.show()
else:
    print("Chart hidden: outside the configured visibility window.")