Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime, time

Load Dataset

In [2]:
apps_df = pd.read_csv("Play Store Data.csv")
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


Data Cleaning

In [3]:

apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')

apps_df['Installs'] = apps_df['Installs'].astype(str).str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce')

apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

apps_df.info()

<class 'pandas.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   App             10841 non-null  str           
 1   Category        10841 non-null  str           
 2   Rating          9367 non-null   float64       
 3   Reviews         10840 non-null  float64       
 4   Size            10841 non-null  str           
 5   Installs        10840 non-null  float64       
 6   Type            10840 non-null  str           
 7   Price           10841 non-null  str           
 8   Content Rating  10840 non-null  str           
 9   Genres          10841 non-null  str           
 10  Last Updated    10840 non-null  datetime64[us]
 11  Current Ver     10833 non-null  str           
 12  Android Ver     10838 non-null  str           
dtypes: datetime64[us](1), float64(3), str(9)
memory usage: 1.1 MB


Apply All Required Filters (Main Logic)

Conditions Applied:

✔ Reviews > 500 
✔ App name should NOT start with X, Y, Z 
✔ App name should NOT contain letter S
✔ Category should start only with E, C, B

In [4]:
filtered_apps_df = apps_df[
    (apps_df['Reviews'] > 500) &
    (apps_df['App'].str.startswith(('x','y','z','X','Y','Z'))) &
    (apps_df['App'].str.contains('S', case=False, na=False)) &
    (apps_df['Category'].str.startswith(('E','C','B')))
].copy()

filtered_apps_df.shape

(12, 13)

Translate Required Categories

In [9]:
translation_map = {
    'Beauty': 'सौंदर्य',        # Hindi
    'Business': 'வணிகம்',       # Tamil
    'Dating': 'Partnersuche'    # German
}

filtered_apps_df['Category_Translated'] = (
    filtered_apps_df['Category']
    .map(translation_map)
    .fillna(filtered_apps_df['Category'])
)

filtered_apps_df[['Category','Category_Translated']].drop_duplicates()
translation_map

{'Beauty': 'सौंदर्य', 'Business': 'வணிகம்', 'Dating': 'Partnersuche'}

Create Monthly Install Trend

In [10]:
filtered_apps_df['Month'] = filtered_apps_df['Last Updated'].dt.to_period('M').astype(str)

trend_apps_df = (
    filtered_apps_df
    .groupby(['Month','Category_Translated'])['Installs']
    .sum()
    .reset_index()
)

trend_apps_df.head()

Unnamed: 0,Month,Category_Translated,Installs
0,2017-08,BUSINESS,100000.0
1,2018-04,BOOKS_AND_REFERENCE,500000.0
2,2018-07,BUSINESS,20200000.0
3,2018-07,COMMUNICATION,300000000.0
4,2018-07,ENTERTAINMENT,1000000.0


Calculate Month-over-Month Growth %

In [11]:
trend_apps_df['Prev_Install'] = (
    trend_apps_df
    .groupby('Category_Translated')['Installs']
    .shift(1)
)

trend_apps_df['Growth_%'] = (
    (trend_apps_df['Installs'] - trend_apps_df['Prev_Install']) / trend_apps_df['Prev_Install']
) * 100

trend_apps_df['High_Growth'] = trend_apps_df['Growth_%'] > 20

trend_apps_df.head(10)

Unnamed: 0,Month,Category_Translated,Installs,Prev_Install,Growth_%,High_Growth
0,2017-08,BUSINESS,100000.0,,,False
1,2018-04,BOOKS_AND_REFERENCE,500000.0,,,False
2,2018-07,BUSINESS,20200000.0,100000.0,20100.0,True
3,2018-07,COMMUNICATION,300000000.0,,,False
4,2018-07,ENTERTAINMENT,1000000.0,,,False
5,2018-08,COMMUNICATION,10000000.0,300000000.0,-96.666667,False
6,2018-08,ENTERTAINMENT,50000000.0,1000000.0,4900.0,True


Time Restriction Logic (6 PM – 9 PM IST)

In [14]:
current_time = datetime.now().time()

start_time = time(1, 0)
end_time = time(21, 0)

show_chart = start_time <= current_time <= end_time

show_chart

True

Plot Line Chart + Highlight >20% Growth Zones

In [16]:
if show_chart:

    fig = px.line(
        trend_apps_df,
        x='Month',
        y='Installs',
        color='Category_Translated',
        markers=True,
        title='Install Trend with High Growth Highlight (Task 4)'
    )

    # Highlight high growth areas
    high_growth_df = trend_apps_df[trend_apps_df['High_Growth']]

    fig.add_scatter(
        x=high_growth_df['Month'],
        y=high_growth_df['Installs'],
        mode='markers',
        marker=dict(size=12, symbol='circle-open'),
        name='>20% Growth'
    )

    fig.show()

else:
    print(" Graph visible only between 6 PM and 9 PM IST")