Check whether your file exists in the notebook's working directory

In [None]:
import os
# List the current working directory so the CSV read later in the notebook can be confirmed by eye.
os.listdir(os.curdir)

['lec40-checkpoint.ipynb',
 'Play Store Data.csv',
 'Task1-checkpoint.ipynb',
 'Untitled-checkpoint.ipynb',
 'untitled-checkpoint.txt',
 'Untitled1-checkpoint.ipynb',
 'Untitled2-checkpoint.ipynb',
 'Untitled3-checkpoint.ipynb',
 'Untitled4-checkpoint.ipynb',
 'Untitled5-checkpoint.ipynb',
 'Untitled6-checkpoint.ipynb']

Importing all Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import pytz

Load the Dataset 

In [3]:
# Step 1: Load the Dataset
# The file name is kept in one place; it is resolved relative to the working directory.
PLAY_STORE_CSV = 'Play Store Data.csv'
apps_df = pd.read_csv(PLAY_STORE_CSV)

In [5]:
# Inspect the schema of the loaded frame: column names, non-null counts and dtypes.
apps_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


In [7]:
# Summary statistics for the numeric columns of the frame.
# NOTE(review): the recorded output reports a Rating max of 19.0 — presumably a malformed row
# if ratings are meant to be on a 1-5 scale; confirm before relying on Rating.
apps_df.describe()

Unnamed: 0,Rating
count,9367.0
mean,4.193338
std,0.537431
min,1.0
25%,4.0
50%,4.3
75%,4.5
max,19.0


Data Cleaning and Transformation

In [10]:
# Convert Size to Size_MB
def convert_size(size):
    """Convert a size label such as '19M' or '201k' to megabytes.

    Parameters
    ----------
    size : object
        Raw value from the 'Size' column. Strings ending in 'M' are read as
        megabytes and strings ending in 'k' (or 'K') as kilobytes. Surrounding
        whitespace and thousands separators (',') are ignored.

    Returns
    -------
    float
        The size in MB, or ``np.nan`` when the value is not a string, has no
        recognised unit suffix, or its numeric part cannot be parsed.
    """
    if not isinstance(size, str):
        return np.nan

    text = size.strip().replace(',', '')
    if not text:
        return np.nan

    # Number of "units" per megabyte for each recognised suffix.
    units_per_mb = {'M': 1.0, 'k': 1024.0, 'K': 1024.0}
    divisor = units_per_mb.get(text[-1])
    if divisor is None:
        return np.nan

    try:
        return float(text[:-1]) / divisor
    except ValueError:
        # A malformed number must not abort the whole column conversion.
        return np.nan

# Derive a numeric megabyte column by running every raw 'Size' value through convert_size.
apps_df['Size_MB'] = apps_df['Size'].map(convert_size)


Convert Installs into Numeric 

In [11]:
# Strip the '+' and ',' decorations from the install counts (e.g. '10,000+' -> '10000')
# and cast the result to numbers; anything that still cannot be parsed becomes NaN.
# astype(str) keeps this cell safe to re-run: once the column is numeric the .str
# accessor would otherwise raise AttributeError on a second execution.
installs_text = apps_df['Installs'].astype(str).str.replace(r'[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

Convert Reviews into Numeric 

In [12]:
# 'Reviews' is loaded as text (object dtype); cast it to numbers.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')

Convert Last Updated to Date

In [None]:
# Parse the 'Last Updated' text into datetimes so the .dt accessor can be used when filtering;
# values that fail to parse become NaT (errors='coerce').
# NOTE(review): no explicit format= is given, so parsing relies on pandas' inference — confirm
# against a sample of the raw values.
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

Applying Filters 

In [15]:
# Build one boolean mask per criterion, then keep only the rows satisfying all three.
# Rows with a missing Rating, Size_MB or Last Updated compare as False and are dropped.
rated_4_or_more = apps_df['Rating'].ge(4)
at_least_10_mb = apps_df['Size_MB'].ge(10)
updated_in_january = apps_df['Last Updated'].dt.month.eq(1)

apps_df_filtered = apps_df[rated_4_or_more & at_least_10_mb & updated_in_january]

Get Top 10 Categories by Total Installs

In [16]:
# Total installs per category among the filtered apps ...
installs_by_category = apps_df_filtered.groupby('Category')['Installs'].sum()

# ... ranked from most to least installed, keeping the first ten.
ranked_categories = installs_by_category.sort_values(ascending=False)
top10_categories = ranked_categories.head(10)

top10_categories


Category
FAMILY             182494820.0
SPORTS             120511000.0
GAME               115691000.0
ENTERTAINMENT       51000000.0
PERSONALIZATION     15060000.0
PHOTOGRAPHY         10500000.0
LIFESTYLE            5071000.0
EDUCATION            2000000.0
SHOPPING             2000000.0
TOOLS                1010000.0
Name: Installs, dtype: float64

Aggregate Metrics for Visualization

In [25]:
# Restrict the filtered apps to the ten categories selected above.
in_top10 = apps_df_filtered['Category'].isin(top10_categories.index)
final_df = apps_df_filtered[in_top10]

# One row per category: mean rating and total review count, with Category as a regular column.
per_category = final_df.groupby('Category')
summary = per_category.agg(
    Rating=('Rating', 'mean'),
    Reviews=('Reviews', 'sum'),
).reset_index()

summary


Unnamed: 0,Category,Rating,Reviews
0,EDUCATION,4.4,57645.0
1,ENTERTAINMENT,4.25,1238948.0
2,FAMILY,4.395455,4544623.0
3,GAME,4.313333,2397589.0
4,LIFESTYLE,4.38,42809.0
5,PERSONALIZATION,4.475,155996.0
6,PHOTOGRAPHY,4.15,563720.0
7,SHOPPING,4.2,19950.0
8,SPORTS,4.342857,1982017.0
9,TOOLS,4.2,8010.0


Time-Based Logic (3 PM – 5 PM IST)

In [26]:
# Wall-clock time in India right now (time of day only, date discarded).
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(tz=ist).time()

# Both ends of the display window are parsed the same way; the window is inclusive on both sides.
start, end = (
    datetime.strptime(clock, "%H:%M").time()
    for clock in ("15:00", "17:00")
)

show_chart = (current_time >= start) and (current_time <= end)

print("Current IST time:", current_time)
print("Show Chart:", show_chart)


Current IST time: 20:13:28.431080
Show Chart: False


Plot Grouped Bar Chart


In [None]:
if not show_chart:
    print("Chart visible only between 3 PM – 5 PM IST")
else:
    # Two bars per category, centred on the tick: average rating on the left,
    # total reviews (scaled to millions so both fit one axis) on the right.
    bar_width = 0.35
    positions = np.arange(len(summary))

    fig, ax = plt.subplots(figsize=(12,6))
    ax.bar(positions - bar_width/2, summary['Rating'], bar_width, label='Avg Rating')
    ax.bar(positions + bar_width/2, summary['Reviews']/1e6, bar_width, label='Total Reviews (Millions)')

    ax.set_xticks(positions)
    ax.set_xticklabels(summary['Category'], rotation=45)
    ax.set_xlabel("App Category")
    ax.set_ylabel("Values")
    ax.set_title("Top 10 Categories: Avg Rating vs Total Reviews")
    ax.legend()
    fig.tight_layout()
    plt.show()


Chart visible only between 3 PM – 5 PM IST
