In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from datetime import datetime
import os
import pytz

In [2]:
df = pd.read_csv("googleplaystore.csv")


In [3]:
def convert_size(size):

    if 'M' in size:
        return float(size.replace('M', '')) 
    elif 'k' in size or 'K' in size:
        return float(size.replace('k', '').replace('K', ''))/1024
    else:
    
        return np.nan


In [4]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [5]:
df = df.dropna(subset=['Rating'])
for column in df.columns:
    df[column].fillna(df[column].mode()[0], inplace=True)
df.drop_duplicates(inplace=True)
df = df[df['Rating'] <= 5]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mode()[0], inplace=True)


In [6]:
# Step 3: Data Transformation
df['Reviews'] = df['Reviews'].astype(int)
df['Installs'] = df['Installs'].str.replace(',', '').str.replace('+', '').astype(int)
df['Price'] = df['Price'].str.replace('$', '').astype(float)

In [7]:
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M', ''))
    elif 'k' in size:
        return float(size.replace('k', '')) / 1024
    else:
        return np.nan

df['Size'] =df['Size'].apply(convert_size)

In [8]:
# Extract year from 'Last Updated' and create 'Year' column
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')
df['Year'] = df['Last Updated'].dt.year

In [9]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,2018


In [10]:
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce')
df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')
df['Installs'] = pd.to_numeric(df['Installs'], errors='coerce')


In [11]:
# Filter: size ≥ 10 MB
df = df[df['Size'] >= 10_000_000]

In [12]:
# Filter: average rating ≥ 4.0 (apply later per category)
# Filter: Last Updated month = January
df = df[df['Last Updated'].dt.month == 1]

In [13]:
category_group = df.groupby('Category').agg({
    'Rating': 'mean',
    'Reviews': 'sum',
    'Installs': 'sum'
}).reset_index()

In [14]:
# Keep only categories with avg rating ≥ 4.0
category_group = category_group[category_group['Rating'] >= 4.0]

In [16]:
# Select Top 10 categories by installs
main_df= category_group.sort_values('Installs', ascending=False).head(10)


In [18]:
# Get current IST time  ( Time Restriction (3PM - 5PM IST))
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)

start_time = current_time.replace(hour=15, minute=0, second=0, microsecond=0)
end_time   = current_time.replace(hour=17, minute=0, second=0, microsecond=0)


In [19]:

if start_time <= current_time <= end_time:
    # Reshape data for px.bar (long format)
    main_df = main_df.melt(
        id_vars='Category',
        value_vars=['Rating', 'Reviews'],
        var_name='Metric',
        value_name='Value'
    )

    # Category Analysis Plot (fig1 using px but working like fig)
    fig1 = px.bar(
        main_df,
        x='Category',
        y='Value',
        color='Metric',
        barmode='group',
        title='Top 10 App Categories by Installs (Filtered & Time Restricted)',
        width=plot_width,
        height=plot_height,
        color_discrete_sequence=px.colors.sequential.Plasma
    )

    fig1.update_layout(
        plot_bgcolor=plot_bg_color,
        paper_bgcolor=plot_bg_color,
        font_color=text_color,
        title_font=title_font,
        xaxis=dict(title_font=axis_font),
        yaxis=dict(title_font=axis_font),
        margin=dict(l=10, r=10, t=30, b=10)
    )

    fig1.update_traces(marker=dict(line=dict(color=text_color, width=1)))

    fig1.show()

else:
    print(" Chart is hidden. Visible only between 3 PM - 5 PM IST.")


⏰ Chart is hidden. Visible only between 3 PM - 5 PM IST.
