# In [4]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz

# -----------------------------
# Load Dataset
# -----------------------------
apps_df = pd.read_csv("googleplaystore.csv")

# -----------------------------
# Data Cleaning
# -----------------------------

# The .str accessor is used here, so 'Installs' is expected to hold text.
# Every "+" and "," character is removed before the numeric conversion;
# values that still cannot be parsed become NaN (errors='coerce').
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Keep only rows that have both a category and a numeric install count
apps_df = apps_df.dropna(subset=['Category', 'Installs'])

# -----------------------------
# Apply Category Filters
# -----------------------------

# Exclude categories whose name begins with A, C, G or S (case-sensitive)
excluded_initials = ('A', 'C', 'G', 'S')
in_excluded_category = apps_df['Category'].str.startswith(excluded_initials)
apps_df = apps_df[~in_excluded_category]

# -----------------------------
# Top 5 Categories by Installs
# -----------------------------

# Sum installs per category, rank the totals from largest to smallest,
# and keep the first five entries of that ranking.
installs_by_category = apps_df.groupby('Category')['Installs'].sum()
ranked_installs = installs_by_category.sort_values(ascending=False)
top_categories = ranked_installs.head(5)

# Move the 'Category' index back into a regular column
top_df = top_categories.reset_index()

# Highlight flag: True where the category total is strictly above 1,000,000
top_df['Highlight'] = top_df['Installs'].gt(1_000_000)

# -----------------------------
# Time Condition (6PM–8PM IST)
# -----------------------------

# Read the clock in the Asia/Kolkata zone so the gate does not depend on
# the timezone of the machine running the script.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# Hours 18 and 19 qualify, i.e. 18:00 inclusive up to 20:00 exclusive.
if 18 <= current_hour < 20:

    # -----------------------------
    # Bar Chart Visualization
    # -----------------------------

    # Colour the bars by the 'Highlight' flag so that categories above one
    # million installs are visually distinguished. The boolean flag is
    # mapped to readable labels on a copy, leaving top_df itself unchanged.
    highlight_labels = {True: "> 1M installs", False: "1M installs or fewer"}
    chart_df = top_df.assign(
        Highlight=top_df["Highlight"].map(highlight_labels)
    )

    fig = px.bar(
        chart_df,
        x="Category",
        y="Installs",
        color="Highlight",
        text="Installs",
        title="Top 5 Categories by Total Installs",
        color_discrete_map={
            "> 1M installs": "#08519c",
            "1M installs or fewer": "#c6dbef",
        },
        # Discrete colouring draws one trace per label; pinning the category
        # order keeps the bars in the ranking order of chart_df.
        category_orders={"Category": chart_df["Category"].tolist()},
    )

    fig.update_layout(
        title_x=0.5,
        xaxis_title="Category",
        yaxis_title="Total Installs",
        legend_title_text="Highlight",
    )

    # Place the install totals above the bars
    fig.update_traces(textposition='outside')

    fig.show()

else:
    print("⛔ This visualization is available only between 6 PM and 8 PM IST.")
