In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime
import pytz

# -----------------------------
# LOAD DATA
# -----------------------------
df = pd.read_csv("Play Store Data.csv")   # change filename if needed

# -----------------------------
# DATA CLEANING
# -----------------------------
# errors='coerce' turns unparseable values into NaT/NaN instead of raising;
# those rows are removed by the dropna at the end of this section.
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')
df['Reviews'] = pd.to_numeric(df['Reviews'], errors='coerce')

# Clean Installs column (remove + and ,)
# '+' is a regex metacharacter, so the replacements are marked as literal
# (regex=False) rather than relying on the version-dependent default.
installs_text = (
    df['Installs']
    .astype(str)
    .str.replace('+', '', regex=False)
    .str.replace(',', '', regex=False)
)
df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Keep only rows where all three cleaned columns hold a usable value.
df = df.dropna(subset=['Last Updated','Installs','Reviews'])

# -----------------------------
# FILTER CONDITIONS
# -----------------------------
# Every string mask passes na=False so a missing Category/App counts as
# "no match". Without it a NaN would end up inside the mask and the `~`
# negation below would fail on it.

# category starts with E, C, B
category_ok = df['Category'].str.startswith(('E','C','B'), na=False)

# app name not starting with x y z (compared in lower case)
starts_with_xyz = df['App'].str.lower().str.startswith(('x','y','z'), na=False)

# app name should NOT contain letter "s" (upper or lower case)
contains_s = df['App'].str.contains('s', case=False, na=False)

# reviews more than 500
enough_reviews = df['Reviews'] > 500

# .copy() gives the later sections an independent frame to add columns to,
# instead of a slice of the unfiltered data.
df = df[category_ok & ~starts_with_xyz & ~contains_s & enough_reviews].copy()

# -----------------------------
# CATEGORY TRANSLATION
# -----------------------------
# Exact category labels on the left are replaced by the display label on the
# right; categories not listed here are left unchanged.
# NOTE(review): the category filter earlier in this cell keeps only names
# starting with E, C or B, so the "DATING" entry cannot match any remaining
# row — confirm whether that category was meant to be included.
category_map = {
    "BEAUTY": "सौंदर्य",      # Hindi
    "BUSINESS": "வணிகம்",     # Tamil
    "DATING": "Dating_DE"     # German placeholder
}

# assign() builds a new frame rather than writing into the filtered one,
# which avoids pandas' SettingWithCopyWarning.
df = df.assign(Category=df['Category'].replace(category_map))

# -----------------------------
# TIME SERIES PREPARATION
# -----------------------------
# Bucket every app by the first day of the month of its 'Last Updated' date.
# assign() returns a new frame, so nothing is written into a filtered slice.
df = df.assign(Month=df['Last Updated'].dt.to_period('M').dt.to_timestamp())

# Total installs per (month, category), ordered chronologically.
monthly = (
    df.groupby(['Month','Category'])['Installs']
    .sum()
    .reset_index()
    .sort_values('Month')
)

# -----------------------------
# MONTH OVER MONTH GROWTH
# -----------------------------
# pct_change compares each row with the previous row of the same category.
# A category may have no data for some months, so the change only counts as
# month-over-month when the previous row is exactly one calendar month
# earlier; otherwise the growth is left as NaN.
month_number = monthly['Month'].dt.year * 12 + monthly['Month'].dt.month
months_since_prev = month_number.groupby(monthly['Category']).diff()

monthly['MoM_Growth'] = (
    monthly.groupby('Category')['Installs']
    .pct_change()
    .where(months_since_prev == 1)
)

# growth > 20%  (NaN growth never satisfies the comparison)
growth_df = monthly[monthly['MoM_Growth'] > 0.20]

# -----------------------------
# TIME WINDOW CHECK (6–9 PM IST)
# -----------------------------
# The chart is rendered only when the current hour in Asia/Kolkata is
# 18, 19 or 20; at any other time a notice is printed instead.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist)

if not (18 <= current_time.hour < 21):
    print("Graph hidden — only visible between 6 PM and 9 PM IST")

else:
    # -----------------------------
    # PLOT
    # -----------------------------
    fig = px.line(
        monthly,
        x="Month",
        y="Installs",
        color="Category",
        title="Install Trend by Category"
    )

    # highlight >20% growth
    # A rectangle with x0 == x1 and no border has zero width and is never
    # visible, so each band spans the whole month bucket instead. Months are
    # de-duplicated so that several categories growing in the same month do
    # not stack multiple translucent bands on top of each other.
    for month_start in growth_df['Month'].drop_duplicates():
        fig.add_vrect(
            x0=month_start,
            x1=month_start + pd.DateOffset(months=1),
            line_width=0,
            fillcolor="green",
            opacity=0.25,
            layer="below"   # keep the shading behind the trend lines
        )

    fig.show()


Graph hidden — only visible between 6 PM and 9 PM IST
