In [1]:
import pandas as pd
import plotly.express as px
import pytz
from datetime import datetime, time


In [2]:
# Read googleplaystore.csv (relative to the working directory) into `df`,
# the frame every later cell transforms.
csv_path = "googleplaystore.csv"
df = pd.read_csv(csv_path)


In [3]:
# Turn the review counts into numbers; anything unparseable becomes NaN
# (errors='coerce') instead of raising.
df['Reviews'] = df['Reviews'].pipe(pd.to_numeric, errors='coerce')

In [4]:
# Strip every non-digit character from the install counts.
installs_digits = df['Installs'].astype(str).str.replace(r'\D', '', regex=True)
# Entries that contained no digits at all are now '' and count as zero installs.
installs_digits = installs_digits.replace('', '0')
df['Installs'] = installs_digits.astype(float)


In [5]:
# Parse the textual app size into megabytes.
#
# The previous version only stripped the 'M' / 'k' suffix, so a value such as
# "201k" (kilobytes) was stored as 201 MB and could wrongly pass the 20-80 MB
# filter applied later. Here the unit is captured and kilobytes are converted.
# Anything that is not "<number>[M|k]" (e.g. 'Varies with device') does not
# match the pattern and ends up as NaN.
KB_PER_MB = 1024  # NOTE(review): use 1000 if decimal megabytes are preferred

size_parts = (
    df['Size']
    .astype(str)
    .str.strip()
    .str.extract(r'^(?P<value>\d*\.?\d+)\s*(?P<unit>[Mk]?)$')
)

# A bare number without a suffix keeps being read as megabytes, as before.
unit_to_mb = size_parts['unit'].map({'M': 1.0, '': 1.0, 'k': 1 / KB_PER_MB})

df['Size_MB'] = pd.to_numeric(size_parts['value'], errors='coerce') * unit_to_mb

In [6]:
# Parse the update date once; unparseable values become NaT (errors='coerce').
last_updated = pd.to_datetime(df['Last Updated'], errors='coerce')
df['Last Updated'] = last_updated
# Bucket each date into its calendar month, stored as the month-start timestamp.
df['Month'] = last_updated.dt.to_period("M").dt.to_timestamp()


In [7]:
# One named boolean mask per selection rule, combined at the end.
rating_ok = df['Rating'].ge(4.2)
name_has_digit = df['App'].str.contains(r'\d', na=False)
category_t_or_p = df['Category'].str.startswith(('T', 'P'))
reviews_ok = df['Reviews'].gt(1000)
size_ok = df['Size_MB'].between(20, 80)  # bounds are inclusive

# .copy() so later column assignments on `filtered` do not touch `df`.
filtered = df.loc[
    rating_ok & ~name_has_digit & category_t_or_p & reviews_ok & size_ok
].copy()


In [8]:
# Category labels shown in the chart legend, translated per category.
translations = {
    "Travel & Local": "Voyage et local",   # French
    "Productivity": "Productividad",       # Spanish
    "Photography": "写真"                  # Japanese
}

# NOTE(review): in the public googleplaystore.csv the Category column appears
# to hold upper-case codes (e.g. TRAVEL_AND_LOCAL) rather than the display
# names above, in which case the original mapping never matched anything.
# Both spellings are accepted here; confirm against your copy of the data.
translations.update({
    "TRAVEL_AND_LOCAL": translations["Travel & Local"],
    "PRODUCTIVITY": translations["Productivity"],
    "PHOTOGRAPHY": translations["Photography"],
})

# Series.replace with a dict swaps exact matches only; other categories are
# left untouched.
filtered['Category'] = filtered['Category'].replace(translations)


In [9]:
# Total installs per (month, category); as_index=False keeps the two keys as
# ordinary columns so the result is a flat frame ready for plotting.
monthly_installs = filtered.groupby(['Month', 'Category'], as_index=False)['Installs'].sum()

In [10]:
# Installs of the preceding row within the same category (NaN for the first
# row of each category).
prev_installs = monthly_installs.groupby('Category')['Installs'].shift(1)
monthly_installs['Prev'] = prev_installs

# Percentage change relative to that preceding row.
monthly_installs['Growth'] = (
    monthly_installs['Installs'].sub(prev_installs).div(prev_installs).mul(100)
)

# Flag rows that grew by more than 25%; NaN growth compares as False.
monthly_installs['Highlight'] = monthly_installs['Growth'].gt(25)


In [11]:
# Wall-clock time in India right now, plus the window in which the chart may
# be displayed.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(tz=ist).time()
start_time = time(hour=16, minute=0)  # 4 PM
end_time = time(hour=18, minute=0)    # 6 PM

In [12]:
# Render the stacked area chart only inside the allowed IST window (both ends
# inclusive); otherwise print a notice.
if start_time <= current_time <= end_time:
    # Pin the trace order so the marker heights computed below use the same
    # bottom-to-top stacking order as the area traces.
    category_order = sorted(monthly_installs['Category'].unique())

    # NOTE(review): the title says "Cumulative" but the plotted values are
    # per-month totals (no running sum is taken) - confirm which is intended.
    fig = px.area(
        monthly_installs,
        x="Month",
        y="Installs",
        color="Category",
        category_orders={"Category": category_order},
        title="Cumulative Installs Over Time by Category (Stacked Area)",
    )

    # Highlight growth >25% with markers
    highlight_points = monthly_installs[monthly_installs['Highlight']]
    if not highlight_points.empty:
        # The areas are stacked, so a category's visible upper edge is the sum
        # of its own installs and those of every category drawn beneath it.
        # Plotting the marker at the raw Installs value would place it inside
        # a lower band for every category except the bottom one.
        stacked_top = (
            monthly_installs
            .pivot_table(
                index='Month',
                columns='Category',
                values='Installs',
                aggfunc='sum',
                fill_value=0,  # a month with no data contributes zero height
            )
            .reindex(columns=category_order, fill_value=0)
            .cumsum(axis=1)
        )
        marker_y = [
            stacked_top.at[month, category]
            for month, category in zip(
                highlight_points['Month'], highlight_points['Category']
            )
        ]

        fig.add_scatter(
            x=highlight_points['Month'],
            y=marker_y,
            mode='markers',
            marker=dict(size=10, color='red', opacity=0.8, symbol="star"),
            name=">25% Growth"
        )

    fig.show()

else:
    print("This chart is only visible between 4 PM and 6 PM IST.")


This chart is only visible between 4 PM and 6 PM IST.
