In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz
import re

# -----------------------------
# Load Dataset
# -----------------------------
# -----------------------------
# Load Dataset + Data Cleaning
# -----------------------------
# Read the raw CSV and coerce the three numeric columns in one chained step.
# Anything that cannot be parsed as a number becomes NaN (errors='coerce').
apps_df = pd.read_csv("googleplaystore.csv").assign(
    # Installs: drop "+" and "," characters before the numeric conversion.
    Installs=lambda frame: pd.to_numeric(
        frame['Installs'].str.replace('[+,]', '', regex=True),
        errors='coerce',
    ),
    # Rating and Reviews only need the numeric coercion.
    Rating=lambda frame: pd.to_numeric(frame['Rating'], errors='coerce'),
    Reviews=lambda frame: pd.to_numeric(frame['Reviews'], errors='coerce'),
)

# Clean Size (convert to MB)
def convert_size(size):
    """Convert a size label such as ``"19M"`` or ``"512k"`` to megabytes.

    Args:
        size: Raw cell value. Only strings ending in ``M`` (megabytes) or
            ``k`` (kilobytes) are understood; surrounding whitespace and
            thousands separators (``,``) are tolerated.

    Returns:
        The size in MB as a float, or ``None`` when the value is not a
        string, has no recognised unit suffix, or its numeric part cannot
        be parsed.
    """
    if not isinstance(size, str):
        return None

    text = size.strip().replace(',', '')

    # The unit must be the trailing character; a stray "M" or "k" elsewhere
    # in the text is not treated as a unit.
    if text.endswith('M'):
        kb_per_unit = None
    elif text.endswith('k'):
        kb_per_unit = 1024
    else:
        return None

    try:
        magnitude = float(text[:-1])
    except ValueError:
        # Malformed number (e.g. just "M"): report it as missing instead of
        # aborting the whole column conversion.
        return None

    return magnitude if kb_per_unit is None else magnitude / kb_per_unit

# Size labels -> megabytes (values convert_size cannot interpret end up missing).
apps_df['Size'] = apps_df['Size'].apply(convert_size)

# Parse "Last Updated" into datetimes; unparseable entries become NaT.
apps_df['Last Updated'] = pd.to_datetime(
    apps_df['Last Updated'],
    errors='coerce',
)

# -----------------------------
# Apply Filters
# -----------------------------
# Keep rows that satisfy ALL of:
#   * Rating of at least 4.2
#   * more than 1000 Reviews
#   * Size between 20 and 80 (inclusive, in MB)
#   * Category starting with "T" or "P"
#   * App name containing no digit
# Missing Category / App values fail their test (na=False).
# The trailing .copy() makes filtered_df independent of apps_df so later
# column assignments do not write into a view.
filtered_df = apps_df.loc[
    lambda frame: frame['Rating'].ge(4.2)
    & frame['Reviews'].gt(1000)
    & frame['Size'].between(20, 80)
    & frame['Category'].str.startswith(('T', 'P'), na=False)
    & ~frame['App'].str.contains(r'\d', regex=True, na=False)
].copy()

# -----------------------------
# Create Monthly Time Series (FIX: Use .loc)
# -----------------------------

# Tag every filtered row with the calendar month of its last update.
filtered_df.loc[:, 'Month'] = filtered_df['Last Updated'].dt.to_period('M')

# Total installs for each (month, category) pair, as a flat table.
monthly_data = filtered_df.groupby(['Month', 'Category'])['Installs'].sum().reset_index()

# Store the month as a string label rather than a Period.
monthly_data['Month'] = monthly_data['Month'].astype(str)

# Wide layout for the stacked area chart: one row per month, one column per
# category. Month/category pairs with no data are filled with 0, and rows
# are ordered by month label.
pivot_df = (
    monthly_data
    .set_index(['Month', 'Category'])['Installs']
    .unstack('Category')
    .fillna(0)
    .sort_index()
)

# Running total of installs down the months, per category.
cumulative_df = pivot_df.cumsum()

# -----------------------------
# Translate Legend Labels
# -----------------------------

# Legend label overrides, keyed by category column name.
# NOTE(review): rename() silently skips keys that are not present among the
# columns -- confirm these keys match the dataset's Category values exactly.
translation_map = {
    "Travel & Local": "Voyage et Local",  # French
    "Productivity": "Productividad",      # Spanish
    "Photography": "写真",                 # Japanese
}

# Only the cumulative (plotted) frame gets the translated column names;
# pivot_df keeps the original ones.
cumulative_df = cumulative_df.rename(columns=translation_map)

# -----------------------------
# Calculate Month-over-Month Growth
# -----------------------------

# Fractional change of each category's monthly installs relative to the
# previous row of pivot_df (computed on the per-month totals, not on the
# cumulative series).
growth_df = pivot_df.pct_change()

# Months in which at least one category grew by more than 25%.
highlight_months = growth_df.index[growth_df.gt(0.25).any(axis=1)].tolist()

# -----------------------------
# Time Restriction (4PM–6PM IST)
# -----------------------------

# Current wall-clock hour in India Standard Time.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# The chart is rendered only while 16 <= hour < 18 (IST); otherwise a notice
# is printed instead.
if not (16 <= current_hour < 18):
    print("⛔ This stacked area chart is available only between 4 PM to 6 PM IST.")

else:

    # One stacked-area trace per column of cumulative_df, in column order.
    fig = go.Figure(data=[
        go.Scatter(
            x=cumulative_df.index,
            y=cumulative_df[column_label],
            mode='lines',
            stackgroup='one',
            name=column_label,
            opacity=0.7,
        )
        for column_label in cumulative_df.columns
    ])

    # Dashed red vertical marker on every high-growth month.
    for month in highlight_months:
        fig.add_vline(
            x=month,
            line_width=2,
            line_dash="dash",
            line_color="red",
        )

    fig.update_layout(
        title="Cumulative Installs Over Time by Category",
        xaxis_title="Month",
        yaxis_title="Cumulative Installs",
        hovermode="x unified",
    )

    fig.show()


⛔ This stacked area chart is available only between 4 PM to 6 PM IST.
