In [1]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz
import re

# ---------------------------
# Load dataset
# ---------------------------
# Read the raw Play Store export into a DataFrame.
apps_df = pd.read_csv("googleplaystore.csv")

# ---------------------------
# Data Cleaning
# ---------------------------
# 'Installs' carries '+' and ',' decorations; strip them before the numeric
# conversion. Anything still unparseable becomes NaN.
installs_digits = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_digits, errors='coerce')

# 'Reviews' and 'Rating' only need the coercing numeric conversion.
for numeric_column in ('Reviews', 'Rating'):
    apps_df[numeric_column] = pd.to_numeric(apps_df[numeric_column], errors='coerce')

# Convert Size to MB
def convert_size(size):
    """Convert a size label such as '19M' or '512k' to megabytes.

    Parameters
    ----------
    size : object
        Raw size value. Strings containing 'M' are read as megabytes and
        strings containing 'k' as kilobytes (divided by 1024). Thousands
        separators (',') are ignored.

    Returns
    -------
    float or None
        Size in MB, or None when the value is not a string, carries neither
        unit marker, or its numeric part cannot be parsed.
    """
    # Missing values arrive as float NaN; anything that is not text has no unit.
    if not isinstance(size, str):
        return None

    # 'M' is checked before 'k', so a label containing both is read as megabytes.
    if 'M' in size:
        unit, divisor = 'M', 1
    elif 'k' in size:
        unit, divisor = 'k', 1024
    else:
        return None

    number_text = size.replace(unit, '').replace(',', '')
    try:
        return float(number_text) / divisor
    except ValueError:
        # Malformed label (e.g. a bare 'M'): report "unknown" instead of
        # aborting the whole column conversion.
        return None

# Numeric size in MB derived from the raw 'Size' labels.
size_in_mb = apps_df['Size'].apply(convert_size)
apps_df['Size_MB'] = size_in_mb

# Parse 'Last Updated' into datetimes; entries that fail to parse become NaT.
last_updated_parsed = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
apps_df['Last Updated'] = last_updated_parsed

# ---------------------------
# Apply Filters
# ---------------------------
# String predicates are built with na=False so a missing value produces a
# plain False instead of NaN. Without it, `~` on the NaN-bearing result of
# .str.contains raises TypeError. Consequence: a row with a missing app name
# counts as "no digit in name", and a row with a missing category is excluded.
name_has_digit = apps_df['App'].str.contains(r'\d', regex=True, na=False)
category_starts_t_or_p = apps_df['Category'].str.startswith(('T', 'P'), na=False)

# Keep apps that satisfy every criterion; rows with NaN in Rating, Reviews or
# Size_MB fail the numeric comparisons and are dropped.
filtered_df = apps_df[
    (apps_df['Rating'] >= 4.2) &
    ~name_has_digit &
    category_starts_t_or_p &
    (apps_df['Reviews'] > 1000) &
    (apps_df['Size_MB'].between(20, 80))
].copy()

# ---------------------------
# Aggregate installs per month
# ---------------------------
# Bucket every app by the first day of the month of its last update.
update_period = filtered_df['Last Updated'].dt.to_period('M')
filtered_df['Month'] = update_period.dt.to_timestamp()

# Total installs for each (month, category) pair.
monthly_df = (
    filtered_df
    .groupby(['Month', 'Category'])['Installs']
    .sum()
    .reset_index()
)

# ---------------------------
# Calculate month-over-month growth
# ---------------------------
# Order rows chronologically inside each category so pct_change compares a
# month with the previous recorded month of the same category.
monthly_df = monthly_df.sort_values(['Category', 'Month'])
installs_by_category = monthly_df.groupby('Category')['Installs']
monthly_df['MoM_Growth'] = installs_by_category.pct_change()

# Flag months whose growth exceeds 25%; the first month of a category has
# NaN growth and therefore compares False.
monthly_df['Highlight'] = monthly_df['MoM_Growth'] > 0.25

# ---------------------------
# Translate legend categories
# ---------------------------
# Display names for the legend; categories without an entry keep their
# original label.
translation_map = {
    "Travel & Local": "Voyage & Local",    # French
    "Productivity": "Productividad",       # Spanish
    "Photography": "写真"                  # Japanese
}

# NOTE(review): the map keys are title-cased ('Travel & Local'). If the
# Category column holds upper-snake-case labels such as 'TRAVEL_AND_LOCAL'
# (confirm against the CSV), an exact-match lookup never hits and nothing is
# translated. Normalising the label first lets both spellings resolve to the
# same key; labels that already matched exactly still match.
category_lookup_key = (
    monthly_df['Category']
    .str.strip()
    .str.replace('_AND_', ' & ', regex=False)
    .str.replace('_', ' ', regex=False)
    .str.title()
)

monthly_df['Category_Display'] = (
    category_lookup_key.map(translation_map).fillna(monthly_df['Category'])
)

# ---------------------------
# Time Restriction (4 PM – 6 PM IST)
# ---------------------------
# Wall-clock time of day in India Standard Time at the moment the cell runs.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist).time()

# Bounds of the display window, parsed from "HH:MM" into time objects.
start_time, end_time = (
    datetime.strptime(clock_text, "%H:%M").time()
    for clock_text in ("16:00", "18:00")
)

# ---------------------------
# Conditional Display
# ---------------------------
if start_time <= current_time <= end_time:
    # The chart is rendered only while the current time lies inside the
    # window; both bounds are inclusive.

    # One base colour per displayed category, assigned in order of first
    # appearance and wrapping around if there are more categories than colours.
    colors = px.colors.qualitative.Set3
    base_color_by_category = {
        name: colors[position % len(colors)]
        for position, name in enumerate(monthly_df['Category_Display'].unique())
    }

    highlight_color = 'rgba(0,0,0,0.6)'  # dark overlay used for >25% growth months
    highlight_marker_size = 10

    # Per-row colour: the category's base colour, replaced by the highlight
    # colour on flagged months. Built with a dict lookup rather than a
    # row-wise apply that re-derived the category list for every row.
    is_highlighted = monthly_df['Highlight'].astype(bool)
    monthly_df['Color'] = (
        monthly_df['Category_Display']
        .map(base_color_by_category)
        .mask(is_highlighted, highlight_color)
    )

    # Stacked area chart, one area per category, drawn in its base colour.
    # NOTE(review): the title and y-label say "Cumulative", but 'Installs'
    # holds the per-month total (no running sum is computed) — confirm which
    # one is intended.
    fig = px.area(
        monthly_df,
        x='Month',
        y='Installs',
        color='Category_Display',
        line_group='Category_Display',
        color_discrete_map=base_color_by_category,
        labels={'Installs': 'Cumulative Installs', 'Month': 'Month', 'Category_Display': 'Category'},
        title='Cumulative App Installs Over Time by Category'
    )

    def _mark_highlighted_months(trace):
        """Show a dark marker on this trace at each month flagged as highlighted."""
        # The trace is named after its category; its points follow the row
        # order of that category in monthly_df, so the arrays below line up
        # with the trace's x values.
        category_rows = monthly_df[monthly_df['Category_Display'] == trace.name]
        if trace.x is None or len(category_rows) != len(trace.x):
            return  # cannot align safely; leave the trace untouched
        trace.update(
            mode='lines+markers',
            marker=dict(
                color=category_rows['Color'].tolist(),
                # Size 0 hides the marker on ordinary months.
                size=[
                    highlight_marker_size if flagged else 0
                    for flagged in category_rows['Highlight']
                ],
            ),
        )

    # Assigning the whole frame's colour column to every trace's marker is
    # misaligned and invisible (area traces draw no markers), so markers are
    # set per trace instead.
    fig.for_each_trace(_mark_highlighted_months)

    fig.show()

else:
    print("Stacked area chart available only between 4:00 PM and 6:00 PM IST.")


Stacked area chart available only between 4:00 PM and 6:00 PM IST.
