In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

# ---------------------------
# Load dataset
# ---------------------------
apps_df = pd.read_csv("googleplaystore.csv")

# ---------------------------
# Data Cleaning
# ---------------------------
# Installs is read as text: strip every '+' and ',' character, then parse
# what is left as a number. Values that still cannot be parsed become NaN.
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Reviews gets the same lenient numeric parse (unparseable -> NaN).
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')

# Parse Last Updated into datetimes; unparseable values become NaT.
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# ---------------------------
# Apply Filters
# ---------------------------
# A row is kept only when all four conditions hold:
#   1. the app name does not start with X, Y or Z (case-sensitive),
#   2. the app name contains no letter "s" in either case,
#   3. the category starts with E, C or B (case-sensitive),
#   4. the app has more than 500 reviews (NaN reviews compare as False).
# Every string test passes na=False so a missing value counts as "no match"
# and the combined mask stays strictly boolean: a row without a Category is
# dropped by condition 3 instead of injecting NaN into the mask.
filtered_df = apps_df[
    (~apps_df['App'].str.startswith(('X', 'Y', 'Z'), na=False)) &
    (~apps_df['App'].str.contains('S', case=False, na=False)) &
    (apps_df['Category'].str.startswith(('E', 'C', 'B'), na=False)) &
    (apps_df['Reviews'] > 500)
].copy()  # copy so later column assignments do not touch apps_df

# ---------------------------
# Aggregate total installs per month
# ---------------------------
# Bucket each app by the calendar month of its last update, stored as a
# timestamp so it can be used directly as a plotting axis.
update_period = filtered_df['Last Updated'].dt.to_period('M')
filtered_df['Month'] = update_period.dt.to_timestamp()

# One row per (Month, Category) holding the summed installs, ordered by
# category and then chronologically within each category.
monthly_df = (
    filtered_df
    .groupby(['Month', 'Category'])['Installs']
    .sum()
    .reset_index()
    .sort_values(['Category', 'Month'])
)

# ---------------------------
# Month-over-month growth
# ---------------------------
# Fractional change in installs versus the previous row of the same category.
# NOTE: "previous row" is the previous month that has data for the category,
# which is not necessarily the previous calendar month if months are missing.
monthly_df['MoM_Growth'] = monthly_df.groupby('Category')['Installs'].pct_change()

# Flag rows whose growth exceeds 20%. The first row of each category has NaN
# growth, and NaN > 0.2 evaluates to False, so it is never flagged. The
# vectorized comparison always produces a boolean column, even when
# monthly_df is empty.
monthly_df['Highlight'] = monthly_df['MoM_Growth'] > 0.2

# ---------------------------
# Translate categories for display
# ---------------------------
# NOTE(review): the filter applied earlier keeps only categories starting with
# E, C or B, so the 'Dating' entry can never be used as things stand.
translation_map = {
    'Beauty': 'ब्यूटी',        # Hindi
    'Business': 'வணிகம்',     # Tamil
    'Dating': 'Dating'         # German (replace if desired)
}

# Match category names case-insensitively: the keys above are Title-case, but
# the Category column may use a different casing (presumably upper-case, e.g.
# 'BEAUTY' - confirm against the CSV), in which case an exact-match lookup
# would silently translate nothing.
translation_lookup = {key.casefold(): label for key, label in translation_map.items()}

# Categories without a translation keep their original name.
monthly_df['Category_Display'] = (
    monthly_df['Category']
    .str.casefold()
    .map(translation_lookup)
    .fillna(monthly_df['Category'])
)

# ---------------------------
# Time Restriction (6 PM – 9 PM IST)
# ---------------------------
# Bounds of the viewing window, as naive wall-clock times.
start_time, end_time = (
    datetime.strptime(bound, "%H:%M").time() for bound in ("18:00", "21:00")
)

# Current wall-clock time in India; .time() drops the date and tzinfo so it
# can be compared directly against the naive bounds above.
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist).time()

# ---------------------------
# Conditional Line Chart Rendering
# ---------------------------
# The chart is drawn only while the current IST time lies inside the window
# (both bounds inclusive); otherwise a notice is printed instead.
if start_time <= current_time <= end_time:

    fig = go.Figure()

    categories = monthly_df['Category_Display'].unique()

    for cat in categories:
        cat_df = monthly_df[monthly_df['Category_Display'] == cat]

        # Plot line for installs
        fig.add_trace(go.Scatter(
            x=cat_df['Month'],
            y=cat_df['Installs'],
            mode='lines+markers',
            name=cat,
            line=dict(width=2),
        ))

        # Highlight periods with MoM growth > 20%.
        # Each flagged row is shaded as its own interval, running from the
        # preceding row of this category to the flagged row. Joining only the
        # flagged points into one filled trace would draw nothing for an
        # isolated growth month and would bridge non-adjacent growth months
        # across months that did not grow.
        prev_month = cat_df['Month'].shift()
        prev_installs = cat_df['Installs'].shift()
        is_growth = cat_df['Highlight'].astype(bool) & prev_month.notna()

        shade_x, shade_y = [], []
        for seg_start_x, seg_start_y, seg_end_x, seg_end_y in zip(
            prev_month[is_growth].tolist(),
            prev_installs[is_growth].tolist(),
            cat_df.loc[is_growth, 'Month'].tolist(),
            cat_df.loc[is_growth, 'Installs'].tolist(),
        ):
            # The trailing None ends the segment so consecutive intervals are
            # filled independently rather than joined.
            shade_x.extend([seg_start_x, seg_end_x, None])
            shade_y.extend([seg_start_y, seg_end_y, None])

        if shade_x:
            fig.add_trace(go.Scatter(
                x=shade_x,
                y=shade_y,
                fill='tozeroy',
                mode='none',
                showlegend=False,
                hoverinfo='skip',  # keep the helper trace out of the unified hover box
                fillcolor='rgba(255,0,0,0.3)'  # semi-transparent red
            ))

    fig.update_layout(
        title='Total App Installs Over Time by Category',
        xaxis_title='Month',
        yaxis_title='Total Installs',
        hovermode='x unified'
    )

    fig.show()

else:
    print("Time series chart available only between 6:00 PM and 9:00 PM IST.")


Time series chart available only between 6:00 PM and 9:00 PM IST.
