In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

# -----------------------------
# Load Dataset
# -----------------------------
# Raw app listing, read from the current working directory.
apps_df = pd.read_csv("googleplaystore.csv")

# -----------------------------
# Data Cleaning
# -----------------------------

# Installs: strip the '+' and ',' characters, then parse as a number.
# Whatever still fails to parse becomes NaN.
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Reviews / Last Updated: unparseable entries become NaN / NaT instead of raising.
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# Drop rows missing any field the monthly aggregation depends on.
required_columns = ['Category', 'Installs', 'Last Updated']
apps_df = apps_df.dropna(subset=required_columns)

# -----------------------------
# Apply Filters
# -----------------------------

# Build each condition on its own. Missing text values are treated as
# "no match" (na=False) before any negation is applied.
has_enough_reviews = apps_df['Reviews'] > 500
category_starts_ecb = apps_df['Category'].str.startswith(('E', 'C', 'B'), na=False)
name_starts_xyz = apps_df['App'].str.startswith(('X', 'Y', 'Z'), na=False)
name_contains_s = apps_df['App'].str.contains('S', case=False, na=False)

# .copy() yields an independent frame, so adding columns later neither
# touches apps_df nor triggers a chained-assignment warning.
filtered_df = apps_df[
    has_enough_reviews & category_starts_ecb & ~(name_starts_xyz | name_contains_s)
].copy()

# -----------------------------
# Create Monthly Aggregation
# -----------------------------

# Bucket every app by the calendar month of its last update.
update_month = filtered_df['Last Updated'].dt.to_period('M')
filtered_df.loc[:, 'Month'] = update_month

# Total installs for each (month, category) pair, flattened back to columns.
installs_by_group = filtered_df.groupby(['Month', 'Category'])['Installs'].sum()
monthly_data = installs_by_group.reset_index()

# Keep the month as plain text rather than a Period value.
month_labels = monthly_data['Month'].astype(str)
monthly_data['Month'] = month_labels

# -----------------------------
# Translate Category Names
# -----------------------------

# Display labels for selected categories; a category without an entry keeps
# its original name.
# NOTE: the category filter earlier in this script keeps only names starting
# with E, C or B, so the DATING entry is currently never applied.
translation_map = dict(
    BEAUTY="सौंदर्य",          # Hindi
    BUSINESS="வணிகம்",        # Tamil
    DATING="Partnersuche",    # German
)

translated_categories = monthly_data['Category'].replace(translation_map)
monthly_data.loc[:, 'Category'] = translated_categories

# -----------------------------
# Pivot for Time Series
# -----------------------------

# One row per month, one column per category, cell = total installs.
pivot_df = monthly_data.pivot(
    index='Month',
    columns='Category',
    values='Installs'
)

# The aggregation only produces months in which at least one app was updated,
# so the index can skip calendar months. Insert the skipped months so that
# neighbouring rows are always consecutive months; otherwise the growth
# figure below would compare months that are not adjacent.
if not pivot_df.empty:
    all_months = pd.period_range(
        start=pivot_df.index.min(),
        end=pivot_df.index.max(),
        freq='M'
    ).astype(str)
    pivot_df = pivot_df.reindex(pd.Index(all_months, name='Month'))

# No data for a month/category pair means zero installs.
# The month labels are "YYYY-MM" text, so a plain sort is chronological.
pivot_df = pivot_df.fillna(0).sort_index()

# -----------------------------
# Calculate Month-over-Month Growth
# -----------------------------

# Fractional change versus the previous row (= previous calendar month).
# The first row is NaN; a rise from 0 is inf and 0 -> 0 is NaN.
growth_df = pivot_df.pct_change()

# -----------------------------
# Time Restriction (6PM–9PM IST)
# -----------------------------

# Timezone-aware "now" in India Standard Time; only the hour is needed.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# The chart is rendered only from 18:00 up to (not including) 21:00 IST.
if 18 <= current_hour < 21:

    fig = go.Figure()

    # One line per category: total installs by month.
    for category in pivot_df.columns:
        fig.add_trace(go.Scatter(
            x=pivot_df.index,
            y=pivot_df[category],
            mode='lines',
            name=category
        ))

    # Months in which at least one category grew by more than 20%.
    # Every marker looks the same, so draw a single line per month instead
    # of stacking one identical line per qualifying category.
    # NaN growth never qualifies; inf (growth from zero) does.
    significant_months = growth_df.index[(growth_df > 0.20).any(axis=1)]

    for month in significant_months:
        fig.add_vline(
            x=month,
            line_width=2,
            line_dash="dash",
            line_color="red"
        )

    fig.update_layout(
        title="Trend of Total Installs Over Time by Category",
        xaxis_title="Month",
        yaxis_title="Total Installs",
        hovermode="x unified"
    )

    fig.show()

else:
    print("⛔ This Time Series Chart is available only between 6 PM and 9 PM IST.")


⛔ This Time Series Chart is available only between 6 PM and 9 PM IST.
