In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz
import re

# -----------------------------
# Load Dataset
# -----------------------------
apps_df = pd.read_csv("googleplaystore.csv")

# -----------------------------
# Data Cleaning
# -----------------------------

# Installs: strip every "+" and "," character, then coerce to a number.
# errors='coerce' turns anything that still fails to parse into NaN.
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Reviews and Rating get no text clean-up, only numeric coercion.
for numeric_col in ('Reviews', 'Rating'):
    apps_df[numeric_col] = pd.to_numeric(apps_df[numeric_col], errors='coerce')

# Price: remove the literal "$" (plain-text replace, not a regex) and coerce.
price_text = apps_df['Price'].str.replace('$', '', regex=False)
apps_df['Price'] = pd.to_numeric(price_text, errors='coerce')

# Revenue is computed per row as installs multiplied by price.
apps_df['Revenue'] = apps_df['Installs'].mul(apps_df['Price'])

# Clean Size (convert to MB)
def convert_size(size):
    """Convert a size label such as ``"19M"`` or ``"201k"`` to megabytes.

    Args:
        size: Raw value from the ``Size`` column.

    Returns:
        The size in MB as a float (``k`` values are divided by 1024), or
        ``None`` when ``size`` is not a string, does not end in an ``M`` or
        ``k`` unit, or its numeric part cannot be parsed.
    """
    if not isinstance(size, str):
        return None

    text = size.strip()
    # Match the unit as a suffix: a substring test would treat any word that
    # merely contains "M" or "k" as a size and then crash inside float().
    if text.endswith('M'):
        divisor = 1
    elif text.endswith('k'):
        divisor = 1024
    else:
        return None

    try:
        # Commas are dropped so thousands-separated numbers still parse.
        value = float(text[:-1].replace(',', ''))
    except ValueError:
        # Not a number after all; report "no size" instead of aborting
        # the whole column conversion.
        return None
    return value / divisor

# Run every Size entry through convert_size.
size_in_mb = apps_df['Size'].apply(convert_size)
apps_df['Size'] = size_in_mb

# Clean Android Version: keep the first "digits[.digits]" match as a number.
# expand=False makes extract return a Series rather than a one-column frame.
# NOTE(review): the pattern captures at most one dot, so a value such as
# "4.0.3" is read as 4.0 — confirm that is the intended comparison basis.
android_ver_text = apps_df['Android Ver'].str.extract(r'(\d+\.?\d*)', expand=False)
apps_df['Android Ver'] = pd.to_numeric(android_ver_text, errors='coerce')

# Keep only rows whose app name is at most 30 characters long.
name_lengths = apps_df['App'].str.len()
apps_df = apps_df[name_lengths <= 30]

# -----------------------------
# Apply Filters
# -----------------------------

# One boolean mask per criterion; a row must satisfy all of them.
enough_installs = apps_df['Installs'] >= 10000
enough_revenue = apps_df['Revenue'] >= 10000
newer_android = apps_df['Android Ver'] > 4.0
larger_than_15 = apps_df['Size'] > 15
rated_everyone = apps_df['Content Rating'] == 'Everyone'

filtered_df = apps_df[
    enough_installs
    & enough_revenue
    & newer_android
    & larger_than_15
    & rated_everyone
]

# -----------------------------
# Top 3 Categories by Installs
# -----------------------------

# Total installs per category, largest first; keep the first three labels.
installs_by_category = filtered_df.groupby('Category')['Installs'].sum()
top_categories = installs_by_category.sort_values(ascending=False).head(3).index

in_top_categories = filtered_df['Category'].isin(top_categories)
final_df = filtered_df[in_top_categories]

# -----------------------------
# Aggregation (Free vs Paid)
# -----------------------------

# Per (Category, Type) pair: mean of Installs and sum of Revenue.
aggregations = {
    'Installs': 'mean',
    'Revenue': 'sum',
}
by_category_and_type = final_df.groupby(['Category', 'Type'])
grouped_data = by_category_and_type.agg(aggregations).reset_index()

# -----------------------------
# Time Restriction (1PM–2PM IST)
# -----------------------------

ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# The window 13:00–13:59 is exactly the case where the hour equals 13.
if current_hour != 13:
    print("⛔ This graph is available only between 1 PM and 2 PM IST.")

else:
    # Both traces share the same "Category - Type" x labels; build them once.
    axis_labels = grouped_data['Category'] + " - " + grouped_data['Type']

    # Average Installs (Primary Y Axis)
    installs_bars = go.Bar(
        x=axis_labels,
        y=grouped_data['Installs'],
        name="Average Installs",
        yaxis='y1',
    )

    # Revenue (Secondary Y Axis)
    revenue_line = go.Scatter(
        x=axis_labels,
        y=grouped_data['Revenue'],
        name="Revenue",
        yaxis='y2',
        mode='lines+markers',
    )

    fig = go.Figure()
    fig.add_trace(installs_bars)
    fig.add_trace(revenue_line)

    # The second axis overlays the first and is drawn on the right-hand side.
    primary_axis = {'title': "Average Installs"}
    secondary_axis = {
        'title': "Revenue ($)",
        'overlaying': 'y',
        'side': 'right',
    }
    fig.update_layout(
        title="Avg Installs vs Revenue (Free vs Paid) - Top 3 Categories",
        xaxis_title="Category - App Type",
        yaxis=primary_axis,
        yaxis2=secondary_axis,
        legend={'x': 0.01, 'y': 0.99},
    )

    fig.show()


⛔ This graph is available only between 1 PM and 2 PM IST.
