In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz
import re

# -----------------------------
# Load Dataset
# -----------------------------
apps_df = pd.read_csv("googleplaystore.csv")

# -----------------------------
# Data Cleaning
# -----------------------------

# Installs: drop every '+' and ',' character, then parse as a number.
# errors='coerce' turns anything that still fails to parse into NaN.
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# Reviews and Rating get the same numeric coercion, with no text stripping.
for numeric_column in ('Reviews', 'Rating'):
    apps_df[numeric_column] = pd.to_numeric(apps_df[numeric_column], errors='coerce')

# Clean Size (convert to MB)
def convert_size(size):
    """Convert a size string such as ``"19M"`` or ``"201k"`` to megabytes.

    Args:
        size: Raw value from the ``Size`` column.

    Returns:
        The size in MB as a float. ``None`` is returned when ``size`` is not
        a string, contains neither ``M`` nor ``k``, or has a numeric part
        that cannot be parsed.
    """
    if not isinstance(size, str):
        return None

    # Thousands separators would make float() fail, so drop them first.
    text = size.replace(',', '')

    if 'M' in text:
        number, divisor = text.replace('M', ''), 1
    elif 'k' in text:
        # Kilobytes are converted to megabytes.
        number, divisor = text.replace('k', ''), 1024
    else:
        return None

    try:
        return float(number) / divisor
    except ValueError:
        # A malformed entry is treated as missing rather than aborting the
        # whole column conversion.
        return None

# Run every raw Size entry through convert_size and store the result back.
size_in_mb = apps_df['Size'].apply(convert_size)
apps_df['Size'] = size_in_mb

# Parse 'Last Updated' as datetimes; errors='coerce' maps bad values to NaT.
last_updated = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
apps_df['Last Updated'] = last_updated

# -----------------------------
# Apply Filters
# -----------------------------

# Filter conditions
# One boolean mask per condition; a row is kept only when all three hold.
well_rated = apps_df['Rating'] >= 4.0
at_least_10_mb = apps_df['Size'] >= 10
updated_in_january = apps_df['Last Updated'].dt.month == 1

filtered_df = apps_df[well_rated & at_least_10_mb & updated_in_january]

# -----------------------------
# Top 10 Categories by Installs
# -----------------------------

# Total installs per category, largest first, cut down to the first ten.
installs_by_category = filtered_df.groupby('Category')['Installs'].sum()
ranked_installs = installs_by_category.sort_values(ascending=False)
category_installs = ranked_installs.head(10)

top_categories = category_installs.index

# Keep only the filtered rows that belong to one of those categories.
in_top_category = filtered_df['Category'].isin(top_categories)
final_df = filtered_df[in_top_category]

# -----------------------------
# Calculate Metrics
# -----------------------------

# Per category: mean of Rating and sum of Reviews.
metric_aggregations = {
    'Rating': 'mean',
    'Reviews': 'sum',
}
per_category = final_df.groupby('Category').agg(metric_aggregations)

# Move Category from the index back into an ordinary column.
grouped_data = per_category.reset_index()

# -----------------------------
# Time Restriction (3PM–5PM IST)
# -----------------------------

ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# The chart is drawn only while the IST hour is 15 or 16 (15:00 up to 17:00).
if 15 <= current_hour < 17:

    fig = go.Figure()

    # Average Rating is drawn against the primary (left) y-axis.
    # A distinct offsetgroup keeps its bars beside the review bars instead
    # of overlapping them once the two traces use different y-axes.
    fig.add_trace(go.Bar(
        x=grouped_data['Category'],
        y=grouped_data['Rating'],
        name='Average Rating',
        yaxis='y',
        offsetgroup='rating',
    ))

    # Total Reviews is drawn against a secondary (right) y-axis. Ratings are
    # filtered to >= 4.0 earlier, while review counts are summed per
    # category, so on one shared axis the rating bars would be dwarfed.
    fig.add_trace(go.Bar(
        x=grouped_data['Category'],
        y=grouped_data['Reviews'],
        name='Total Reviews',
        yaxis='y2',
        offsetgroup='reviews',
    ))

    fig.update_layout(
        title="Top 10 Categories: Avg Rating vs Total Reviews (Jan, Size ≥10M, Rating ≥4)",
        xaxis_title="Category",
        yaxis=dict(title=dict(text="Average Rating")),
        # The second axis is overlaid on the first and labelled on the right.
        yaxis2=dict(
            title=dict(text="Total Reviews"),
            overlaying='y',
            side='right',
        ),
        barmode='group',
    )

    fig.show()

else:
    print("⛔ This graph is available only between 3 PM and 5 PM IST.")


⛔ This graph is available only between 3 PM and 5 PM IST.
