# In [1]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz

# -----------------------------
# Load Datasets
# -----------------------------
# Read the app catalogue and its user reviews. Both paths are relative,
# so the CSV files must be in the current working directory.
apps_df, reviews_df = (
    pd.read_csv(csv_name)
    for csv_name in ("googleplaystore.csv", "googleplaystore_user_reviews.csv")
)

# -----------------------------
# Data Cleaning
# -----------------------------

# Installs: strip the '+' and ',' decorations, then parse as a number.
# Anything that still is not numeric becomes NaN.
apps_df['Installs'] = (
    apps_df['Installs']
    .str.replace('[+,]', '', regex=True)
    .pipe(pd.to_numeric, errors='coerce')
)

# Rating: parse as a number; unparseable entries become NaN.
apps_df['Rating'] = apps_df['Rating'].pipe(pd.to_numeric, errors='coerce')

# Reviews: parse as a number; unparseable entries become NaN.
apps_df['Reviews'] = apps_df['Reviews'].pipe(pd.to_numeric, errors='coerce')

# Clean Size (convert to MB)
def convert_size(size):
    """Convert a size label such as ``"19M"`` or ``"512k"`` to megabytes.

    Args:
        size: Raw value from the ``Size`` column. Values that are not
            strings (e.g. NaN) are accepted and treated as missing.

    Returns:
        The size in MB as a float, or ``None`` when ``size`` is not a
        string, carries neither an ``M`` nor a ``k`` unit, or its numeric
        part cannot be parsed.
    """
    if not isinstance(size, str):
        return None

    # Drop surrounding whitespace and thousands separators before parsing.
    text = size.strip().replace(',', '')
    if 'M' in text:
        number, divisor = text.replace('M', ''), 1
    elif 'k' in text:
        number, divisor = text.replace('k', ''), 1024
    else:
        return None

    try:
        return float(number) / divisor
    except ValueError:
        # The label merely contains a unit letter; it is not a size.
        return None

# Replace each raw size label with its value in MB (missing where the
# label could not be converted).
apps_df['Size'] = apps_df['Size'].map(convert_size)

# -----------------------------
# Sentiment Subjectivity (NLP Part)
# -----------------------------

# Mean Sentiment_Subjectivity over each app's reviews, one row per app.
sentiment_df = reviews_df.groupby('App', as_index=False)['Sentiment_Subjectivity'].mean()

# Left join on the app name: every app row is kept, and apps with no
# matching review rows get NaN subjectivity.
apps_df = pd.merge(apps_df, sentiment_df, on='App', how='left')

# -----------------------------
# Category Filter List
# -----------------------------

# Categories accepted by the row filter applied to apps_df.
categories_required = [
    "GAME",
    "BEAUTY",
    "BUSINESS",
    "COMICS",
    "COMMUNICATION",
    "DATING",
    "ENTERTAINMENT",
    "SOCIAL",
    "EVENTS",
]

# -----------------------------
# Apply Filters (Using .copy() to avoid warning)
# -----------------------------

# Keep rows that satisfy every condition below. Comparisons against NaN
# are False, so rows with missing values in these columns are dropped.
# The final term excludes any app whose name contains the letter "s" in
# either case. The result is an independent copy of the selected rows.
filtered_df = apps_df.loc[
    apps_df['Rating'].gt(3.5)
    & apps_df['Category'].isin(categories_required)
    & apps_df['Reviews'].gt(500)
    & apps_df['Sentiment_Subjectivity'].gt(0.5)
    & apps_df['Installs'].gt(50000)
    & ~apps_df['App'].str.contains('S', case=False, na=False)
].copy()

# -----------------------------
# Translate Category Names
# -----------------------------

# Display names for three categories; any category not listed here keeps
# its original label.
translation_map = {
    "BEAUTY": "सौंदर्य",  # Hindi
    "BUSINESS": "வணிகம்",  # Tamil
    "DATING": "Partnersuche",  # German
}

# Swap in the translated labels on the filtered copy.
filtered_df['Category'] = filtered_df['Category'].replace(to_replace=translation_map)

# -----------------------------
# Time Restriction (5PM–7PM IST)
# -----------------------------

# Current wall-clock time in India Standard Time.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist)
current_hour = current_time.hour

# Render the chart only from 17:00 up to (but not including) 19:00 IST.
if 17 <= current_hour < 19:

    # Bubble chart: one marker per row of filtered_df, sized by Installs
    # and coloured by Category.
    fig = px.scatter(
        filtered_df,
        x="Size",
        y="Rating",
        size="Installs",
        color="Category",
        hover_name="App",
        title="App Size vs Rating (Bubble Size = Installs)"
    )

    # Highlight the GAME category in pink. The selector limits the update
    # to the trace named "GAME"; all other traces keep their colours.
    fig.update_traces(
        marker=dict(color="pink"),
        selector=dict(name="GAME"),
    )

    fig.update_layout(
        xaxis_title="App Size (MB)",
        yaxis_title="Average Rating",
        title_x=0.5  # centre the title horizontally
    )

    fig.show()

else:
    print("⛔ This Bubble Chart is available only between 5 PM and 7 PM IST.")


# Output:
# ⛔ This Bubble Chart is available only between 5 PM and 7 PM IST.
