In [1]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz

# ---------------------------
# Load dataset
# ---------------------------
# Read the raw app listing table; the CSV is expected in the working directory.
apps_df = pd.read_csv("googleplaystore.csv")

# ---------------------------
# Data Cleaning
# ---------------------------
# Installs: drop the '+' and ',' decorations, then convert to a number.
# Anything that still is not numeric becomes NaN (errors='coerce').
apps_df['Installs'] = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce')

# Reviews / Rating: plain numeric conversion, unparseable cells become NaN.
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')
apps_df['Rating'] = pd.to_numeric(apps_df['Rating'], errors='coerce')

# Size -> megabytes.
# A trailing 'M' means the number is already in MB; a trailing 'k' means
# kilobytes and is divided by 1024 so every row shares one unit. Stripping
# only 'M' would turn every 'k' value into NaN and silently drop those apps
# from the size axis. Text with no parseable number still becomes NaN.
size_text = apps_df['Size'].astype(str).str.strip()
size_number = pd.to_numeric(
    size_text.str.replace(r'[kKmM]$', '', regex=True),
    errors='coerce',
)
size_is_kilobytes = size_text.str[-1:].isin(['k', 'K'])
apps_df['Size_MB'] = size_number.where(~size_is_kilobytes, size_number / 1024)

# Guarantee that a Sentiment_Subjectivity column is present.
# NOTE(review): when the column is missing it is filled with unseeded uniform
# random numbers in [0, 1) purely as a demo placeholder, so the rows passing
# the subjectivity filter differ on every run. Replace with real data.
if 'Sentiment_Subjectivity' not in apps_df:
    import numpy as np
    row_count = apps_df.shape[0]
    apps_df['Sentiment_Subjectivity'] = np.random.rand(row_count)

# ---------------------------
# Apply Filters
# ---------------------------
allowed_categories = ['Game', 'Beauty', 'Business', 'Comics', 'Communication',
                      'Dating', 'Entertainment', 'Social', 'Event']

# Category labels are compared case-insensitively so that 'GAME', 'Game' and
# 'game' are all accepted; an exact-case comparison would reject every row if
# the CSV spells its categories in a different case than the list above.
# NOTE(review): the whole label must still match, so a plural such as
# 'Events' is NOT matched by 'Event' - confirm the spelling used in the CSV.
allowed_category_keys = {name.casefold() for name in allowed_categories}
category_keys = apps_df['Category'].astype(str).str.casefold()

# Keep a row only when every condition holds. Comparisons against NaN are
# False, so rows whose numeric fields failed to parse are dropped here.
filtered_df = apps_df[
    (apps_df['Rating'] > 3.5) &
    (apps_df['Reviews'] > 500) &
    (apps_df['Installs'] > 50000) &
    # Exclude apps whose name contains the letter 's' in either case;
    # regex=False makes it explicit that 'S' is a literal, not a pattern.
    (~apps_df['App'].str.contains('S', case=False, na=False, regex=False)) &
    (apps_df['Sentiment_Subjectivity'] > 0.5) &
    (category_keys.isin(allowed_category_keys))
].copy()  # copy so later column assignments do not touch apps_df

# ---------------------------
# Translate categories for display
# ---------------------------
# Display names for selected categories; categories that are not listed keep
# their original label.
translation_map = {
    'Beauty': 'ब्यूटी',          # Hindi
    'Business': 'வணிகம்',       # Tamil
    'Dating': 'Dating'           # German (same as original, replace if needed)
}

# Look translations up by casefolded key so that a category spelled 'BEAUTY'
# or 'beauty' in the data still receives its translated label.
translation_by_key = {
    name.casefold(): label for name, label in translation_map.items()
}
category_lookup_keys = filtered_df['Category'].astype(str).str.casefold()

# Untranslated categories map to NaN and fall back to the raw Category value.
filtered_df['Category_Display'] = (
    category_lookup_keys.map(translation_by_key).fillna(filtered_df['Category'])
)

# ---------------------------
# Time Restriction (5 PM – 7 PM IST)
# ---------------------------
# Wall-clock time of day in India right now (the date part is discarded).
ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(tz=ist).time()

# Bounds of the display window, parsed from their HH:MM text form.
start_time, end_time = (
    datetime.strptime(clock_text, "%H:%M").time()
    for clock_text in ("17:00", "19:00")
)

# ---------------------------
# Conditional Bubble Chart Rendering
# ---------------------------
# Draw the chart only while the current IST time is inside the window (both
# bounds inclusive); otherwise print a notice instead.
if start_time <= current_time <= end_time:

    # Pin only the Game category to pink and let Plotly choose the colours
    # of every other category. Listing the other categories with a value of
    # None would put non-colour entries into color_discrete_map. The label is
    # compared case-insensitively so 'GAME' and 'Game' are both recognised.
    color_map = {
        cat: 'pink'
        for cat in filtered_df['Category'].dropna().unique()
        if str(cat).casefold() == 'game'
    }

    # One bubble per app: x = size in MB, y = rating, area = install count.
    fig = px.scatter(
        filtered_df,
        x='Size_MB',
        y='Rating',
        size='Installs',
        color='Category',
        hover_name='App',
        hover_data=['Category_Display', 'Installs', 'Reviews', 'Size_MB', 'Sentiment_Subjectivity'],
        title='App Size vs Rating Bubble Chart',
        color_discrete_map=color_map,
        size_max=60
    )

    fig.update_layout(
        xaxis_title='App Size (MB)',
        yaxis_title='Average Rating',
        legend_title='Category'
    )

    fig.show()

else:
    print("Bubble chart available only between 5:00 PM and 7:00 PM IST.")


Bubble chart available only between 5:00 PM and 7:00 PM IST.
