In [1]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import seaborn as sns
from matplotlib import font_manager

# --- Time Constraint Check ---
# Chart generation is gated on India Standard Time, so resolve the zone once
# and capture the current timezone-aware IST timestamp for the window check.
IST = pytz.timezone('Asia/Kolkata')
now_ist = datetime.now(tz=IST)

if 17 <= now_ist.hour < 19:
    try:
        # --- Load Data ---
        # App metadata and per-review sentiment scores come from two CSV exports.
        df_apps = pd.read_csv("C:/Users/SYED SAAD/Downloads/Play Store Data.csv")
        df_reviews = pd.read_csv("C:/Users/SYED SAAD/Downloads/User Reviews.csv")

        # --- Preprocessing and Merging ---
        # Collapse the reviews to one row per app holding its mean subjectivity score.
        avg_subjectivity = (
            df_reviews
            .groupby('App')['Sentiment_Subjectivity']
            .mean()
            .rename('Mean_Sentiment_Subjectivity')
            .reset_index()
        )

        # Left join keeps every app row; apps with no reviews get NaN subjectivity.
        df = df_apps.merge(avg_subjectivity, on='App', how='left')

        # --- Data Cleaning (on merged dataframe) ---
        # Clean 'Installs': strip thousands separators and the trailing '+'
        # (e.g. "10,000+" -> "10000") so the column can be parsed as a number.
        df['Installs'] = df['Installs'].str.replace('[,+]', '', regex=True)
        # Drop rows whose Installs value is the literal 'Free', then take an explicit
        # copy so the column assignments that follow write to an independent frame
        # instead of a filtered slice (avoids pandas' SettingWithCopyWarning).
        df = df[df['Installs'] != 'Free'].copy()
        # Coerce anything still non-numeric to NaN rather than aborting the whole run;
        # rows with NaN Installs are discarded by the later dropna step.
        df['Installs'] = pd.to_numeric(df['Installs'], errors='coerce')

        # Clean 'Size'
        def convert_size(size):
            """Convert a Play Store size string to megabytes.

            Args:
                size: Raw value from the 'Size' column, e.g. "19M" or "512k".

            Returns:
                float: The size in MB. A trailing 'M' is taken as megabytes and a
                trailing 'k' as kilobytes (divided by 1024). ``np.nan`` is returned
                for non-string input, strings without an 'M'/'k' suffix (such as
                "Varies with device"), and strings whose numeric part cannot be parsed.
            """
            if not isinstance(size, str):
                return np.nan

            # Tolerate surrounding whitespace and thousands separators ("1,024k").
            text = size.strip().replace(',', '')
            if text.endswith('M'):
                divisor = 1.0
            elif text.endswith('k'):
                divisor = 1024.0
            else:
                return np.nan

            try:
                return float(text[:-1]) / divisor
            except ValueError:
                # Suffix matched but the remainder is not a number: treat as missing.
                return np.nan
        # Convert sizes to MB and parse the numeric columns in one non-mutating step;
        # unparseable ratings / review counts become NaN instead of raising.
        df = df.assign(
            Size=df['Size'].apply(convert_size),
            Rating=pd.to_numeric(df['Rating'], errors='coerce'),
            Reviews=pd.to_numeric(df['Reviews'], errors='coerce'),
        )

        # Drop rows with NaN in critical columns for this plot. Rebinding the result
        # (rather than inplace=True) never mutates a filtered slice of another frame.
        df = df.dropna(subset=['Rating', 'Size', 'Installs', 'Category', 'Mean_Sentiment_Subjectivity'])

        # --- Filtering ---
        # Categories to include in the chart.
        categories_to_filter = ['GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENTS']

        # Apps whose name contains the letter 's' (any case) are excluded. na=False
        # makes a missing app name count as "no match" so the mask stays strictly
        # boolean and `~` cannot fail on NaN; regex=False matches the literal character.
        name_has_s = df['App'].str.lower().str.contains('s', na=False, regex=False)

        # .copy() gives an independent frame so later column writes are safe.
        df_filtered = df[
            (df['Rating'] > 3.5) &
            (df['Category'].isin(categories_to_filter)) &
            (df['Reviews'] > 500) &
            (~name_has_s) &
            (df['Mean_Sentiment_Subjectivity'] > 0.5) &
            (df['Installs'] > 50000)
        ].copy()


        # --- Translations ---
        if not df_filtered.empty:
            # Display names for selected categories; any category without an entry
            # keeps its original label.
            translation_map = {
                'BEAUTY': 'सुंदरता',
                'BUSINESS': 'வணிகம்',
                'DATING': 'Dating'
            }
            df_filtered['Category'] = df_filtered['Category'].map(
                lambda name: translation_map.get(name, name)
            )

            # --- Plotting ---
            plt.style.use('seaborn-v0_8-whitegrid')

            # Use a font that supports the translated (Devanagari / Tamil) category names.
            # Assigning an unknown family to rcParams never raises, so availability is
            # checked against the font manager explicitly. This is done after
            # style.use() and before the figure is created so the axes' text is built
            # with the chosen family.
            if 'Nirmala UI' in font_manager.get_font_names():
                plt.rcParams['font.family'] = 'Nirmala UI'
            else:
                print("Warning: 'Nirmala UI' font not found. Non-English characters might not display correctly.")
                plt.rcParams['font.family'] = 'sans-serif'

            fig, ax = plt.subplots(figsize=(16, 10))

            # Assign each category a distinct hue; GAME is pinned to pink.
            unique_categories = df_filtered['Category'].unique()
            palette = sns.color_palette("husl", len(unique_categories))
            color_map = dict(zip(unique_categories, palette))
            if 'GAME' in unique_categories:
                color_map['GAME'] = 'pink'

            # One scatter call per category so each gets its own colour and legend entry.
            for category in color_map:
                rows = df_filtered[df_filtered['Category'] == category]
                ax.scatter(
                    rows['Size'],
                    rows['Rating'],
                    s=rows['Installs'] / 10000,  # bubble area scales with install count
                    c=[color_map[category]],     # single colour wrapped in a list
                    alpha=0.6,
                    label=category,
                    edgecolors='black',
                    linewidth=0.5,
                )

            # --- Final Touches ---
            ax.set_xlabel('App Size (MB)', fontsize=14)
            ax.set_ylabel('Average Rating', fontsize=14)
            ax.set_title('App Analysis: Size vs. Rating (Bubble Size = Installs)', fontsize=18)
            ax.tick_params(axis='both', which='major', labelsize=12)

            # Add empty reference bubbles so the legend also explains bubble size.
            # They use the same Installs / 10000 scaling as the data points.
            for installs in [100000, 1000000, 10000000]:
                # Format as K below one million so 100,000 reads "100K", not "0M".
                if installs >= 1000000:
                    size_label = f'{installs // 1000000}M Installs'
                else:
                    size_label = f'{installs // 1000}K Installs'
                ax.scatter([], [], s=installs / 10000, c='gray', alpha=0.6,
                           label=size_label, edgecolors='black')

            # Single combined legend (categories + install-size key); a separate
            # categories-only legend would simply be replaced by this call.
            ax.legend(title='Categories & Install Size', scatterpoints=1, markerscale=1, fontsize=10)

            ax.grid(True)
            plt.tight_layout()
            plt.savefig('app_bubble_chart.png')

            print("Bubble chart generated successfully and saved as app_bubble_chart.png")

        else:
            print("No data available to plot after applying all filters.")

    except FileNotFoundError:
        print("Error: Required CSV file not found. Please ensure 'Play Store Data.csv' and 'Play Store Data reviews.csv' are present.")
    except Exception as e:
        print(f"An error occurred: {e}")

else:
    print("Chart generation is only available between 5 PM and 7 PM IST.")
    print(f"Current IST time: {now_ist.strftime('%Y-%m-%d %H:%M:%S')}")

Chart generation is only available between 5 PM and 7 PM IST.
Current IST time: 2025-08-20 19:52:47
