5. Plot a bubble chart to analyze the relationship between app size (in MB) and average rating,
with the bubble size representing the number of installs. Include a 
filter to show only apps with a rating higher than 3.5 and that belong to the 
"Games" category and installs should be more than 50k as well as this graph should
work only between 5 PM IST to 7 PM IST apart from that time we should not show this graph in dashboard itself.


In [3]:
import pandas as pd

# Load the dataset to inspect its structure
data = pd.read_csv('Play Store Data.csv')

# Display the first few rows of the dataset to understand its structure
data.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [7]:
import numpy as np

# Data cleaning: Convert 'Size' to numeric (in MB)
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M', ''))
    elif 'k' in size:
        return float(size.replace('k', '')) / 1024  # Convert kB to MB
    else:
        return np.nan  # Handle 'Varies with device'

data['Size_MB'] = data['Size'].apply(convert_size)

In [13]:
# Data cleaning: Convert 'Installs' to numeric
#data['Installs'] = data['Installs'].str.replace('[+,]', '', regex=True).astype(float)

# Step 1: Replace non-numeric values with '0' and convert to string for safe regex operations
data['Installs'] = data['Installs'].fillna('0').astype(str)

# Step 2: Remove non-numeric characters (commas, plus signs)
data['Installs'] = data['Installs'].replace(r'[\+,]', '', regex=True)

# Step 3: Convert to integers
data['Installs'] = pd.to_numeric(data['Installs'], errors='coerce').fillna(0).astype(int)



In [15]:
# Filter data based on the given conditions
filtered_data = data[
    (data['Rating'] > 3.5) &
    (data['Category'] == 'GAME') &
    (data['Installs'] > 50000)
]

# Display the first few rows of the filtered dataset to verify
filtered_data.head()


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Size_MB
1653,ROBLOX,GAME,4.5,4447388,67M,100000000,Free,0,Everyone 10+,Adventure;Action & Adventure,"July 31, 2018",2.347.225742,4.1 and up,67.0
1654,Subway Surfers,GAME,4.5,27722264,76M,1000000000,Free,0,Everyone 10+,Arcade,"July 12, 2018",1.90.0,4.1 and up,76.0
1655,Candy Crush Saga,GAME,4.4,22426677,74M,500000000,Free,0,Everyone,Casual,"July 5, 2018",1.129.0.2,4.1 and up,74.0
1656,Solitaire,GAME,4.7,254258,23M,10000000,Free,0,Everyone,Card,"August 1, 2018",2.137.0,4.1 and up,23.0
1657,Bubble Shooter,GAME,4.5,148897,46M,10000000,Free,0,Everyone,Casual,"July 17, 2018",1.20.1,4.0.3 and up,46.0


In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  int32  
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
 13  Size_MB         9145 non-null   float64
dtypes: float64(2), int32(1), object(11)
memory usage: 1.1+ MB


In [21]:
import pytz
from datetime import datetime

# Define function to check if the current time is between 5 PM to 7 PM IST
def is_within_time_window():
    # Get current time in IST
    ist = pytz.timezone('Asia/Kolkata')
    current_time = datetime.now(ist)
    # Define start and end times
    start_time = current_time.replace(hour=17, minute=0, second=0, microsecond=0)
    end_time = current_time.replace(hour=19, minute=0, second=0, microsecond=0)
    return start_time <= current_time <= end_time

# Check if current time is within the specified range
if is_within_time_window():
    # Plotting the bubble chart
    plt.figure(figsize=(12, 8))

    # Define bubble sizes scaled by the number of installs
    bubble_size = filtered_data['Installs'] / 1e6  # Scale down for visualization

    # Create the bubble chart
    plt.scatter(
        filtered_data['Size_MB'],  # X-axis: App size in MB
        filtered_data['Rating'],   # Y-axis: Average rating
        s=bubble_size,             # Bubble size: Number of installs
        alpha=0.6,                 # Transparency for better visualization
        color='skyblue',
        edgecolors="w",
        linewidth=0.5
    )

    # Add labels and title
    plt.title('Bubble Chart: App Size vs. Rating (Games)', fontsize=16)
    plt.xlabel('App Size (MB)', fontsize=14)
    plt.ylabel('Average Rating', fontsize=14)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.colorbar(label='Number of Installs (scaled)', orientation='vertical')
    plt.tight_layout()

    # Show the plot
    plt.show()
else:
    print("The chart is not available outside 5 PM to 7 PM IST.")


The chart is not available outside 5 PM to 7 PM IST.
