In [3]:
import pandas as pd
import plotly.express as px
from datetime import datetime
import pytz

# -------------------------------
# Load dataset
# -------------------------------
apps_df = pd.read_csv("googleplaystore.csv")

# -------------------------------
# Data Cleaning
# -------------------------------
# Strip any '+' and ',' characters from 'Installs', then coerce the result
# to numbers; values that still cannot be parsed become NaN.
installs_text = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(installs_text, errors='coerce')

# 'Reviews' and 'Rating' get the same lenient numeric coercion.
for numeric_column in ('Reviews', 'Rating'):
    apps_df[numeric_column] = pd.to_numeric(apps_df[numeric_column], errors='coerce')

# Convert Size to MB
def convert_size(size):
    """Convert a size label such as ``'19M'`` or ``'512k'`` to megabytes.

    Args:
        size: Raw value from the ``Size`` column. Strings ending in ``M`` are
            read as megabytes and strings ending in ``k`` as kilobytes.
            Anything else (other text, NaN, numbers) is treated as unknown.

    Returns:
        The size in MB as a ``float``, or ``None`` when the value has no
        recognised unit suffix or its numeric part cannot be parsed.
    """
    # Non-string cells (e.g. NaN for missing data) carry no size information.
    if not isinstance(size, str):
        return None

    label = size.strip()
    # Check the unit as a suffix, not a substring, so arbitrary text that
    # merely contains 'k' or 'M' is not mistaken for a size.
    if label.endswith('M'):
        kb_per_unit = 1
    elif label.endswith('k'):
        kb_per_unit = 1024
    else:
        return None

    try:
        return float(label[:-1]) / kb_per_unit
    except ValueError:
        # A malformed number must not abort the whole column conversion.
        return None

# Derive a numeric size column from the raw 'Size' labels via convert_size.
size_in_mb = apps_df['Size'].apply(convert_size)
apps_df['Size_MB'] = size_in_mb

# Parse 'Last Updated' into datetimes; entries that cannot be parsed are
# coerced to NaT instead of raising.
parsed_last_updated = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
apps_df['Last Updated'] = parsed_last_updated

# -------------------------------
# Filtering Conditions
# -------------------------------
# Each condition is a boolean mask over apps_df; a row is kept only when
# all three hold.
rating_at_least_4 = apps_df['Rating'] >= 4.0
size_at_least_10_mb = apps_df['Size_MB'] >= 10
updated_in_january = apps_df['Last Updated'].dt.month == 1

filtered_df = apps_df[rating_at_least_4 & size_at_least_10_mb & updated_in_january]

# -------------------------------
# Top 10 Categories by Installs
# -------------------------------
# Total installs per category, largest first; the first ten labels of the
# sorted index are the top categories.
installs_by_category = filtered_df.groupby('Category')['Installs'].sum()
top_categories = installs_by_category.sort_values(ascending=False).index[:10]

# Keep only the rows whose category made the top ten.
in_top_category = filtered_df['Category'].isin(top_categories)
final_df = filtered_df[in_top_category]

# -------------------------------
# Aggregation
# -------------------------------
# One row per category: mean rating and summed review count.
summary_df = (
    final_df.groupby('Category')
    .agg(
        Avg_Rating=pd.NamedAgg(column='Rating', aggfunc='mean'),
        Total_Reviews=pd.NamedAgg(column='Reviews', aggfunc='sum'),
    )
    .reset_index()
)

# -------------------------------
# Time Restriction (3 PM - 5 PM IST)
# -------------------------------
ist = pytz.timezone("Asia/Kolkata")

# Window bounds as plain time-of-day values.
start_time, end_time = (
    datetime.strptime(bound, "%H:%M").time() for bound in ("15:00", "17:00")
)

# Current wall-clock time in IST, reduced to a time-of-day for comparison.
current_time = datetime.now(ist).time()

# -------------------------------
# Conditional Graph Display
# -------------------------------
# The chart is rendered only while the current IST time lies within the
# window (both bounds inclusive); otherwise a notice is printed instead.
within_window = start_time <= current_time <= end_time

if not within_window:
    print("Graph is available only between 3:00 PM and 5:00 PM IST.")
else:
    plotted_metrics = ['Avg_Rating', 'Total_Reviews']
    axis_labels = {'value': 'Metric Value', 'Category': 'App Category'}
    fig = px.bar(
        summary_df,
        x='Category',
        y=plotted_metrics,
        barmode='group',
        title='Average Rating vs Total Reviews (Top 10 Categories)',
        labels=axis_labels,
    )
    fig.show()


Graph is available only between 3:00 PM and 5:00 PM IST.
