In [2]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

# --------------------------------
# Load dataset
# --------------------------------
# Read the raw CSV from the working directory into the frame that every
# later step mutates or filters.
apps_df = pd.read_csv("googleplaystore.csv")

# --------------------------------
# Data Cleaning
# --------------------------------
# Installs: drop '+' and ',' characters, then convert to numbers. Anything
# that still is not numeric becomes NaN (errors='coerce').
apps_df['Installs'] = apps_df['Installs'].str.replace('[+,]', '', regex=True)
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce')

# Price: the '$' has to be matched literally. Interpreted as a regular
# expression it is the end-of-string anchor, which removes nothing, so any
# value carrying a dollar sign would be coerced to NaN by to_numeric below.
apps_df['Price'] = apps_df['Price'].str.replace('$', '', regex=False)
apps_df['Price'] = pd.to_numeric(apps_df['Price'], errors='coerce')

# Reviews and Rating only need the numeric conversion; bad values become NaN.
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')

apps_df['Rating'] = pd.to_numeric(apps_df['Rating'], errors='coerce')

# Revenue calculation
# Computed as listed price times install count; NaN in either input
# propagates to Revenue.
apps_df['Revenue'] = apps_df['Price'] * apps_df['Installs']

# Size conversion
def convert_size(size):
    """Convert a raw size label to megabytes.

    Args:
        size: Raw value from the ``Size`` column. A string containing ``M``
            is read as megabytes; otherwise a string containing ``k`` is read
            as kilobytes and divided by 1024.

    Returns:
        The size in megabytes as a ``float``, or ``None`` when the value is
        not a string, contains neither unit marker, or the text left after
        removing the marker is not a valid number.
    """
    # Missing cells arrive as non-string values (e.g. NaN); there is nothing
    # to parse in that case.
    if not isinstance(size, str):
        return None

    if 'M' in size:
        number_text, divisor = size.replace('M', ''), 1
    elif 'k' in size:
        number_text, divisor = size.replace('k', ''), 1024
    else:
        return None

    # A label can contain a unit letter without being a size at all; treat it
    # like any other unrecognised value instead of aborting the whole column.
    try:
        return float(number_text) / divisor
    except ValueError:
        return None

# Size in megabytes for every row; values convert_size cannot interpret
# come back as missing.
apps_df['Size_MB'] = apps_df['Size'].apply(convert_size)

# Android version cleaning
# Keep the first "digits.digits" match found in the text and turn it into a
# float. expand=False makes str.extract return a Series, so the column
# assignment does not rely on a one-column DataFrame being unwrapped.
apps_df['Android Ver'] = apps_df['Android Ver'].str.extract(
    r'(\d+\.\d+)', expand=False
)
apps_df['Android Ver'] = pd.to_numeric(apps_df['Android Ver'], errors='coerce')

# App type
# 'Type' is carried over unchanged under the name used by the aggregation
# step; mapping 'Free' -> 'Free' and 'Paid' -> 'Paid' would alter nothing.
apps_df['App_Type'] = apps_df['Type']

# --------------------------------
# Apply Filters
# --------------------------------
# One named boolean mask per criterion; a row is kept only if all hold.
enough_installs = apps_df['Installs'] >= 10000
enough_revenue = apps_df['Revenue'] >= 10000
recent_android = apps_df['Android Ver'] > 4.0
large_app = apps_df['Size_MB'] > 15
rated_everyone = apps_df['Content Rating'] == 'Everyone'
short_name = apps_df['App'].str.len() <= 30

filtered_df = apps_df[
    enough_installs
    & enough_revenue
    & recent_android
    & large_app
    & rated_everyone
    & short_name
]

# --------------------------------
# Top 3 Categories by Installs
# --------------------------------
# Total installs per category, largest first; the first three labels win.
installs_by_category = filtered_df.groupby('Category')['Installs'].sum()
ranked_categories = installs_by_category.sort_values(ascending=False)
top_categories = ranked_categories.index[:3]

in_top_category = filtered_df['Category'].isin(top_categories)
final_df = filtered_df[in_top_category]

# --------------------------------
# Aggregation
# --------------------------------
# Mean installs and mean revenue for each (Category, App_Type) pair,
# flattened back into ordinary columns.
grouped_apps = final_df.groupby(['Category', 'App_Type'])
summary_df = grouped_apps.agg(
    Avg_Installs=('Installs', 'mean'),
    Avg_Revenue=('Revenue', 'mean'),
)
summary_df = summary_df.reset_index()

# --------------------------------
# Time Restriction (1 PM to 2 PM IST)
# --------------------------------
# The window bounds are plain wall-clock times; the current time is taken in
# the Asia/Kolkata zone so the comparison does not depend on the host's zone.
ist = pytz.timezone("Asia/Kolkata")
start_time = datetime.strptime("13:00", "%H:%M").time()
end_time = datetime.strptime("14:00", "%H:%M").time()
current_time = datetime.now(ist).time()

# --------------------------------
# Conditional Chart Rendering
# --------------------------------
# Both bounds are inclusive.
outside_window = current_time < start_time or current_time > end_time

if outside_window:
    print("This chart is available only between 1:00 PM and 2:00 PM IST.")

else:
    # Both traces share the same x labels, e.g. "<Category> (<App_Type>)".
    x_labels = summary_df['Category'] + " (" + summary_df['App_Type'] + ")"

    # Bars on the left axis: average installs.
    installs_trace = go.Bar(
        x=x_labels,
        y=summary_df['Avg_Installs'],
        name='Average Installs',
        yaxis='y1',
    )

    # Line with markers on the secondary (right) axis: average revenue.
    revenue_trace = go.Scatter(
        x=x_labels,
        y=summary_df['Avg_Revenue'],
        name='Average Revenue',
        yaxis='y2',
        mode='lines+markers',
    )

    fig = go.Figure()
    fig.add_trace(installs_trace)
    fig.add_trace(revenue_trace)

    # The second y axis is overlaid on the first and drawn on the right so
    # the two measures can use independent scales.
    fig.update_layout(
        title='Average Installs vs Revenue (Free vs Paid Apps)',
        xaxis={'title': 'Category (App Type)'},
        yaxis={'title': 'Average Installs'},
        yaxis2={
            'title': 'Average Revenue ($)',
            'overlaying': 'y',
            'side': 'right',
        },
        legend={'x': 0.01, 'y': 0.99},
    )

    fig.show()


This chart is available only between 1:00 PM and 2:00 PM IST.
