In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime
import pytz
import plotly.express as px


In [2]:
# Load the raw app listing from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer="googleplaystore.csv")


In [3]:
# Clean the raw listing: drop rows missing any field used downstream, then turn
# Installs, Revenue (synthesised if absent) and Size into numeric columns.
# .copy() makes the result an independent frame so the column assignments
# below cannot trigger SettingWithCopyWarning.
df = df.dropna(
    subset=['Installs', 'Category', 'Type', 'Android Ver', 'Size', 'Content Rating', 'App']
).copy()

# Strip the '+' and ',' characters before casting; int64 is spelled out so the
# width does not depend on the platform's default int.
df['Installs'] = df['Installs'].str.replace(r'[+,]', '', regex=True).astype('int64')

if 'Revenue' not in df.columns:
    # No revenue column available: synthesise one. Paid apps get a per-row
    # multiplier drawn from {1, 2, 3, 4}; everything else earns 0.1 per install.
    # size=len(df) matters: without it randint returns a single scalar that is
    # broadcast to every paid row, i.e. the "random" factor would be constant.
    np.random.seed(42)
    paid_multiplier = np.random.randint(1, 5, size=len(df))
    df['Revenue'] = df['Installs'] * np.where(df['Type'] == 'Paid', paid_multiplier, 0.1)

# Normalise Size to megabytes. 'Varies with device' carries no number -> NaN.
size_text = df['Size'].replace('Varies with device', np.nan)
# Remember which rows were given with a 'k' suffix before the suffix is removed;
# otherwise e.g. "201k" would be read as 201 and compared as if it were MB.
in_kilobytes = size_text.str.endswith('k', na=False)
size_number = pd.to_numeric(
    size_text.str.replace('M', '', regex=False).str.replace('k', '', regex=False),
    errors='coerce',  # anything still non-numeric becomes NaN
)
# Assumes 'k' means KiB-style kilobytes (1024 per MB) — adjust if the source uses 1000.
df['Size'] = size_number.where(~in_kilobytes, size_number / 1024)


In [4]:
# Keep only apps that satisfy every criterion below. Each criterion is built as
# a boolean mask and the masks are AND-ed together.

# Leading "major.minor" number of the Android version string; strings without a
# number become NaN and therefore fail the comparison.
min_android = df['Android Ver'].str.extract(r'(\d+\.?\d*)', expand=False).astype(float)

keep = (
    df['Installs'].ge(10000)
    & df['Revenue'].ge(10000)
    & df['Content Rating'].eq('Everyone')
    & min_android.gt(4.0)
    & df['Size'].gt(15)
    & df['App'].str.len().le(30)
)
df = df[keep]


In [5]:
# Restrict the frame to the three categories with the highest total installs.
category_installs = df.groupby('Category')['Installs'].sum()
top_categories = category_installs.nlargest(3).index
df = df.loc[df['Category'].isin(top_categories)]


In [6]:
# Mean installs and mean revenue for every (Category, Type) pair, returned as a
# flat frame with the group keys as ordinary columns.
summary = (
    df.groupby(['Category', 'Type'])[['Installs', 'Revenue']]
    .mean()
    .rename(columns={'Installs': 'Avg_Installs', 'Revenue': 'Avg_Revenue'})
    .reset_index()
)


In [7]:
# Display window for the chart, expressed as wall-clock times in IST.
ist = pytz.timezone('Asia/Kolkata')
start_time, end_time = (
    datetime.strptime(hhmm, "%H:%M").time() for hhmm in ("13:00", "14:00")
)
# Current wall-clock time in IST (.time() drops the tzinfo, so it compares
# directly against the naive bounds above).
current_time = datetime.now(ist).time()


In [8]:

# Show a dual-axis chart (bars = average installs, line = average revenue) per
# "Category (Type)" group, but only while current_time lies inside
# [start_time, end_time]; otherwise print a notice instead.
if start_time <= current_time <= end_time:
    chart_title = "Average Installs vs Revenue (Free vs Paid Apps)"
    # One x-axis label per summary row, shared by both traces.
    group_labels = summary['Category'] + " (" + summary['Type'] + ")"

    # Bar chart (Average Installs)
    fig1 = px.bar(
        summary,
        x=group_labels,
        y="Avg_Installs",
        title=chart_title,
        labels={"y": "Average Installs"}
    )

    # Line chart (Average Revenue)
    fig2 = px.line(
        summary,
        x=group_labels,
        y="Avg_Revenue"
    )

    # Combine the traces of both figures. Only the traces are copied here:
    # fig1's layout (including its title) is NOT carried over, so the title is
    # set again in update_layout below.
    fig = go.Figure(data=fig1.data + fig2.data)

    # Bars use the primary (left) axis, the line the secondary (right) axis.
    # Express traces built without a colour grouping are unnamed and hidden
    # from the legend, so name them explicitly to tell the two series apart.
    fig.update_traces(
        yaxis="y", name="Average Installs", showlegend=True,
        selector=dict(type="bar"),
    )
    fig.update_traces(
        yaxis="y2", name="Average Revenue", showlegend=True,
        selector=dict(type="scatter"),
    )

    fig.update_layout(
        title=chart_title,
        xaxis=dict(title="Category & Type"),
        yaxis=dict(title="Average Installs", side="left"),
        # overlaying="y" draws the second axis on top of the first one
        yaxis2=dict(title="Average Revenue", side="right", overlaying="y", showgrid=False),
        legend=dict(x=0.5, y=1.1, orientation="h")
    )

    fig.show()
else:
    print("The chart is only visible between 1 PM and 2 PM IST.")

The chart is only visible between 1 PM and 2 PM IST.
