<a href="https://colab.research.google.com/github/parth-u/NullClass-App-Store-Data-Analytics/blob/main/Analysis%202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
import pytz

# Viewing window for the chart: 3 PM - 5 PM, expressed as times of day in IST.
ist = pytz.timezone('Asia/Kolkata')
start_time, end_time = (
    datetime.strptime(clock, "%H:%M:%S").time()
    for clock in ("15:00:00", "17:00:00")
)

# Time of day in IST at the moment this cell runs (date part discarded).
current_time = datetime.now(ist).time()

# Check if the current time is within the allowed time range
if start_time <= current_time <= end_time:
    # Load the dataset
    file_path = '/content/sample_data/Play Store Data.csv'  # Update path if needed
    data = pd.read_csv(file_path)

    # Data Cleaning: drop "+" and "," so values such as "1,000,000+" become
    # plain digit strings, then trim surrounding whitespace.
    data['Installs'] = (
        data['Installs']
        .str.replace('[+,]', '', regex=True)
        .str.strip()
    )

    # Remove invalid installs. For a missing value str.isnumeric() yields NaN,
    # and pandas refuses a boolean mask containing NaN; .eq(True) turns those
    # entries into False so the rows are simply dropped.
    has_install_count = data['Installs'].str.isnumeric().eq(True)
    # .copy() makes the filtered frame independent of the original, so the
    # column assignment below does not raise SettingWithCopyWarning.
    data = data.loc[has_install_count].copy()
    data['Installs'] = data['Installs'].astype(float)

    # Convert 'Size' column
    def convert_size(size):
        """Convert a size label such as "19M" or "201k" into a plain number.

        The label must be a string ending in one of the unit suffixes
        ``k`` (x 1,000), ``M`` (x 1,000,000) or ``G`` (x 1,000,000,000);
        surrounding whitespace is ignored.

        Args:
            size: Raw cell value from the 'Size' column.

        Returns:
            The scaled value as a float, or None when ``size`` is not a
            string, has no recognised unit suffix, or its numeric part cannot
            be parsed (e.g. "Varies with device", "Medium", "1,000M").
        """
        if not isinstance(size, str):
            return None

        unit_multipliers = {
            'k': 1_000,
            'M': 1_000_000,
            'G': 1_000_000_000,
        }

        label = size.strip()
        # Look only at the final character: a substring test would treat any
        # text that merely contains M/k/G as a size.
        multiplier = unit_multipliers.get(label[-1:])
        if multiplier is None:
            return None

        try:
            return float(label[:-1]) * multiplier
        except ValueError:
            # Unparseable number: treat as missing instead of aborting the
            # whole column conversion.
            return None

    # Normalise the columns used by the filters. Unparseable dates and review
    # counts are coerced to NaT / NaN rather than raising.
    data['Size'] = data['Size'].apply(convert_size)
    data['Last Updated'] = pd.to_datetime(data['Last Updated'], errors='coerce')
    data['Reviews'] = pd.to_numeric(data['Reviews'], errors='coerce')

    # Apply Filters (Rating ≥ 4.0, Size ≥ 10MB, Last Updated in January)
    well_rated = data['Rating'] >= 4.0
    at_least_10mb = data['Size'] >= 10_000_000
    updated_in_january = data['Last Updated'].dt.month == 1
    filtered_data = data.loc[well_rated & at_least_10mb & updated_in_january]

    # Aggregate per category: mean rating, summed reviews and installs.
    per_category = filtered_data.groupby('Category').agg(
        {'Rating': 'mean', 'Reviews': 'sum', 'Installs': 'sum'}
    )
    aggregated_data = per_category.reset_index()

    # Select Top 10 Categories by Installs
    top_categories = aggregated_data.nlargest(10, 'Installs').copy()

    # Floor review totals at 1 (missing totals also become 1). No log
    # transform is applied to the data here; the values only need to be
    # positive so they can be drawn on a logarithmic axis.
    top_categories['Reviews'] = (
        top_categories['Reviews']
        .fillna(1)
        .astype(float)
        .apply(lambda total: total if total > 1 else 1)
    )

    # Create Grouped Bar Chart: one bar series per metric, both plotted
    # against the same category axis.
    categories = top_categories['Category']
    avg_ratings = top_categories['Rating']
    review_totals = top_categories['Reviews']

    # Bars for average rating, labelled with the value rounded to 2 decimals.
    rating_bars = go.Bar(
        x=categories,
        y=avg_ratings,
        name='Average Rating',
        marker_color='#1f77b4',
        text=avg_ratings.round(2),
        textposition='auto',
    )

    # Bars for total reviews, labelled with the total shown without decimals.
    review_bars = go.Bar(
        x=categories,
        y=review_totals,
        name='Total Reviews (Log Scale)',
        marker_color='#ff7f0e',
        text=review_totals.map(lambda total: f"{total:.0f}"),
        textposition='auto',
    )

    fig = go.Figure()
    fig.add_traces([rating_bars, review_bars])

    # Update layout for dark theme
    fig.update_layout(
        title={
            'text': 'Comparison of Average Rating and Total Reviews for Top 10 Categories',
            'x': 0.5,
            'font': {'size': 20, 'color': 'white'}
        },
        xaxis_title='Category',
        yaxis_title='Value',
        barmode='group',
        plot_bgcolor='black',
        paper_bgcolor='black',
        font=dict(color='white'),
        xaxis=dict(
            tickfont=dict(color='white'),
            gridcolor='gray'
        ),
        yaxis=dict(
            tickfont=dict(color='white'),
            gridcolor='gray',
            type='log'
        ),
        legend=dict(
            font=dict(color='white')
        )
    )

    # Show the figure
    fig.show()
else:
    print("⚠️ This graph can only be viewed between 3 PM and 5 PM IST.")


⚠️ This graph can only be viewed between 3 PM and 5 PM IST.
