<a href="https://colab.research.google.com/github/parth-u/NullClass-App-Store-Data-Analytics/blob/main/Analysis%204.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import pytz

# Read the Play Store export into a DataFrame.
file_path = "/content/sample_data/Play Store Data.csv"
df = pd.read_csv(file_path)

# Clean 'Installs' in one pass: stringify, strip every non-digit character,
# then parse as a number. Entries left with no digits cannot be parsed and
# become NaN via errors='coerce'.
df['Installs'] = pd.to_numeric(
    df['Installs'].astype(str).str.replace(r'[^\d]', '', regex=True),
    errors='coerce',
)

# Discard the rows whose install count could not be parsed.
df = df.dropna(subset=['Installs'])

# Parse 'Last Updated' as datetimes; unparseable values become NaT.
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')

# Keep Teen-rated apps whose name starts with 'E' and that have more than
# 10,000 installs. The explicit .copy() makes filtered_df an independent
# DataFrame, so adding the 'Year-Month' column below does not trigger
# pandas' SettingWithCopyWarning (assignment on a slice of df).
filtered_df = df[
    (df['Content Rating'] == 'Teen') &
    (df['App'].str.startswith('E', na=False)) &
    (df['Installs'] > 10000)
].copy()

# Sum installs per (last-update month, category).
filtered_df['Year-Month'] = filtered_df['Last Updated'].dt.to_period('M')
grouped_df = (
    filtered_df
    .groupby(['Year-Month', 'Category'])['Installs']
    .sum()
    .reset_index()
)

# Turn the monthly periods into datetimes (via their string form) so they can
# be used on a date axis.
grouped_df['Year-Month'] = pd.to_datetime(grouped_df['Year-Month'].astype(str))

# Month-over-month growth per category.
# groupby() sorts by its keys by default, so rows within each category are
# already in chronological order when shift(1) looks one row back.
# NOTE(review): shift(1) compares against the previous month that has data
# for the category, which is not necessarily the preceding calendar month.
grouped_df['Prev Month Installs'] = grouped_df.groupby('Category')['Installs'].shift(1)
grouped_df['MoM Growth %'] = (
    (grouped_df['Installs'] - grouped_df['Prev Month Installs'])
    / grouped_df['Prev Month Installs']
) * 100

# Flag periods with more than 20% growth. The first month of each category
# has no previous value (NaN growth) and therefore compares as False.
grouped_df['Highlight Growth'] = grouped_df['MoM Growth %'] > 20

# Viewing window: the chart is only rendered between 6 PM and 9 PM IST
# (both bounds inclusive), judged by the current wall-clock time in
# Asia/Kolkata.
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.datetime.now(ist).time()
start_time = datetime.time(18, 0)  # 6 PM IST
end_time = datetime.time(21, 0)  # 9 PM IST

if not (start_time <= current_time <= end_time):
    print("Graph can only be viewed between 6 PM IST and 9 PM IST.")
else:
    plt.figure(figsize=(12, 6), facecolor='black')

    # One evenly spaced colour from the 'jet' colormap per category.
    categories = grouped_df['Category'].unique()
    colors = plt.cm.jet(np.linspace(0, 1, len(categories)))

    for idx, cat in enumerate(categories):
        category_data = grouped_df.loc[grouped_df['Category'] == cat]
        months = category_data['Year-Month']
        installs = category_data['Installs']

        # Line of total installs for this category.
        plt.plot(months, installs, label=cat, color=colors[idx], linewidth=2)

        # Translucent red fill under the line where growth was flagged.
        plt.fill_between(
            months,
            installs,
            where=category_data['Highlight Growth'],
            color='red',
            alpha=0.3,
        )

    # Titles and axis labels in white to stand out on the dark theme.
    plt.title("Total Installs Over Time by Category", fontsize=16, color='white')
    plt.xlabel("Date", fontsize=14, color='white')
    plt.ylabel("Total Installs", fontsize=14, color='white')

    # Tick labels, grid and legend.
    plt.xticks(color='white', rotation=45)
    plt.yticks(color='white')
    plt.grid(alpha=0.3)
    plt.legend(fontsize=12)

    # Black plotting area to match the black figure background.
    plt.gca().set_facecolor("black")

    plt.show()


Graph can only be viewed between 6 PM IST and 9 PM IST.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Year-Month'] = filtered_df['Last Updated'].dt.to_period('M')
