<a href="https://colab.research.google.com/github/parth-u/NullClass-App-Store-Data-Analytics/blob/main/Analysis%205.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import pytz  # For timezone conversion

# Load the dataset
file_path = '/content/sample_data/Play Store Data.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Data Cleaning
# Convert Installs to numeric: first strip the "+" and "," characters from the text
data['Installs'] = data['Installs'].str.replace('[+,]', '', regex=True)

# Keep only rows whose Installs text is purely numeric.
# str.isnumeric() yields NaN (not False) for missing values, and a mask that
# contains NaN cannot be used for boolean indexing; .eq(True) turns those
# entries into False so such rows are dropped as invalid instead of raising.
valid_installs = data['Installs'].str.isnumeric().eq(True)

# Take an explicit copy so the column assignment below writes to an
# independent frame rather than to a slice of the original
# (avoids pandas' SettingWithCopyWarning).
data = data[valid_installs].copy()
data['Installs'] = data['Installs'].astype(float)

# Convert Reviews to numeric; values that cannot be parsed become NaN
data['Reviews'] = pd.to_numeric(data['Reviews'], errors='coerce')

# Summary statistics of the two filter columns, printed before any filtering
print("\n📌 Dataset Overview Before Filtering")
print(data[['Installs', 'Reviews']].describe())

# Thresholds: the 5th percentile of each column, floored at
# 1000 installs and 50 reviews respectively
installs_p05 = data['Installs'].quantile(0.05)
reviews_p05 = data['Reviews'].quantile(0.05)
min_installs = max(1000, installs_p05)
min_reviews = max(50, reviews_p05)

print(f"\n🔍 Filter Values: Installs ≥ {min_installs}, Reviews ≥ {min_reviews}")

# Step 1: no date filter is applied; work on a copy of the cleaned data
filtered_data = data.copy()
print(f"After removing date filter: {filtered_data.shape}")

# Step 2: keep rows that meet both the install and the review threshold
enough_installs = filtered_data['Installs'] >= min_installs
enough_reviews = filtered_data['Reviews'] >= min_reviews
filtered_data = filtered_data.loc[enough_installs & enough_reviews]
print(f"After installs & reviews filtering: {filtered_data.shape}")

# Step 3: drop rows whose genre begins with X, Y or Z
# (rows with a missing genre are kept because na=False)
excluded_letters = ('X', 'Y', 'Z')
has_excluded_initial = filtered_data['Genres'].str.startswith(excluded_letters, na=False)
filtered_data = filtered_data.loc[~has_excluded_initial]
print(f"After genres filtering: {filtered_data.shape}")

# Narrow to the three columns used in the correlation matrix and
# discard rows where any of them is missing
correlation_columns = ['Installs', 'Rating', 'Reviews']
filtered_data = filtered_data[correlation_columns].dropna()
print(f"Final dataset size before correlation: {filtered_data.shape}")

# ⏰ Time Restriction: Show Graph Only Between 2 PM - 4 PM IST (both ends inclusive)
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist).time()  # current time of day in IST
start_time = datetime.strptime("14:00:00", "%H:%M:%S").time()
end_time = datetime.strptime("16:00:00", "%H:%M:%S").time()

if start_time <= current_time <= end_time:
    if filtered_data.empty:
        print("❌ No data left after filtering! Try reducing the filters further.")
    else:
        # Generate the correlation matrix of the remaining columns
        corr_matrix = filtered_data.corr()
        print("📊 Correlation Matrix:\n", corr_matrix)

        # Plot the heatmap with a dark theme
        plt.figure(figsize=(8, 6), facecolor='black')  # Dark background
        sns.set_style("dark")  # Dark style for aesthetics

        heatmap = sns.heatmap(
            corr_matrix,
            annot=True,
            cmap='magma',  # Vibrant color map
            fmt='.2f',
            linewidths=1,
            linecolor='black',
            annot_kws={"size": 12, "color": "white"}  # White text
        )

        # The colorbar tick labels keep the default dark text colour, which is
        # unreadable on the black figure background; make them white to match
        # the title and axis labels.
        colorbar = heatmap.collections[0].colorbar
        if colorbar is not None:
            colorbar.ax.tick_params(colors='white')

        plt.title('Correlation Matrix for Filtered Data', fontsize=14, color='white')  # White title
        plt.xticks(color='white')  # White axis labels
        plt.yticks(color='white')  # White axis labels
        plt.show()
else:
    print("⏳ Graph not available outside allowed hours (2 PM - 4 PM IST).")



📌 Dataset Overview Before Filtering
           Installs       Reviews
count  1.084000e+04  1.084000e+04
mean   1.546434e+07  4.441529e+05
std    8.502936e+07  2.927761e+06
min    0.000000e+00  0.000000e+00
25%    1.000000e+03  3.800000e+01
50%    1.000000e+05  2.094000e+03
75%    5.000000e+06  5.477550e+04
max    1.000000e+09  7.815831e+07

🔍 Filter Values: Installs ≥ 1000, Reviews ≥ 50
After removing date filter: (10840, 13)
After installs & reviews filtering: (7923, 13)
After genres filtering: (7923, 13)
Final dataset size before correlation: (7859, 3)
⏳ Graph not available outside allowed hours (2 PM - 4 PM IST).
