In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
import pytz

df = pd.read_csv("play store data.csv")

print(f"Total rows before processing: {len(df)}")


df["Installs"] = (
    df["Installs"]
    .astype(str)
    .str.replace(r"[^\d]", "", regex=True)  
    .replace("", np.nan)  
    .astype(float)  
    .fillna(0)
    .astype(int)  
)


df["Reviews"] = pd.to_numeric(df["Reviews"], errors="coerce").fillna(0).astype(int)


df["Last Updated"] = pd.to_datetime(df["Last Updated"], errors="coerce")

df = df.dropna(subset=["Last Updated"])
print(f"Rows after removing invalid dates: {len(df)}")


one_year_ago = pd.Timestamp.now() - pd.DateOffset(years=1)
df = df[df["Last Updated"] >= one_year_ago]
print(f"Rows after filtering last 1 year updates: {len(df)}")


df = df[(df["Installs"] >= 100_000) & (df["Reviews"] > 1_000)]
print(f"Rows after filtering installs & reviews: {len(df)}")


df = df[~df["Genres"].astype(str).str.startswith(("A", "F", "E", "G", "I", "K"))]
print(f"Rows after filtering genres: {len(df)}")


ist = pytz.timezone("Asia/Kolkata")
current_time = datetime.now(ist).time()


start_time = datetime.strptime("14:00", "%H:%M").time()
end_time = datetime.strptime("16:00", "%H:%M").time()


if start_time <= current_time <= end_time:
    if df.empty:
        print("No data available after applying filters.")
    else:
      
        correlation_data = df[["Installs", "Rating", "Reviews"]].dropna()
        correlation_matrix = correlation_data.corr()

        if correlation_matrix.isnull().values.all():
            print("Correlation matrix contains only NaN values. No valid data to display.")
        else:
           
            plt.figure(figsize=(8, 6))
            sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
            plt.title("Correlation Matrix of Installs, Ratings, and Reviews")
            plt.show()
else:
    print("graph works only between 2 PM IST to 4 PM.")


Total rows before processing: 10841
Rows after removing invalid dates: 10840
Rows after filtering last 1 year updates: 0
Rows after filtering installs & reviews: 0
Rows after filtering genres: 0
No data available after applying filters.
