In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import plotly.express as px

In [4]:
import plotly.graph_objects as go


In [5]:
from datetime import datetime, timedelta

In [6]:
import dash

In [7]:
from dash import dcc, html
from dash.dependencies import Input, Output
import pytz

In [8]:
# Load the dataset
apps_df = pd.read_csv('Play Store Data_1.csv')

In [9]:
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [10]:
#Step 2 : Data Cleaning
apps_df = apps_df.dropna(subset=['Rating'])
for column in apps_df.columns :
    apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)
apps_df.drop_duplicates(inplace=True)
apps_df=apps_df=apps_df[apps_df['Rating']<=5]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)


In [11]:
#Convert the Installs columns to numeric by removing commas and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)

In [12]:
# Convert 'Reviews' to int
apps_df['Reviews'] = apps_df['Reviews'].astype(int)

In [13]:
#Convert Price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)

In [14]:
# Convert 'Last Updated' to datetime
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'])

In [61]:
# Filter apps updated within the last year
one_year_ago = datetime.now() - timedelta(days=365)
filtered_data = apps_df[
    (apps_df['Last Updated'] >= one_year_ago) & 
    (apps_df['Installs'] >= 100000) & 
    (apps_df['Reviews'] > 1000) &
    (apps_df['Genres'].str.startswith(('A', 'F', 'E', 'G', 'I', 'K')))
]
apps_df
filtered_data

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Last Updated Year


In [65]:
# Calculating correlation matrix
corr_matrix = filtered_data[['Installs', 'Rating', 'Reviews']].corr()

# Defining the heatmap
heatmap_fig = go.Figure(data=go.Heatmap(
    z=corr_matrix.values,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='Viridis'
))

heatmap_fig.update_layout(
    title='Correlation Matrix Heatmap',
    xaxis_nticks=36
)

# Dash Application for the HTML Dashboard
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Play Store Data Analysis Dashboard"),
    html.Div("Heatmap will only display between 3 PM and 6 PM Indian Standard Time (IST)."),
    dcc.Graph(id='heatmap')
])

# Callback to check the time and only show graph between 3 PM to 6 PM IST
@app.callback(
    Output('heatmap', 'figure'),
    [Input('heatmap', 'id')]
)
def update_graph(input_value):
    # Get the current time in UTC and convert to Indian Standard Time (IST)
    current_time = datetime.now(pytz.timezone('UTC')).astimezone(pytz.timezone('Asia/Kolkata'))
    current_hour = current_time.hour
    
    # Checking if current time is between 3 PM to 6 PM IST
    if 15 <= current_hour < 18:
        return heatmap_fig
    else:
        # Returning empty figure outside the time range
        return go.Figure()

# Displaying the plot
if __name__ == '__main__':
    # Save the dashboard as an HTML file
    app.run_server(debug=True, port=8061)  # Change the port if needed

    # Saving the HTML dashboard
    with open('Correlation_Matrix_Heatmap.html', 'w') as f:
        f.write(app.index_string)