In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import webbrowser
import os


In [2]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\BIT\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [3]:
#loading the data 
apps_df=pd.read_csv('Play Store Data.csv')
reviews_df=pd.read_csv('User Reviews.csv')

In [4]:
#giving commands that are essential in project 
#data cleaning
apps_df=apps_df.dropna(subset=['Rating'])
for column in apps_df.columns :
    apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)
apps_df.drop_duplicates(inplace=True)
apps_df=apps_df=apps_df[apps_df['Rating']<=5]
reviews_df.dropna(subset=['Translated_Review'],inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  apps_df[column].fillna(apps_df[column].mode()[0],inplace=True)


In [5]:
#converting installs column to numeric by removing commas and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)

#convert Price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)

In [6]:
#merging datasets
merged_df=pd.merge(apps_df,reviews_df,on='App',how='inner')

In [7]:
#data transmission
apps_df['Reviews']=apps_df['Reviews'].astype(int)

In [8]:
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M',''))
    elif 'K' in size:
        return float(size.replace('k',''))/1024
    else:
        return np.nan
apps_df['Size']=apps_df['Size'].apply(convert_size)


In [9]:
#logarithmic analysis
apps_df['Log_Installs']=np.log(apps_df['Installs'])
apps_df['Log_Reviews']=np.log(apps_df['Reviews'])

In [10]:
def rating_group(rating):
    if rating >=4:
        return 'Top rated app'
    elif rating >=3:
        return 'Above average'
    elif rating >=2:
        return 'Average'
    else:
        return 'Below Average'
apps_df['Rating_Group']=apps_df['Rating'].apply(rating_group)

In [11]:
#revenue column
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']

In [12]:
#Sentiment Analysis NLP
sia= SentimentIntensityAnalyzer()

In [13]:
reviews_df['Sentiment_Score']=reviews_df['Translated_Review'].apply(lambda x: sia.polarity_scores(str(x))['compound'])

In [14]:
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'],errors='coerce')

In [15]:
apps_df['Year']=apps_df['Last Updated'].dt.year

In [16]:
html_files_path="./"
if not os.path.exists(html_files_path):
    os.makedirs(html_files_path)

In [17]:
plot_containers=""

In [18]:
# Save each Plotly figure to an HTML file
def save_plot_as_html(fig, filename, insight):
    global plot_containers
    filepath = os.path.join(html_files_path, filename)
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs='inline')
    # Append the plot and its insight to plot_containers
    plot_containers += f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath, full_html=False, include_plotlyjs='inline')

plot_width=400
plot_height=300
plot_bg_color='black'
text_color='white'
title_font={'size':16}
axis_font={'size':12}

In [19]:
#Figure 1
category_counts=apps_df['Category'].value_counts().nlargest(10)
fig1=px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x':'Category','y':'Count'},
    title='Top Categories on Play Store',
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=400,
    height=300
)
fig1.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig1,"Category Graph 1.html","The top categories on the Play Store are dominated by tools, entertainment, and productivity apps")

In [20]:
#Figure 2
type_counts=apps_df['Type'].value_counts()
fig2=px.pie(
    values=type_counts.values,
    names=type_counts.index,
    title='App Type Distribution',
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=400,
    height=300
)
fig2.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig2,"Type Graph 2.html","Most apps on the Playstore are free, indicating a strategy to attract users first and monetize through ads or in app purchases")

In [21]:
#Figure 3
fig3=px.histogram(
    apps_df,
    x='Rating',
    nbins=20,
    title='Rating Distribution',
    color_discrete_sequence=['#636EFA'],
    width=400,
    height=300
)
fig3.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig3,"Rating Graph 3.html","Ratings are skewed towards higher values, suggesting that most apps are rated favorably by users")

In [22]:
#Figure 4
sentiment_counts=reviews_df['Sentiment_Score'].value_counts()
fig4=px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x':'Sentiment Score','y':'Count'},
    title='Sentiment Distribution',
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.RdPu,
    width=400,
    height=300
)
fig4.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig4,"Sentiment Graph 4.html","Sentiments in reviews show a mix of positive and negative feedback, with a slight lean towards positive sentiments")

In [23]:
#Figure 5
installs_by_category=apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    orientation='h',
    labels={'x':'Installs','y':'Category'},
    title='Installs by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=400,
    height=300
)
fig5.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig5,"Installs Graph 5.html","The categories with the most installs are social and communication apps, reflecting their broad appeal and daily usage")

In [24]:
# Updates Per Year Plot
updates_per_year = apps_df['Last Updated'].dt.year.value_counts().sort_index()
fig6 = px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    labels={'x': 'Year', 'y': 'Number of Updates'},
    title='Number of Updates Over the Years',
    color_discrete_sequence=['#AB63FA'],
    width=plot_width,
    height=plot_height
)
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=10)
)
save_plot_as_html(fig6, "Updates Graph 6.html", "Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps.")

In [25]:
#Figure 7
revenue_by_category=apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    labels={'x':'Category','y':'Revenue'},
    title='Revenue by Category',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Greens,
    width=400,
    height=300
)
fig7.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig7,"Revenue Graph 7.html","Categories such as Business and Productivity lead in revenue generation, indicating their monetization potential")

In [26]:
#Figure 8
genre_counts=apps_df['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)
fig8=px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x':'Genre','y':'Count'},
    title='Top Genres',
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=400,
    height=300
)
fig8.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig8,"Genre Graph 8.html","Action and Casual genres are the most common, reflecting users' preference for engaging and easy-to-play games")

In [27]:
#Figure 9
fig9=px.scatter(
    apps_df,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=400,
    height=300
)
fig9.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig9,"Update Graph 9.html","The Scatter Plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates dont always result in better ratings.")

In [28]:
#Figure 10
fig10=px.box(
    apps_df,
    x='Type',
    y='Rating',
    color='Type',
    title='Rating for Paid vs Free Apps',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=400,
    height=300
)
fig10.update_layout(
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig10,"Paid Free Graph 10.html","Paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for")

In [29]:
#task1
import plotly.graph_objects as go

# STEP 1: Load the datasets
apps_df = pd.read_csv("Play Store Data.csv")
user_reviews_df = pd.read_csv("User Reviews.csv")

# STEP 2: Clean and preprocess
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')
apps_df = apps_df.dropna(subset=['Rating', 'Category'])

# Filter to apps with >1000 reviews
apps_over_1000 = apps_df[apps_df['Reviews'] > 1000]

# Merge with user reviews on 'App'
merged_df = pd.merge(user_reviews_df, apps_over_1000, on='App')
merged_df = merged_df.dropna(subset=['Sentiment', 'Rating'])

# STEP 3: Create rating group
def rating_group(r):
    if r <= 2:
        return '1-2'
    elif r <= 4:
        return '3-4'
    else:
        return '4-5'

merged_df['Rating_Group'] = merged_df['Rating'].apply(rating_group)

# STEP 4: Top 5 categories by review count
top_categories = merged_df['Category'].value_counts().nlargest(5).index
merged_top = merged_df[merged_df['Category'].isin(top_categories)]

# STEP 5: Group by Category, Rating_Group, and Sentiment
grouped = merged_top.groupby(['Category', 'Rating_Group', 'Sentiment']).size().reset_index(name='Count')

# Pivot for plotting
pivot_df = grouped.pivot_table(index=['Category', 'Rating_Group'], columns='Sentiment', values='Count', fill_value=0).reset_index()

# Create label for x-axis
pivot_df['Label'] = pivot_df['Category'] + "\n" + pivot_df['Rating_Group']
pivot_df = pivot_df.sort_values(by=['Category', 'Rating_Group'])

# Plot using Plotly
sentiments = ['Positive', 'Neutral', 'Negative']
colors = {'Positive': 'green', 'Neutral': 'gray', 'Negative': 'red'}

fig = go.Figure()

for sentiment in sentiments:
    fig.add_trace(go.Bar(
        x=pivot_df['Label'],
        y=pivot_df[sentiment],
        name=sentiment,
        marker_color=colors[sentiment]
    ))

# Layout styling
fig.update_layout(
    barmode='stack',
    title='Sentiment Distribution',
    xaxis_title='Category and Rating Group',
    yaxis_title='Number of Reviews',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color='white',
    title_font=dict(size=20),
    xaxis=dict(title_font=dict(size=12)),
    yaxis=dict(title_font=dict(size=12)),
    margin=dict(l=10, r=10, t=30, b=10)
)
#fig.update_traces(marker=dict(pattern=dict(line=dict(color='white',width=1))))
# Save as HTML
save_plot_as_html(fig,"Sentiment Distribution.html","sentiment disturbution of top% categories which has more than 1000 reviews")

In [30]:
#task2
import plotly.graph_objects as go
from datetime import datetime
import pytz

# Load dataset
apps_df = pd.read_csv("Play Store Data.csv")

# Convert size
def convert_size(size):
    if isinstance(size, str):
        if 'M' in size:
            return float(size.replace('M', ''))
        elif 'k' in size:
            return float(size.replace('k', '')) / 1024
    return None

# Clean and preprocess
apps_df = apps_df[apps_df['Installs'].str.contains(r'^\d+[+,]?$', na=False)]
apps_df['Installs'] = apps_df['Installs'].str.replace('[+,]', '', regex=True).astype(int)
apps_df['Size_MB'] = apps_df['Size'].apply(convert_size)
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
apps_df['Reviews'] = pd.to_numeric(apps_df['Reviews'], errors='coerce')

# Filter
filtered_apps = apps_df[
    (apps_df['Rating'] >= 4.0) &
    (apps_df['Size_MB'] >= 10) &
    (apps_df['Last Updated'].dt.month == 1)
]

# Top 10 categories by installs
top_categories = filtered_apps.groupby('Category')['Installs'].sum().nlargest(10).index
top_df = filtered_apps[filtered_apps['Category'].isin(top_categories)]

# Group summary
summary_df = top_df.groupby('Category').agg({
    'Rating': 'mean',
    'Reviews': 'sum'
}).reset_index()

summary_df.rename(columns={'Rating': 'Average Rating', 'Reviews': 'Total Reviews'}, inplace=True)

# Check current time in IST
ist = pytz.timezone('Asia/Kolkata')
now = datetime.now(ist)

if 15 <= now.hour < 17:
    # Plot grouped bar chart
    fig = go.Figure(data=[
        go.Bar(name='Average Rating', x=summary_df['Category'], y=summary_df['Average Rating']),
        go.Bar(name='Total Reviews', x=summary_df['Category'], y=summary_df['Total Reviews'])
    ])
    fig.update_layout(
        title='Average Rating vs Total Reviews',
        xaxis_title='Category',
        barmode='group',
        template='plotly_dark'
    )
    fig.show()
    
    # Save to HTML
    save_plot_as_html(fig,"Top10_Categories_Chart.html", "grouped bar chart to compare the average rating and total review count for the top 10 app categories by number of installs")
else:
    print("Chart is hidden. This chart can only be displayed between 3PM and 5PM IST.")

Chart is hidden. This chart can only be displayed between 3PM and 5PM IST.


In [31]:
#task3 
#note imports already imported above
#Load the dataset
df = pd.read_csv("Play Store Data.csv")

#Clean the 'Installs' column
df['Installs'] = df['Installs'].astype(str)
df = df[df['Installs'].str.contains(r'^\d{1,3}(?:,\d{3})*\+$', regex=True)]
df['Installs'] = df['Installs'].str.replace(r'[+,]', '', regex=True).astype(int)

#Filter out categories starting with A, C, G, or S
df_filtered = df[~df['Category'].str.startswith(('A', 'C', 'G', 'S'))]

#Get top 5 categories by total installs
category_installs = df_filtered.groupby('Category')['Installs'].sum().reset_index()
top5_categories = category_installs.sort_values(by='Installs', ascending=False).head(5)

# Add highlight flag for categories > 1 million installs
top5_categories['Highlight'] = top5_categories['Installs'] > 1_000_000

# Map categories to countries for fake geo mapping
country_map = {
    'FAMILY': 'USA',
    'TOOLS': 'CAN',
    'PRODUCTIVITY': 'GBR',
    'ENTERTAINMENT': 'AUS',
    'PHOTOGRAPHY': 'IND'
}
top5_categories['Country'] = top5_categories['Category'].map(country_map)

# Check current IST time
ist = pytz.timezone('Asia/Kolkata')
now_ist = datetime.now(ist)
start_time = now_ist.replace(hour=18, minute=0, second=0, microsecond=0)
end_time = now_ist.replace(hour=20, minute=0, second=0, microsecond=0)

# Create and save Choropleth map if within time range
if start_time <= now_ist <= end_time:
    fig = px.choropleth(
        top5_categories,
        locations="Country",
        locationmode="ISO-3",
        color="Installs",
        hover_name="Category",
        color_continuous_scale="Viridis",
        title="Top 5 App Categories by Installs (Filtered)"
    )

    # Highlight categories with >1 million installs
    highlighted = top5_categories[top5_categories['Highlight']]
    for _, row in highlighted.iterrows():
        fig.add_scattergeo(
            locations=[row['Country']],
            locationmode="ISO-3",
            text=[f"{row['Category']} (Highlight)"],
            mode='text',
            showlegend=False
        )

    # Save to HTML
    save_plot_as_html(fig,"choropleth_top5_categories.html","Choropleth map to visualize global installs by Category of top 5 categories having 1million+ installs")
else:
    print("⛔ Outside 6 PM - 8 PM IST. Map not generated.")

⛔ Outside 6 PM - 8 PM IST. Map not generated.


In [32]:
#task4
#note imports, data loaded above

#  Filter rows
mask = (
    ~df['App'].str.upper().str.startswith(tuple("XYZ")) &
    df['Category'].str.upper().str.startswith(tuple("BCE")) &
    (pd.to_numeric(df['Reviews'], errors='coerce') > 500)
)
df = df[mask].copy()


# Clean dates & installs
df['Last Updated'] = pd.to_datetime(df['Last Updated'], errors='coerce')
df = df.dropna(subset=['Last Updated'])

df['Installs'] = (
    df['Installs']
      .astype(str)  
      .str.replace('[+,]', '', regex=True)
      .astype(float)
)


#  Aggregate by month & category
df['Month'] = df['Last Updated'].dt.to_period('M')
monthly = (
    df.groupby(['Month', 'Category'])['Installs']
      .sum()
      .reset_index()
)
monthly['Month'] = monthly['Month'].dt.to_timestamp()

#  MoM Growth
monthly['MoM_Growth'] = (
    monthly
      .groupby('Category')['Installs']
      .pct_change()
)

# Check current IST time
ist = pytz.timezone('Asia/Kolkata')
now_ist = datetime.now(ist)
start_time = now_ist.replace(hour=18, minute=0, second=0, microsecond=0)
end_time = now_ist.replace(hour=21, minute=0, second=0, microsecond=0)

if start_time <= now_ist <= end_time:
    fig = go.Figure()
    
    for cat, cat_df in monthly.groupby('Category'):
        cat_df = cat_df.sort_values('Month')
        show_high_growth = cat_df['MoM_Growth'] > 0.20
        
        fig.add_trace(go.Scatter(
            x=cat_df['Month'], y=cat_df['Installs'],
            mode='lines',
            name=f"{cat}",
            line=dict(width=2)
        ))
        
        fig.add_trace(go.Scatter(
            x=cat_df[show_high_growth]['Month'],
            y=cat_df[show_high_growth]['Installs'],
            mode='lines',
            name=f"{cat} (High Growth)",
            fill='tozeroy',
            opacity=0.2,
            line=dict(width=0.5),
            showlegend=False
        ))
    
    fig.update_layout(
        title="Total Installs Over Time by App Category",
        xaxis_title="Month",
        yaxis_title="Total Installs",
        legend_title="App Category",
        template="plotly_white"
    )
    
    save_plot_as_html(fig,"installs_by_category.html","Time series line chart which shows the trend of total installs over time, segmented by app category.")
else:
    print("⏳ Plot hidden: available only 6 PM – 9 PM IST.")

⏳ Plot hidden: available only 6 PM – 9 PM IST.


In [33]:
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import datetime
import pytz

# Clean and convert size
def convert_size(size):
    try:
        if 'M' in size:
            return float(size.replace('M', ''))
        elif 'K' in size:
            return float(size.replace('K', '')) / 1024
        elif size.strip() == 'Varies with device':
            return np.nan
    except:
        return np.nan
    return np.nan

# Load your datasets
apps_df = pd.read_csv("Play Store Data.csv")
user_reviews_df = pd.read_csv("User Reviews.csv")

# Clean size
apps_df['Size_MB'] = apps_df['Size'].apply(convert_size)

# Clean installs
apps_df = apps_df[apps_df['Installs'].astype(str).str.replace('[+,]', '', regex=True).str.isnumeric()]
apps_df['Installs'] = apps_df['Installs'].astype(str).str.replace('[+,]', '', regex=True).astype(int)

# Clean reviews
apps_df = apps_df[apps_df['Reviews'].astype(str).str.isnumeric()]
apps_df['Reviews'] = apps_df['Reviews'].astype(int)

# Merge with user_reviews_df
merged_df = pd.merge(apps_df, user_reviews_df, on='App', how='inner')

# Apply filters
categories = ['GAME', 'BEAUTY', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'ENTERTAINMENT', 'SOCIAL', 'EVENT']
filtered_df = merged_df[
    (merged_df['Rating'] > 3.5) &
    (merged_df['Category'].str.upper().isin(categories)) &
    (merged_df['Reviews'] > 500) &
    (merged_df['Sentiment_Subjectivity'] > 0.5) &
    (merged_df['Installs'] > 50000)
]

# Time-based display (5PM to 7PM IST)
ist = pytz.timezone('Asia/Kolkata')
now_ist = datetime.now(ist)
start_time = now_ist.replace(hour=17, minute=0, second=0, microsecond=0)
end_time = now_ist.replace(hour=19, minute=0, second=0, microsecond=0)

if start_time <= now_ist <= end_time:
    fig = px.scatter(
        filtered_df,
        x='Size_MB',
        y='Rating',
        size='Installs',
        color='Category',
        hover_name='App',
        size_max=60,
        title='App Size vs Rating (Bubble Size = Installs)',
        labels={'Size_MB': 'App Size (MB)', 'Rating': 'Average Rating'},
        opacity=0.6
    )
    save_plot_as_html(fig,"bubble_chart_apps.html"," bubble chart to analyze the relationship between app size (in MB) and average rating")
else:
    print("⛔ This chart is only available between 5 PM and 7 PM IST.")

⛔ This chart is only available between 5 PM and 7 PM IST.


In [34]:
plot_containers_split=plot_containers.split('</div>')
if len(plot_containers_split) > 1:
    final_plot=plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers

In [35]:
#dashboard
dashboard_html= """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name=viewport" content="width=device-width,initial-scale-1.0">
    <title> Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify_content: center;
            padding: 20px;
        }}
        .plot-container {{
            border: 2px solid #555
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0,0,0,0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container: hover .insights {{
            display: block;
        }}
        </style>
        <script>
            function openPlot(filename) {{
                window.open(filename, '_blank');
                }}
        </script>
    </head>
    <body>
        <div class= "header">
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
            <h1>Google Play Store Reviews Analytics</h1>
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
        </div>
        <div class="container">
            {plots}
        </div>
    </body>
    </html>
    """

In [36]:
final_html=dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)
dashboard_path=os.path.join(html_files_path,"web page.html")
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True