# In [3]:
import html
import json
import os
import webbrowser
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio

# =========================
# RESET GLOBAL CONTAINER
# =========================
# Accumulates the HTML for every plot; reset so re-running the script
# does not append to markup left over from a previous run.
plot_containers = ""

# =========================
# FOLDER SETUP
# =========================
# Directory that receives the per-plot HTML files and the dashboard page.
html_files_path = "./"
# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(html_files_path, exist_ok=True)

# =========================
# SAVE PLOT FUNCTION
# =========================
def save_plot_as_html(fig, filename, insight):
    """Write ``fig`` to its own HTML file and add it to the dashboard markup.

    Appends one ``plot-container`` block to the module-level
    ``plot_containers`` string and writes the figure to
    ``html_files_path/filename``.

    Args:
        fig: Plotly figure to render.
        filename: Name of the per-plot HTML file; also the argument handed
            to the dashboard's ``openPlot`` click handler.
        insight: Caption text placed in the container's ``insights`` div.
    """
    global plot_containers
    filepath = os.path.join(html_files_path, filename)

    # Only the first embedded plot needs to carry the plotly.js bundle; later
    # plots on the same page reuse the library it already loaded. Embedding
    # the bundle in every container multiplies the dashboard size needlessly.
    bundle = 'inline' if not plot_containers else False
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs=bundle)

    # Escape caller-supplied text before interpolating it into markup:
    # the filename becomes a JSON string literal inside an HTML attribute,
    # and the insight becomes plain element text.
    click_target = html.escape(json.dumps(filename))
    caption = html.escape(str(insight))

    plot_containers += f"""
    <div class="plot-container" onclick="openPlot({click_target})">
        <div class="plot">{html_content}</div>
        <div class="insights">{caption}</div>
    </div>
    """

    # The per-plot file is opened directly in a browser tab, so write it as a
    # complete document (with its own plotly.js) rather than a <div> fragment.
    fig.write_html(filepath, full_html=True, include_plotlyjs='inline')

# =========================
# COMMON STYLES
# =========================
# Pixel dimensions interpolated into the dashboard's .plot-container CSS rule.
plot_width, plot_height = 400, 300

# =========================
# LOAD DATA
# =========================
# Both CSV files are read from the current working directory.
apps_df = pd.read_csv(filepath_or_buffer="googleplaystore.csv")
reviews_df = pd.read_csv(filepath_or_buffer="googleplaystore_user_reviews.csv")

# =========================
# FIGURE 1: TOP CATEGORIES
# =========================
# Ten most frequent values of the Category column, one coloured bar each.
category_counts = apps_df['Category'].value_counts().nlargest(10)
fig1 = px.bar(
    title="Top Categories on Play Store",
    x=category_counts.index,
    y=category_counts.values,
    color=category_counts.index,
    height=300,
    width=400,
)
# Dark theme: black plot and paper backgrounds with white text.
fig1.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig1,
    "Category_Graph_1.html",
    "Top categories are dominated by tools, entertainment and productivity apps",
)

# =========================
# FIGURE 2: APP TYPE
# =========================
# Pie chart of how often each value of the Type column occurs.
type_counts = apps_df['Type'].value_counts()
fig2 = px.pie(
    title="App Type Distribution",
    names=type_counts.index,
    values=type_counts.values,
    height=300,
    width=300,
)
# Dark theme: black plot and paper backgrounds with white text.
fig2.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig2,
    "Type_Graph_2.html",
    "Most apps are free,indicating strategy to attract user first and monetize through ads and in-app purchases",
)

# =========================
# FIGURE 3: RATING DISTRIBUTION
# =========================
# Histogram of the Rating column, requesting 20 bins.
fig3 = px.histogram(
    data_frame=apps_df,
    x="Rating",
    nbins=20,
    title="Rating Distribution",
    height=300,
    width=300,
)
# Dark theme: black plot and paper backgrounds with white text.
fig3.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig3,
    "Rating_Graph_3.html",
    "Ratings are skewed toward higher values,suggesting that most apps rated favourly by users",
)

# =========================
# FIGURE 4: SENTIMENT DISTRIBUTION
# =========================
# Bar chart of how often each distinct Sentiment_Score value occurs.
sentiment_counts = reviews_df['Sentiment_Score'].value_counts()
fig4 = px.bar(
    title="Sentiment Distribution",
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    height=300,
    width=300,
)
# Dark theme: black plot and paper backgrounds with white text.
fig4.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig4,
    "Sentiment_Graph_4.html",
    "Sentiments in reviews show a mix of positive and negative feedback, with a slight lean towards positive sentiment",
)
# =========================
# FIGURE 5: INSTALLS BY CATEGORY
# =========================
# Strip every ',' and '+' from the Installs text, then convert to numbers;
# anything that still fails to parse becomes NaN.
apps_df['Installs'] = pd.to_numeric(
    apps_df['Installs'].astype(str).str.replace(r'[,+]', '', regex=True),
    errors='coerce',
)

# Total installs per category, keeping the ten largest totals.
installs_by_category = (
    apps_df.groupby('Category')['Installs']
    .sum()
    .nlargest(10)
)

fig5 = px.bar(
    title="Installs by Category",
    x=installs_by_category.values,
    y=installs_by_category.index,
    orientation='h',
    height=300,
    width=800,
)
# Dark theme: black plot and paper backgrounds with white text.
fig5.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig5,
    "Installs_Graph_5.html",
    "The categories with the most installs are social and communication apps, reflecting their broad appeal and everyday usage.",
)

# =========================
# FIGURE 6: UPDATES OVER YEARS
# =========================
# Parse the Last Updated column as datetimes; unparseable entries become NaT.
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')
# Number of rows per last-updated year, ordered chronologically.
updates_per_year = (
    apps_df['Last Updated']
    .dt.year
    .value_counts()
    .sort_index()
)

fig6 = px.line(
    title="Number of Updates Over Years",
    x=updates_per_year.index,
    y=updates_per_year.values,
    height=300,
    width=800,
)
# Dark theme: black plot and paper backgrounds with white text.
fig6.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig6,
    "Updates_Graph_6.html",
    "Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps",
)
# =========================
# FIGURE 7: REVENUE BY CATEGORY 
# =========================

# Price: drop '$' and ',' characters, convert to numbers, and treat anything
# that cannot be parsed as 0.
apps_df['Price'] = pd.to_numeric(
    apps_df['Price'].astype(str).str.replace(r'[\$,]', '', regex=True),
    errors='coerce',
).fillna(0)

# Installs: coerce to numbers again and replace missing values with 0 so the
# product below never yields NaN.
apps_df['Installs'] = pd.to_numeric(apps_df['Installs'], errors='coerce').fillna(0)

# Revenue is computed per row as price times install count.
apps_df['Revenue'] = apps_df['Price'].mul(apps_df['Installs'])

# Total revenue per category, keeping the ten largest totals.
revenue_by_category = apps_df.groupby('Category')['Revenue'].sum().nlargest(10)

fig7 = px.bar(
    title="Revenue by Category",
    x=revenue_by_category.index,
    y=revenue_by_category.values,
    height=300,
    width=800,
)

# Dark theme: black plot and paper backgrounds with white text.
fig7.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)

save_plot_as_html(
    fig7,
    "Revenue_Graph_7.html",
    "Categories such as Business and Productivity lead in revenue generation, indicating their monetization potential",
)

# =========================
# FIGURE 8: TOP GENRES
# =========================
# A Genres cell may hold several ';'-separated entries; split them into
# separate rows before counting, then keep the ten most frequent.
genre_counts = (
    apps_df['Genres']
    .str.split(';')
    .explode()
    .value_counts()
    .nlargest(10)
)
fig8 = px.bar(
    title="Top Genres",
    x=genre_counts.index,
    y=genre_counts.values,
    height=300,
    width=800,
)
# Dark theme: black plot and paper backgrounds with white text.
fig8.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig8,
    "Genre_Graph_8.html",
    "Action and Casual genres are the most common, reflecting users' preference for engaging and easy-to-play games",
)

# =========================
# FIGURE 9: LAST UPDATE VS RATING
# =========================
# Scatter of Rating against Last Updated, one colour per Type value.
fig9 = px.scatter(
    data_frame=apps_df,
    x="Last Updated",
    y="Rating",
    color="Type",
    title="Impact of Last Update on Rating",
    height=300,
    width=800,
)
# Dark theme: black plot and paper backgrounds with white text.
fig9.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig9,
    "Update_Graph_9.html",
    "The Scatter Plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates don't always result in better ratings",
)
# =========================
# FIGURE 10: PAID VS FREE
# =========================
# Box plot of Rating grouped and coloured by the Type column.
fig10 = px.box(
    data_frame=apps_df,
    x="Type",
    y="Rating",
    color="Type",
    title="Rating for Paid vs Free Apps",
    height=300,
    width=800,
)
# Dark theme: black plot and paper backgrounds with white text.
fig10.update_layout(
    dict(plot_bgcolor="black", paper_bgcolor="black", font_color="white")
)
save_plot_as_html(
    fig10,
    "Paid_Free_Graph_10.html",
    "Paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for",
)


# =========================
# DASHBOARD HTML
# =========================
# Single page that wraps every accumulated plot container. Doubled braces
# are literal CSS/JS braces inside the f-string; single braces interpolate
# the plot dimensions and the collected plot markup.
dashboard_html = f"""
<!DOCTYPE html>
<html>
<head>
<title>Google Play Store Analytics</title>
<style>
body {{
    background:#333;
    color:white;
    font-family:Arial;
}}
.container {{
    display:flex;
    flex-wrap:wrap;
    justify-content:center;
}}
.plot-container {{
    border:2px solid #555;
    margin:10px;
    padding:10px;
    width:{plot_width}px;
    height:{plot_height}px;
    position:relative;
    cursor:pointer;
}}
.insights {{
    display:none;
    position:absolute;
    top:5px;
    right:5px;
    background:rgba(0,0,0,0.7);
    padding:5px;
}}
.plot-container:hover .insights {{
    display:block;
}}
</style>
<script>
function openPlot(file) {{
    window.open(file,'_blank');
}}
</script>
</head>
<body>
<h1 style="text-align:center;">Google Play Store Reviews Analytics</h1>
<div class="container">
{plot_containers}
</div>
</body>
</html>
"""

# Write the dashboard next to the per-plot HTML files.
dashboard_path = os.path.join(html_files_path, "web_page.html")
with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(dashboard_html)

# Path.as_uri() yields a well-formed, percent-encoded file:// URL; gluing
# "file://" onto a raw OS path breaks on Windows drive paths and on paths
# containing spaces or other reserved characters.
webbrowser.open(Path(dashboard_path).resolve().as_uri())


# Out[3]: True