In [11]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import base64
import io

In [12]:
# Input files, read from the current working directory.
APPS_CSV = "Play Store Data.csv"
REVIEWS_CSV = "User Reviews.csv"

# apps_df: one table of app listings; reviews_df: one table of user reviews.
apps_df = pd.read_csv(APPS_CSV)
reviews_df = pd.read_csv(REVIEWS_CSV)

In [13]:
# Clean the app table.
#
# The numeric columns are coerced *before* missing values are dropped, so any
# entry that cannot be parsed as a number (turned into NaN by errors='coerce')
# is removed by the same dropna instead of surviving as a NaN further down.
# 'Rating' is coerced as well because later cells compare it numerically.
# The result is assigned to a new frame rather than mutated in place, so
# re-running this cell is safe.
apps_df = apps_df.assign(
    Reviews=pd.to_numeric(apps_df['Reviews'], errors='coerce'),
    Rating=pd.to_numeric(apps_df['Rating'], errors='coerce'),
).dropna(subset=['App', 'Category', 'Reviews', 'Rating'])

# Keep only apps with more than 1000 reviews.
apps_df = apps_df[apps_df['Reviews'] > 1000]

In [14]:
# Attach app attributes to every review (inner join on the app name).
#
# The join key must be unique on the apps side: if the same app were listed
# more than once, each of its reviews would be repeated once per listing and
# every count computed from merged_df would be inflated. Keep the first
# listing of each app, and let pandas verify the many-to-one relationship.
apps_unique_df = apps_df.drop_duplicates(subset='App', keep='first')
merged_df = pd.merge(
    reviews_df,
    apps_unique_df,
    on='App',
    how='inner',
    validate='many_to_one',
)

In [15]:
# Keep only merged rows where every column the analysis relies on is present.
required_cols = ['Sentiment', 'Rating', 'Category']
has_all_required = merged_df[required_cols].notna().all(axis=1)
# .copy() gives an independent frame so later column assignments are unambiguous.
merged_df = merged_df.loc[has_all_required].copy()

In [16]:
def rating_group(rating):
    """Map a numeric rating to its star-bucket label.

    Buckets (upper bounds are inclusive):
      * rating <= 2      -> '1-2 Stars'
      * 2 < rating <= 4  -> '3-4 Stars'
      * rating > 4       -> '4-5 Stars'

    Parameters
    ----------
    rating : float or int or None
        The rating to classify.

    Returns
    -------
    str or None
        The bucket label, or None when the rating is missing (None / NaN).
        Without this guard a NaN would fail both comparisons and be silently
        labelled '4-5 Stars'.
    """
    if pd.isna(rating):
        return None
    if rating <= 2:
        return '1-2 Stars'
    if rating <= 4:
        return '3-4 Stars'
    return '4-5 Stars'

# Label every merged row with the star bucket of its app's rating.
merged_df['Rating Group'] = merged_df['Rating'].map(rating_group)


In [17]:
# Restrict the analysis to the five categories with the most merged rows.
category_counts = merged_df['Category'].value_counts()
top_categories = category_counts.nlargest(5).index

in_top_category = merged_df['Category'].isin(top_categories)
filtered_df = merged_df.loc[in_top_category]


In [18]:
# Long format: one row per (category, rating group, sentiment) with its row count.
group_keys = ['Category', 'Rating Group', 'Sentiment']
grouped = filtered_df.groupby(group_keys).size().rename('Count').reset_index()

# Wide format: one row per (category, rating group), one column per sentiment;
# combinations with no rows are filled with 0.
pivot_df = pd.pivot_table(
    grouped,
    index=['Category', 'Rating Group'],
    columns='Sentiment',
    values='Count',
    fill_value=0,
).reset_index()

In [19]:
# Stacked bar chart: one bar per (category, rating group), split by sentiment.
sns.set(style="whitegrid")

fig, ax = plt.subplots(figsize=(12, 8))

# One tick label per bar, e.g. "<category> | <rating group>".
pivot_df['Combined'] = pivot_df['Category'] + ' | ' + pivot_df['Rating Group']
x = np.arange(len(pivot_df))

# A sentiment column that is absent from the pivot falls back to a scalar 0.
positive = pivot_df.get('Positive', 0)
neutral = pivot_df.get('Neutral', 0)
negative = pivot_df.get('Negative', 0)

# Stack order from the axis upwards: Positive, Neutral, Negative.
ax.bar(x, positive, label='Positive', color='green')
ax.bar(x, neutral, bottom=positive, label='Neutral', color='gray')
bottom = positive + neutral
ax.bar(x, negative, bottom=bottom, label='Negative', color='red')

ax.set_xticks(x)
ax.set_xticklabels(pivot_df['Combined'], rotation=45, ha='right')
ax.set(
    ylabel="Number of Reviews",
    title="Sentiment Distribution by Rating Groups and Top 5 Categories",
)
ax.legend()

# Render the figure into an in-memory PNG instead of displaying it.
fig.tight_layout()
buf = io.BytesIO()
fig.savefig(buf, format='png', bbox_inches='tight')
plt.close(fig)
# Rewind so the next reader starts at the first byte of the PNG.
buf.seek(0)

0

In [20]:
# Embed the rendered chart in a standalone HTML page as a base64 data URI.
#
# getvalue() returns the buffer's full contents regardless of the current
# stream position, so re-running this cell still embeds the whole PNG
# (a second buf.read() would return b"" and produce an empty image).
img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')

html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Sentiment Distribution Chart</title>
</head>
<body>
    <h2 style="text-align:center;">Sentiment Distribution by Rating Groups and Top 5 Categories</h2>
    <div style="text-align:center;">
        <img src="data:image/png;base64,{img_base64}" alt="Sentiment Distribution Chart" style="width:90%;">
    </div>
</body>
</html>
"""

# Explicit encoding so the output does not depend on the platform's default.
with open("task1.html", "w", encoding="utf-8") as f:
    f.write(html_content)

print("HTML with sentiment chart created successfully.")

HTML with sentiment chart created successfully.
