In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import base64
import io

In [116]:
df_apps = pd.read_csv('Play Store Data.csv')
df_reviews = pd.read_csv('User Reviews.csv')

In [117]:
print(df_apps.head())

                                                 App        Category  Rating  \
0     Photo Editor & Candy Camera & Grid & ScrapBook  ART_AND_DESIGN     4.1   
1                                Coloring book moana  ART_AND_DESIGN     3.9   
2  U Launcher Lite – FREE Live Cool Themes, Hide ...  ART_AND_DESIGN     4.7   
3                              Sketch - Draw & Paint  ART_AND_DESIGN     4.5   
4              Pixel Draw - Number Art Coloring Book  ART_AND_DESIGN     4.3   

  Reviews  Size     Installs  Type Price Content Rating  \
0     159   19M      10,000+  Free     0       Everyone   
1     967   14M     500,000+  Free     0       Everyone   
2   87510  8.7M   5,000,000+  Free     0       Everyone   
3  215644   25M  50,000,000+  Free     0           Teen   
4     967  2.8M     100,000+  Free     0       Everyone   

                      Genres      Last Updated         Current Ver  \
0               Art & Design   January 7, 2018               1.0.0   
1  Art & Design;Pretend 

In [118]:
df_apps['Installs'] = df_apps['Installs'].str.replace('+', '', regex=False).str.replace(',', '', regex=False)
df_apps['Installs'] = pd.to_numeric(df_apps['Installs'], errors='coerce')

In [119]:
def size_to_mb(size):
    if 'M' in size:
        return float(size.replace('M', ''))
    elif 'k' in size:
        return float(size.replace('k', '')) / 1024  # kB to MB
    else:
        return np.nan

df_apps['Size_MB'] = df_apps['Size'].apply(lambda x: size_to_mb(str(x)))

In [120]:
df_apps['Price'] = df_apps['Price'].str.replace('$', '', regex=False)
df_apps['Price'] = pd.to_numeric(df_apps['Price'], errors='coerce').fillna(0)

In [121]:
if 'Android Ver' in df_apps.columns:
    df_apps['Android Ver'] = df_apps['Android Ver'].astype(str)
    df_apps['Android Ver'] = df_apps['Android Ver'].str.extract(r'(\d+(\.\d+)?)')[0]
    df_apps['Android Ver'] = pd.to_numeric(df_apps['Android Ver'], errors='coerce')
    android_version_filter = (df_apps['Android Ver'] > 4.0)
else:
    print("Warning: 'Android Ver' column not found. Skipping Android version filter.")
    android_version_filter = True


In [122]:
df_apps['App_name_length'] = df_apps['App'].apply(lambda x: len(str(x)))

In [123]:
df_apps['Revenue'] = np.where(df_apps['Type'] == 'Paid', df_apps['Price'] * df_apps['Installs'], 0)

In [124]:
df_filtered = df_apps[(df_apps['Installs'] >= 10000) &
                      (df_apps['Revenue'] >= 10000) &
                      (android_version_filter) &
                      (df_apps['Size_MB'] > 15) &
                      (df_apps['Content Rating'] == 'Everyone') &
                      (df_apps['App_name_length'] <= 30)]

In [125]:
print("Filtered dataset shape:", df_filtered.shape)

Filtered dataset shape: (33, 16)


In [126]:
top_categories = df_filtered['Category'].value_counts().head(3).index.tolist()
print("Top 3 Categories:", top_categories)

Top 3 Categories: ['GAME', 'FAMILY', 'PHOTOGRAPHY']


In [127]:
df_top = df_filtered[df_filtered['Category'].isin(top_categories)]

In [128]:
grouped = df_top.groupby(['Category', 'Type']).agg({
    'Installs': 'mean',
    'Revenue': 'mean'
}).reset_index()

print(grouped)

      Category  Type        Installs    Revenue
0       FAMILY  Paid   381428.571429   796900.0
1         GAME  Paid    71250.000000   279287.5
2  PHOTOGRAPHY  Paid  1000000.000000  5990000.0


In [129]:
plt.figure(figsize=(14, 8))
ax1 = plt.gca()
ax2 = ax1.twinx()

colors = ['#1f77b4', '#ff7f0e']  # Blue = Free, Orange = Paid

all_types = ['Free', 'Paid']
categories = grouped['Category'].unique()

full_index = pd.MultiIndex.from_product([categories, all_types], names=['Category', 'Type'])
grouped = grouped.set_index(['Category', 'Type']).reindex(full_index, fill_value=0).reset_index()

sns.barplot(x='Category', y='Installs', hue='Type', data=grouped, ax=ax1, palette=colors, alpha=0.7)

sns.pointplot(x='Category', y='Revenue', hue='Type', data=grouped, ax=ax2, 
              color='red', markers='o', linestyles='--', dodge=0.4, join=False, legend=False)

h1, l1 = ax1.get_legend_handles_labels()
ax1.legend(h1, l1, loc='upper left')

ax1.set_ylabel('Average Installs')
ax2.set_ylabel('Average Revenue ($)')
ax1.set_xlabel('Category')
plt.title('Average Installs (Bar) vs Average Revenue (Line) for Top 3 Categories')
ax1.grid(axis='y')

plt.tight_layout()
buf = io.BytesIO()
plt.savefig(buf, format='png', bbox_inches='tight')
plt.close()
buf.seek(0)


Setting a gradient palette using color= is deprecated and will be removed in v0.14.0. Set `palette='dark:red'` for the same effect.

  sns.pointplot(x='Category', y='Revenue', hue='Type', data=grouped, ax=ax2,

The `join` parameter is deprecated and will be removed in v0.15.0. You can remove the line between points with `linestyle='none'`.

  sns.pointplot(x='Category', y='Revenue', hue='Type', data=grouped, ax=ax2,


0

In [130]:
img_base64 = base64.b64encode(buf.read()).decode('utf-8')

html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <title>Dual-axis Chart</title>
    <script>
        function isISTBetween1and2PM() {{
            let now = new Date();
            let istOffset = 330; // IST = UTC +5:30
            let utc = now.getTime() + now.getTimezoneOffset() * 60000;
            let istTime = new Date(utc + (60000 * istOffset));
            let hour = istTime.getHours();
            return hour >= 13 && hour < 14;
        }}

        window.onload = function () {{
            if (isISTBetween1and2PM()) {{
                document.getElementById("image-container").style.display = "block";
            }} else {{
                document.getElementById("message").style.display = "block";
            }}
        }};
    </script>
</head>
<body>
    <h2 style="text-align:center;">Dual-axis chart comparing the average installs and revenue for free vs. paid apps within the top 3 app categories</h2>
    
    <div id="image-container" style="display:none; text-align:center;">
        <img src="data:image/png;base64,{img_base64}" alt="Dual-axis Chart" style="width:90%;">
    </div>
    
    <div id="message" style="display:none; text-align:center; font-size:20px; padding:50px;">
        Dual-axis chart is visible only between 1 PM and 2 PM IST.
    </div>
</body>
</html>
"""

# Save HTML file
with open("task2.html", "w") as f:
    f.write(html_content)

print("Dual-axis chart HTML created successfully")


Dual-axis chart HTML created successfully
