In [195]:
# Importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
import seaborn as sns            
import plotly.express as px      
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
import webbrowser
import os

In [196]:
import plotly.io as pio

In [197]:
# Downloading NLTK's VADER lexicon for sentiment analysis
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sairi\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [198]:
# Data Loading

In [199]:
# Reading the datasets
apps_df = pd.read_csv('Play Store Data.csv')         
reviews_df = pd.read_csv('User Reviews.csv')

In [200]:
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [201]:
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


In [202]:
# df.isnull() detects the missing values 
# df.dropna() removes rows and columns that contain missing values
# df.fillna() fills missing values

In [203]:
# df.duplicate() identify the duplicate values
# df.drop_duplicates() removes duplicate rows

In [204]:
# Data cleaning 

# Dropping the rows containing missing ratings and duplicates
apps_df = apps_df.dropna(subset=['Rating'])  

# Filling missing values with the mode for each column
for column in apps_df.columns :
    
    apps_df[column].fillna(apps_df[column].mode()[0],inplace =True) 
    
apps_df.drop_duplicates(inplace = True)

# Removing Invalid ratings (greater than 5)
apps_df =apps_df= apps_df[apps_df['Rating']<= 5]

# Dropping rows with missing reviews in the 'reviews' dataset
reviews_df.dropna(subset=['Translated_Review'],inplace = True)


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [205]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs           object
Type               object
Price              object
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [206]:
# convert the installs columns to numeric by removing comma and +
apps_df['Installs']=apps_df['Installs'].str.replace(',','').str.replace('+','').astype(int)

# converting price column to numeric after removing $
apps_df['Price']=apps_df['Price'].str.replace('$','').astype(float)

In [207]:
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews            object
Size               object
Installs            int32
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
dtype: object

In [208]:
# Combining the apps and reviews datasets
merged_df = pd.merge(apps_df,reviews_df,on='App',how = 'inner')
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.25,1.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.0,0.0
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.5,0.6
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.8,0.9


In [209]:
# Function to convert 'Size' column to a uniform numeric format (MB)
def convert_size(size):
    if 'M' in size:
        return float(size.replace('M',''))     
    elif 'k' in size:
        return float(size.replace('k',''))/1024       
    else:
        return np.nan       # else return with nan
apps_df['Size'] = apps_df['Size'].apply(convert_size)
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6,500,Free,0.0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [210]:
# logorithimic

In [211]:
apps_df['Log_Installs']= np.log(apps_df['Installs'])

In [212]:
apps_df['Reviews'] = apps_df['Reviews'].astype(int)

In [213]:
apps_df['Log_Reviews']=np.log(apps_df['Reviews'])
apps_df.dtypes

App                object
Category           object
Rating            float64
Reviews             int32
Size              float64
Installs            int32
Type               object
Price             float64
Content Rating     object
Genres             object
Last Updated       object
Current Ver        object
Android Ver        object
Log_Installs      float64
Log_Reviews       float64
dtype: object

In [214]:
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']
apps_df['Revenue']

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
10834    0.0
10836    0.0
10837    0.0
10839    0.0
10840    0.0
Name: Revenue, Length: 8892, dtype: float64

In [215]:
# To measure polarity scores of any sentence or analyze the positive/negetive intensity of any sentence
sia = SentimentIntensityAnalyzer()

In [216]:
review = "This app is amazing! I Love the new features."
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.42, 'pos': 0.58, 'compound': 0.8516}


In [217]:
review = "This app is very bad! I hate the new features."
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.535, 'neu': 0.465, 'pos': 0.0, 'compound': -0.8427}


In [218]:
review = "This app is okay."
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.612, 'pos': 0.388, 'compound': 0.2263}


In [219]:
reviews_df['Sentiment_Score']=reviews_df['Translated_Review'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333,0.9531
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462,0.6597
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875,0.6249
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3,0.6369
5,10 Best Foods for You,Best way,Positive,1.0,0.3,0.6369


In [220]:
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'],errors='coerce')

apps_df['Year']=apps_df['Last Updated'].dt.year
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.21034,5.068904,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,0.0,2018


In [221]:
# Directory for saving HTML plots 
html_files_path="./"
if not os.path.exists(html_files_path):
    os.makedirs(html_files_path)

In [222]:
plot_containers = ""

In [223]:
# Function to save plotly figures as HTML files

def save_plot_as_html(fig,filename,insight):
    global plot_containers
    filepath = os.path.join(html_files_path,filename)
    html_content=pio.to_html(fig,full_html=False ,include_plotlyjs='inline')
    plot_containers += f"""
    <div class = "plot.container" id="{filename}')" onclick="openPlot('{filename}')">
       <div class = "plot">{html_content}</div>
       <div class = "insights">{insight}</div>
    </div>
    """
    fig.write_html(filepath,full_html=False,include_plotlyjs='inline')

In [224]:
# Visualization layout configuration
plot_width = 400
plot_height = 300
plot_bg_color = 'black'
text_color='white'
title_font={'size':16}
axis_font={'size':12}

In [225]:
# Figure 1

category_counts = apps_df['Category'].value_counts().nlargest(10)
fig1=px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x':'Category','y':'Count'},
    title = "Top Categories on playstore",
    color = category_counts.index,
    color_discrete_sequence= px.colors.sequential.Plasma,
    width = 400,
    height = 300
)
fig1.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig1.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig1,"category Graph 1.html","The top categories on the playstore are dominated by tools, entertainment, and productivity apps")

In [226]:
# Figure 2

type_counts = apps_df['Type'].value_counts()
fig2=px.pie(
   values = type_counts.values,
    names=type_counts.index,
    title = "App Type Distribution",
    color_discrete_sequence= px.colors.sequential.RdBu,
    width = 400,
    height = 300
)
fig2.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig2.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig2,"Type Graph 2.html","Most apps on the playstore are free, indicating a strategy to attract users first and monetize through ads or in app purchases")

In [227]:
# Figure 3

fig3=px.histogram(
   apps_df,
    x = 'Rating',
    nbins=20,
    title = "Rating Distribution",
    color_discrete_sequence= ['#636EFA'],
    width = 400,
    height = 300
)
fig3.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig3.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig3,"Rating Graph 3.html","Ratings are skewed towards higher values, suggesting that most apps are rated favourably by users")

In [228]:
# Figure 4

sentiment_counts = reviews_df['Sentiment_Score'].value_counts()
fig4=px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x':'Sentiment Score','y':'Count'},
    title = "Sentiment Distribution",
    color = sentiment_counts.index,
    color_discrete_sequence= px.colors.sequential.RdPu,
    width = 400,
    height = 300
)
fig4.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig4.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig4,"Sentiment Graph 4.html","Sentiments in reviews show a mix of positive and negetive feedback, with a slight lean towards positive sentiments")

In [229]:
#  Figure 5

installs_by_category = apps_df.groupby('Category')['Installs'].sum().nlargest(10)
fig5=px.bar(
    x=installs_by_category.index,
    y=installs_by_category.values,
    orientation = 'h',
    labels={'x':'Installs','y':'Category'},
    title = "Installs by Category",
    color = installs_by_category.index,
    color_discrete_sequence= px.colors.sequential.Blues,
    width = 400,
    height = 300
)
fig5.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig5.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig5,"Installs Graph 5.html","The category with the most installs are social and communication apps, reflecting their broad appeal and daily ")

In [230]:
# Figure 6

updates_per_year = apps_df['Last Updated'].dt.year.value_counts().sort_index()
fig6=px.line(
    x=updates_per_year.index,
    y=updates_per_year.values,
    labels={'x':'Year','y':'Number of Updates'},
    title = "Number of updates over years",
    color_discrete_sequence=['#AB63FA'],
    width = plot_width,
    height =plot_height
)
fig6.update_layout(
    plot_bgcolor= plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color = text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig6.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig6,"Updates Graph 6.html","Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps")

In [231]:
# Figure 7

revenue_by_category = apps_df.groupby('Category')['Revenue'].sum().nlargest(10)
fig7=px.bar(
   x=installs_by_category.index,
    y=installs_by_category.values,
     labels={'x':'Category','y':'Revenue'},
    title = "Revenue by Category",
    color = installs_by_category.index,
    color_discrete_sequence= px.colors.sequential.Greens,
    width = 400,
    height = 300
)
fig7.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig7.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig7,"Revenue Graph 7.html","Categories such as business and productivity lead in revenue generation, including their monetization potential")

In [232]:
# Figure 8

genre_counts = apps_df['Genres'].str.split(';', expand = True).stack().value_counts().nlargest(10)
fig8=px.bar(
   x=genre_counts.index,
    y=genre_counts.values,
     labels={'x':'Genre','y':'Count'},
    title = "Top Genres",
    color = installs_by_category.index,
    color_discrete_sequence= px.colors.sequential.OrRd,
    width = 400,
    height = 300
)
fig8.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig8.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig8,"Genre Graph 8.html","Action and casual genres are the most common, reflecting users' preferences for engaging and easy to apply games")

In [233]:
# Figure 9

fig9 = px.scatter(
    apps_df,
    x='Last Updated',
    y = 'Rating',
    color = 'Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence= px.colors.qualitative.Vivid,
     width = 400,
    height = 300
)
fig9.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig9.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig9,"Update Graph 9.html","The scatter plot shows a weak corelation between the last update and ratings, suggesting that more frequent updates dont always result in better ratings ")
    

In [234]:
# Figure 10

fig10 = px.box(
    apps_df,
    x='Type',
    y = 'Rating',
    color = 'Type',
    title='Rating for paid vs free Apps',
    color_discrete_sequence= px.colors.qualitative.Pastel,
     width = 400,
    height = 300
)
fig10.update_layout(
    plot_bgcolor= 'black',
    paper_bgcolor='black',
    font_color = 'white',
    title_font={'size':16},
    xaxis=dict(title_font={'size':12}),
    yaxis=dict(title_font={'size':12}),
    margin=dict(l=10,r=10,t=30,b=10)
)
#fig10.update_traces(marker=dict(marker=dict(line=dict(color='white',width=1))))
save_plot_as_html(fig10,"Paid free Graph 10.html","Paid apps generally have higher ratings compared to free apps, suggesting that users expect higher quality from apps they pay for ")
    
    

In [235]:
#Container splitting 
plot_containers_split=plot_containers.split('</div>')

if len(plot_containers_split) > 1:
    final_plot=plot_containers_split[-2]+'</div>'
else:
    final_plot=plot_containers
    

In [236]:
dashboard_html= """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name=viewport" content="width=device-width,initial-scale-1.0">
    <title> Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444
        }}
        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify_content: center;
            padding: 20px;
        }}
        .plot-container {{
            border: 2px solid #555
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0,0,0,0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container: hover .insights {{
            display: block;
        }}
        </style>
        <script>
            function openPlot(filename) {{
                window.open(filename, '_blank');
                }}
        </script>
    </head>
    <body>
        <div class= "header">
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
            <h1>Google Play Store Reviews Analytics</h1>
            <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
        </div>
        <div class="container">
            {plots}
        </div>
    </body>
    </html>
    """

In [237]:
# Formatting and saving the dashboard as an HTML file
final_html=dashboard_html.format(plots=plot_containers, plot_width=plot_width, plot_height=plot_height)

dashboard_path=os.path.join(html_files_path, "web page.html")

with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(final_html)

# Opening the dashboard in a browser
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True

NULLCLASS INTERNSHIP TASKS (MARCH, 2025 - APRIL, 2025)

TASK 1 - Visualization of sentiment distribution using a stacked bar chart

1. To show the sentiment distribution (positive, neutral, negetive) of user reviews by rating groups (1-2 stars, 3-4 stars and 4-5 stars)
2. including only apps with more than 1,000 reviews
3. sort the data into top five groups

In [238]:
# filtering the apps with more than 1000 reviews
filtered_apps = apps_df[apps_df['Reviews'].astype(int)>1000]
filtered_apps

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.700000,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.000000,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,0.0,2018
7,Infinite Painter,ART_AND_DESIGN,4.1,36815,29.000000,1000000,Free,0.0,Everyone,Art & Design,2018-06-14,6.1.61.1,4.2 and up,13.815511,10.513661,0.0,2018
8,Garden Coloring Book,ART_AND_DESIGN,4.4,13791,33.000000,1000000,Free,0.0,Everyone,Art & Design,2017-09-20,2.9.2,3.0 and up,13.815511,9.531771,0.0,2017
10,Text on Photo - Fonteee,ART_AND_DESIGN,4.4,13880,28.000000,1000000,Free,0.0,Everyone,Art & Design,2017-10-27,1.0.4,4.1 and up,13.815511,9.538204,0.0,2017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10809,Castle Clash: RPG War and Strategy FR,FAMILY,4.7,376223,24.000000,1000000,Free,0.0,Everyone,Strategy,2018-07-18,1.4.2,4.1 and up,13.815511,12.837937,0.0,2018
10815,Golden Dictionary (FR-AR),BOOKS_AND_REFERENCE,4.2,5775,4.900000,500000,Free,0.0,Everyone,Books & Reference,2018-07-19,7.0.4.6,4.2 and up,13.122363,8.661294,0.0,2018
10826,Frim: get new friends on local chat rooms,SOCIAL,4.0,88486,,5000000,Free,0.0,Mature 17+,Social,2018-03-23,Varies with device,Varies with device,15.424948,11.390600,0.0,2018
10832,FR Tides,WEATHER,3.8,1195,0.568359,100000,Free,0.0,Everyone,Weather,2014-02-16,6.0,2.1 and up,11.512925,7.085901,0.0,2014


In [239]:
# filtering the top 5 categories

top_5_category = filtered_apps['Category'].value_counts().nlargest(5).index
top_5_category

Index(['FAMILY', 'GAME', 'TOOLS', 'PHOTOGRAPHY', 'PRODUCTIVITY'], dtype='object', name='Category')

In [240]:
top5_filtered_cate = filtered_apps[filtered_apps['Category'].isin(top_5_category)]
top5_filtered_cate

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year
1653,ROBLOX,GAME,4.5,4447388,67.0,100000000,Free,0.0,Everyone 10+,Adventure;Action & Adventure,2018-07-31,2.347.225742,4.1 and up,18.420681,15.307828,0.0,2018
1654,Subway Surfers,GAME,4.5,27722264,76.0,1000000000,Free,0.0,Everyone 10+,Arcade,2018-07-12,1.90.0,4.1 and up,20.723266,17.137746,0.0,2018
1655,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,2018-07-05,1.129.0.2,4.1 and up,20.030119,16.925762,0.0,2018
1656,Solitaire,GAME,4.7,254258,23.0,10000000,Free,0.0,Everyone,Card,2018-08-01,2.137.0,4.1 and up,16.118096,12.446105,0.0,2018
1657,Bubble Shooter,GAME,4.5,148897,46.0,10000000,Free,0.0,Everyone,Casual,2018-07-17,1.20.1,4.0.3 and up,16.118096,11.911010,0.0,2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10795,Reindeer VPN - Proxy VPN,TOOLS,4.2,7339,4.0,100000,Free,0.0,Everyone,Tools,2018-05-10,1.74,4.1 and up,11.512925,8.900958,0.0,2018
10796,Inf VPN - Global Proxy & Unlimited Free WIFI VPN,TOOLS,4.7,61445,7.8,1000000,Free,0.0,Everyone,Tools,2018-07-26,1.9.734,4.1 and up,13.815511,11.025898,0.0,2018
10803,Fatal Raid - No.1 Mobile FPS,GAME,4.3,56496,81.0,1000000,Free,0.0,Teen,Action,2018-08-07,1.5.447,4.0 and up,13.815511,10.941925,0.0,2018
10804,Poker Pro.Fr,GAME,4.2,5442,17.0,100000,Free,0.0,Teen,Card,2018-05-22,4.1.3,2.3 and up,11.512925,8.601902,0.0,2018


In [241]:
# Combining the top 5 filtered categories and review data

merged_df = pd.merge(top5_filtered_cate,reviews_df,on = 'App')
merged_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,...,Android Ver,Log_Installs,Log_Reviews,Revenue,Year,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,4.1 and up,20.030119,16.925762,0.0,2018,"If get free lives refill, continue accumulate ...",Positive,0.374411,0.556987,0.9623
1,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,4.1 and up,20.030119,16.925762,0.0,2018,My original rating 01/2015 5 Stars still holdi...,Positive,0.250000,0.475000,0.9039
2,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,4.1 and up,20.030119,16.925762,0.0,2018,"This good time passing game. However, I like l...",Positive,0.200926,0.437963,0.9325
3,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,4.1 and up,20.030119,16.925762,0.0,2018,"Fun first, spending two weeks level makes want...",Positive,0.183333,0.296825,0.8885
4,Candy Crush Saga,GAME,4.4,22426677,74.0,500000000,Free,0.0,Everyone,Casual,...,4.1 and up,20.030119,16.925762,0.0,2018,Please get rid amount pop ups love things holy...,Positive,0.319444,0.600000,0.9835
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28979,Fun Kid Racing - Motocross,FAMILY,4.1,59768,,10000000,Free,0.0,Everyone,Racing;Action & Adventure,...,4.2 and up,16.118096,10.998226,0.0,2018,love good,Positive,0.600000,0.600000,0.7964
28980,Fun Kid Racing - Motocross,FAMILY,4.1,59768,,10000000,Free,0.0,Everyone,Racing;Action & Adventure,...,4.2 and up,16.118096,10.998226,0.0,2018,Great,Positive,0.800000,0.750000,0.6249
28981,Fun Kid Racing - Motocross,FAMILY,4.1,59768,,10000000,Free,0.0,Everyone,Racing;Action & Adventure,...,4.2 and up,16.118096,10.998226,0.0,2018,It's good,Positive,0.700000,0.600000,0.4404
28982,Fun Kid Racing - Motocross,FAMILY,4.1,59768,,10000000,Free,0.0,Everyone,Racing;Action & Adventure,...,4.2 and up,16.118096,10.998226,0.0,2018,I like motorcycle,Neutral,0.000000,0.000000,0.3612


In [242]:
# segementing ratings into ratings rating groups
def rating_groups(rating):
    if 1<= rating <=2:
        return "1-2 stars"
    elif 3<= rating <= 4:
        return "3-4 stars"
    elif 4< rating <= 5:
        return "4-5 stars"
merged_df['Rating groups'] = merged_df['Rating'].apply(rating_groups)

In [243]:
# count of ratings for the each rating group
rating_group_count = merged_df['Rating groups'].value_counts()

In [244]:
# grouping the data for stacked bar chart
sentiment = (merged_df.groupby(['Category','Rating groups','Sentiment'])['Sentiment'].count().unstack(fill_value=0).reset_index())
sentiment

Sentiment,Category,Rating groups,Negative,Neutral,Positive
0,FAMILY,3-4 stars,100,45,172
1,FAMILY,4-5 stars,1338,364,3334
2,GAME,3-4 stars,78,14,124
3,GAME,4-5 stars,6446,725,9799
4,PHOTOGRAPHY,3-4 stars,41,17,80
5,PHOTOGRAPHY,4-5 stars,422,242,1532
6,PRODUCTIVITY,3-4 stars,21,14,40
7,PRODUCTIVITY,4-5 stars,410,296,1536
8,TOOLS,3-4 stars,58,76,137
9,TOOLS,4-5 stars,289,281,953


In [245]:
# sentiment_melt() - used to reshape the dataframe from a  wide format to a long format

sentiment_melt = pd.melt(sentiment,
                         id_vars=['Category','Rating groups'],
                                value_vars=['Positive','Neutral','Negative'],
                                var_name='Sentiment',value_name='Count')

In [246]:
# creating the stacked bar chart
fig = px.bar(sentiment_melt,
             x='Rating groups',
             y='Count',
             color='Sentiment',
             barmode="stack",
             title="Sentiment Distribution by Rating Groups (Top 5 Categories)",
             color_discrete_map={'Positive':'green','Neutral':'gray','Negative':'red'}
            )

# customizing the chart layout
fig.update_layout(
    plot_bgcolor='black',
    paper_bgcolor="black",
    font_color="white",
    title_font={'size':13}
)

# saving the chart as an HTML file
fig.write_html("sentiment_distribution_Task_1.html") 

# importing the webbrowser to open the graph
import webbrowser
webbrowser.open("sentiment_distribution_Task_1.html")


True

TASK 2 - comparing the average rating and total review count by Grouped Bar Chart

1. To compare the average rating and total review by grouped bar chart for the top 10 app categories
2. Removing the categories where the average rating is below 4.0 and size below 10 M
3. last updated should be in january and
4. The graph should be work only between 3PM IST to 5PM IST 

In [248]:
#  Adding extra column named 'month' to extract (january - 1st month) from 'Last Updated column'
apps_df['Month']=apps_df['Last Updated'].dt.month
apps_df['Month']

0        1
1        1
2        8
3        6
4        6
        ..
10834    6
10836    7
10837    7
10839    1
10840    7
Name: Month, Length: 8892, dtype: int32

In [249]:
# Removing the ratings which are more than 4.0 and size with greater than 10 MB
filtered_apps = apps_df[(apps_df['Rating']>4.0) &(apps_df['Size']>10.0)&(apps_df['Month']==1)]
filtered_apps

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year,Month
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.210340,5.068904,0.0,2018,1
33,Easy Origami Ideas,ART_AND_DESIGN,4.2,1015,11.0,100000,Free,0.0,Everyone,Art & Design,2018-01-06,1.1.0,4.1 and up,11.512925,6.922644,0.0,2018,1
44,Popsicle Sticks and Similar DIY Craft Ideas,ART_AND_DESIGN,4.2,26,12.0,10000,Free,0.0,Everyone,Art & Design,2018-01-03,1.0.0,4.1 and up,9.210340,3.258097,0.0,2018,1
446,Video Caller Id,COMMUNICATION,4.2,15287,17.0,1000000,Free,0.0,Everyone,Communication,2018-01-24,2.2.245,4.0.3 and up,13.815511,9.634758,0.0,2018,1
719,Monster Truck Driver & Racing,EDUCATION,4.4,748,51.0,1000000,Free,0.0,Everyone,Education;Action & Adventure,2017-01-19,1.0.9,2.3 and up,13.815511,6.617403,0.0,2017,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10649,How it Works: FN SCAR assault rifle,FAMILY,4.6,44,45.0,10000,Free,0.0,Everyone,Casual,2018-01-09,2.1.9f7,4.0.3 and up,9.210340,3.784190,0.0,2018,1
10683,Hunting Safari 3D,SPORTS,4.2,36183,20.0,5000000,Free,0.0,Teen,Sports,2018-01-20,1.4,2.1 and up,15.424948,10.496345,0.0,2018,1
10686,Armed Cam Gun Pack,GAME,4.2,1012,50.0,10000,Free,0.0,Teen,Action,2015-01-18,1.0.2,3.0 and up,9.210340,6.919684,0.0,2015,1
10767,NFP 2018,EVENTS,4.8,8,16.0,500,Free,0.0,Everyone,Events,2018-01-09,1.0.3,4.2 and up,6.214608,2.079442,0.0,2018,1


In [250]:
# filtering the top 10 categories from 'filtered_apps'
top_cate = (filtered_apps.groupby('Category')['Installs'].sum().nlargest(10).index)
top_cate

Index(['FAMILY', 'SPORTS', 'GAME', 'ENTERTAINMENT', 'PERSONALIZATION',
       'PHOTOGRAPHY', 'EDUCATION', 'TOOLS', 'TRAVEL_AND_LOCAL',
       'COMMUNICATION'],
      dtype='object', name='Category')

In [251]:
# DataFrame with top 10 app categories  
filtered_top_cate =filtered_apps[filtered_apps['Category'].isin(top_cate)]
filtered_top_cate

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year,Month
446,Video Caller Id,COMMUNICATION,4.2,15287,17.0,1000000,Free,0.0,Everyone,Communication,2018-01-24,2.2.245,4.0.3 and up,13.815511,9.634758,0.0,2018,1
719,Monster Truck Driver & Racing,EDUCATION,4.4,748,51.0,1000000,Free,0.0,Everyone,Education;Action & Adventure,2017-01-19,1.0.9,2.3 and up,13.815511,6.617403,0.0,2017,1
748,Memorado - Brain Games,EDUCATION,4.4,56897,97.0,1000000,Free,0.0,Everyone,Education;Brain Games,2017-01-16,1.10.0,4.1 and up,13.815511,10.948998,0.0,2017,1
917,Nick,ENTERTAINMENT,4.2,123279,25.0,10000000,Free,0.0,Everyone 10+,Entertainment;Music & Video,2018-01-24,2.0.8,4.4 and up,16.118096,11.722205,0.0,2018,1
945,WWE,ENTERTAINMENT,4.5,736864,20.0,10000000,Free,0.0,Teen,Entertainment,2018-01-19,3.17.2,4.1 and up,16.118096,13.510159,0.0,2018,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10552,FK Crvena zvezda,SPORTS,4.9,1211,15.0,10000,Free,0.0,Everyone,Sports,2018-01-22,1.0.1,4.4 and up,9.210340,7.099202,0.0,2018,1
10649,How it Works: FN SCAR assault rifle,FAMILY,4.6,44,45.0,10000,Free,0.0,Everyone,Casual,2018-01-09,2.1.9f7,4.0.3 and up,9.210340,3.784190,0.0,2018,1
10683,Hunting Safari 3D,SPORTS,4.2,36183,20.0,5000000,Free,0.0,Teen,Sports,2018-01-20,1.4,2.1 and up,15.424948,10.496345,0.0,2018,1
10686,Armed Cam Gun Pack,GAME,4.2,1012,50.0,10000,Free,0.0,Teen,Action,2015-01-18,1.0.2,3.0 and up,9.210340,6.919684,0.0,2015,1


In [252]:
# Calculating average rating and total review count for each category
Rat_Rev_compared_df = (filtered_top_cate.groupby('Category')[['Rating','Reviews']].agg({'Rating':'mean',
                                                                                       'Reviews':'sum'}).reset_index())
Rat_Rev_compared_df

Unnamed: 0,Category,Rating,Reviews
0,COMMUNICATION,4.2,15287
1,EDUCATION,4.4,57645
2,ENTERTAINMENT,4.3,869111
3,FAMILY,4.414286,4524786
4,GAME,4.348148,2157275
5,PERSONALIZATION,4.475,155996
6,PHOTOGRAPHY,4.3,542561
7,SPORTS,4.342857,1982017
8,TOOLS,4.2,8010
9,TRAVEL_AND_LOCAL,4.1,974


In [253]:
from datetime import datetime
import pytz
import plotly.graph_objects as go

In [254]:
# ensuring the chart is displayed only between 3PM and 5PM IST

# Sets the Indian Standard Time (IST) using pytz library
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist).time()


start_time = datetime.strptime("15:00:00","%H:%M:%S").time()
end_time = datetime.strptime("17:00:00","%H:%M:%S").time()

In [256]:
# applying (if condition) to enable graph to display between 3PM and 5PM IST

if start_time <= current_time <= end_time:

    fig = go.Figure()

# Adding bar trace for average rating
    fig.add_trace(
        go.Bar(
            x=Rat_Rev_compared_df['Category'],
            y=Rat_Rev_compared_df['Rating'],
            name = 'Average Rating',
            text = Rat_Rev_compared_df['Rating'],
            textposition = 'auto',
            marker_color = 'purple',
            opacity = 0.7
        )
    )

    
# Adding bar trace for total review counts
    fig.add_trace(
        go.Bar(
            x=Rat_Rev_compared_df['Category'],
            y=Rat_Rev_compared_df['Reviews'],
            name = 'Total Review Count',
            text = Rat_Rev_compared_df['Rating'],
            textposition = 'auto',
            marker_color = 'orange',
            opacity = 0.7
         )
    )
    
    
# Customizing the chart layout
    fig.update_layout(
        title = "Comparision of Average Rating and Total Review Counts (Top 10 Categories by Installs)",
        xaxis = dict(title="App Category", tickangle = -45),
        title_font={'size':12},
        yaxis = dict(title="Average Rating"),
        yaxis2 = dict(
            title = " Total Review Count ",
            overlaying = 'y',
            side = 'right'
        ),
        legend = dict(x=0, y=1.1),
        barmode = "group"
    )

    
# saving the chart as HTML file       
    fig.write_html("grouped_bar_chart.html_Task_2.html")
    

# opening the chart in the webbrowser
    webbrowser.open("grouped_bar_chart.html_Task_2.html")
else:
    print("graph is not available outside the time range(3PM - 5PM IST).")


graph is not available outside the time range(3PM - 5PM IST).


TASK 3 - Bubble chart visualization

1. Using a bubble chart to show the relation between app size (in MB) and average rating by number of installs
2. apps only with
   - having rating higher than 3.5,
   - belong to the category (Game, Beauty, Business, commics, communication, Dating, Entertainment, social and event categories)
   - Reviews should be more than 500
   - Sentiment subjectivity should be more than 0.5 and 
   - with more than 50k installs
3. The graph should be work only between 5PM IST to 7PM IST 

In [257]:
# Step 1 : Merging the column (sentiment_subjectivity) with apps dataframe

merged_df = apps_df.merge(reviews_df[['App','Sentiment_Subjectivity']],on='App',how = 'inner')
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year,Month,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018,1,1.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018,1,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018,1,0.0
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018,1,0.6
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,0.0,2018,1,0.9


In [258]:
# Valid categories
category = ['GAME','BEAUTY','BUSINESS','COMMICS','COMMUNICATION','DATING','ENTERTAINMENT','SOCIAL','EVENT']

# filtering the data
filtered_data = merged_df[
                         (merged_df['Rating']>3.5)&
                         (merged_df['Category'].isin(category))&
                         (merged_df['Reviews']>500)&
                         (merged_df['Sentiment_Subjectivity']>0.5)&
                         (merged_df['Installs']>50000)
                         ]

filtered_data

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Revenue,Year,Month,Sentiment_Subjectivity
671,"BestCam Selfie-selfie, beauty camera, photo ed...",BEAUTY,3.9,1739,21.0,500000,Free,0.0,Everyone,Beauty,2018-07-12,1.0.6,4.0.3 and up,13.122363,7.461066,0.0,2018,7,0.950000
672,"BestCam Selfie-selfie, beauty camera, photo ed...",BEAUTY,3.9,1739,21.0,500000,Free,0.0,Everyone,Beauty,2018-07-12,1.0.6,4.0.3 and up,13.122363,7.461066,0.0,2018,7,1.000000
673,"BestCam Selfie-selfie, beauty camera, photo ed...",BEAUTY,3.9,1739,21.0,500000,Free,0.0,Everyone,Beauty,2018-07-12,1.0.6,4.0.3 and up,13.122363,7.461066,0.0,2018,7,1.000000
674,"BestCam Selfie-selfie, beauty camera, photo ed...",BEAUTY,3.9,1739,21.0,500000,Free,0.0,Everyone,Beauty,2018-07-12,1.0.6,4.0.3 and up,13.122363,7.461066,0.0,2018,7,0.833333
675,"BestCam Selfie-selfie, beauty camera, photo ed...",BEAUTY,3.9,1739,21.0,500000,Free,0.0,Everyone,Beauty,2018-07-12,1.0.6,4.0.3 and up,13.122363,7.461066,0.0,2018,7,0.650000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59114,Firefox Focus: The privacy browser,COMMUNICATION,4.4,36981,4.0,1000000,Free,0.0,Everyone,Communication,2018-07-06,5.2,5.0 and up,13.815511,10.518160,0.0,2018,7,0.639286
59115,Firefox Focus: The privacy browser,COMMUNICATION,4.4,36981,4.0,1000000,Free,0.0,Everyone,Communication,2018-07-06,5.2,5.0 and up,13.815511,10.518160,0.0,2018,7,0.505398
59116,Firefox Focus: The privacy browser,COMMUNICATION,4.4,36981,4.0,1000000,Free,0.0,Everyone,Communication,2018-07-06,5.2,5.0 and up,13.815511,10.518160,0.0,2018,7,0.680000
59117,Firefox Focus: The privacy browser,COMMUNICATION,4.4,36981,4.0,1000000,Free,0.0,Everyone,Communication,2018-07-06,5.2,5.0 and up,13.815511,10.518160,0.0,2018,7,0.562626


In [259]:
from datetime import datetime
import pytz
import plotly.graph_objects as go

In [260]:
# Ensuring that graph is displayed between (5PM - 7PM) 

ist = pytz.timezone('Asia/Kolkata')   
current_time = datetime.now(ist).time()


start_time = datetime.strptime("17:00:00","%H:%M:%S").time()
end_time = datetime.strptime("19:00:00","%H:%M:%S").time()

In [261]:
if start_time <= current_time <= end_time:
    
 # Creating the Bubble Chart
    fig = px.scatter(
        filtered_data,
        x='Size',
        y='Rating',
        size='Installs',
        color='Installs',
        hover_name='App',
        title="Bubble Chart: Relationship between App Size and Average Ratings",
        labels={'Size': 'App Size (MB)', 'Rating': 'Average Rating', 'Installs': 'Number of Installs'}
    )

 # Updating the layout for better visualization
    fig.update_layout(
        xaxis_title="App Size (in MB)",
        yaxis_title="Average Rating",
        title_font={'size':15},
        coloraxis_colorbar=dict(title="Installs"),
        template="plotly_white"
    )
    
 # Saving the chart as an HTML file
    fig.write_html("bubble_chart_Task3.html")
    
 # Opening the chart in a web browser
    webbrowser.open("bubble_chart_Task3.html")
else:
    print("Bubble chart is not available outside the time range (5 PM - 7 PM IST).")
    

In [262]:
 from datetime import datetime
 import pytz

# Define plot size
 plot_width = 600
 plot_height = 400

# Filenames of the training graphs 
 regular_plot_files = [
     "category Graph 1.html",
     "Type Graph 2.html",
     "Rating Graph 3.html",
     "Sentiment Graph 4.html",
     "Installs Graph 5.html",
     "Updates Graph 6.html",
     "Revenue Graph 7.html",
     "Genre Graph 8.html",
     "Update Graph 9.html",
     "Paid free Graph 10.html"
 ]
 

 # Time-restricted graphs

time_restricted_plots = [
    
     {
             "file":"sentiment_distribution_Task_1.html",
             "start": "00:00:00",
             "end": "23:59:59"  
     },
     {
              "file":"grouped_bar_chart.html_Task_2.html",
              "start": "15:00:00",
              "end": "17:00:00"  
     },
     {
              "file":"bubble_chart_Task3.html",
              "start": "17:00:00",
              "end": "19:00:00"  
     }
 ]


# function to create plot container HTML
def create_plot_container(filename):
    return f'''
    <div class="plot-container" onclick="openPlot('{filename}')">
        <iframe src="{filename}" width="{plot_width}" height="{plot_height}"></iframe>
    </div>
    '''

# Generating plots HTML
plots_html = ""
for plot_file in regular_plot_files:
    plots_html += create_plot_container(plot_file)
    

# Time zone and current time
ist = pytz.timezone('Asia/Kolkata')
current_time = datetime.now(ist).time()


# Adding time-restricted plots based on current time
for plot in time_restricted_plots:
    start_time = datetime.strptime(plot["start"], "%H:%M:%S").time()
    end_time = datetime.strptime(plot["end"], "%H:%M:%S").time()
    if start_time <= current_time <= end_time:
        plots_html += create_plot_container(plot["file"])
        

# Final dashboard HTML
dashboard_html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Google Play Store Review Analytics</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #333;
            color: #fff;
            margin: 0;
            padding: 0;
        }}
        .header {{
            display: flex;
            align-items: center;
            justify-content: center;
            padding: 20px;
            background-color: #444;
        }}

        .header img {{
            margin: 0 10px;
            height: 50px;
        }}
        .container {{
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            padding: 20px;
        }}
        .plot-container {{
            border: 2px solid #555;
            margin: 10px;
            padding: 10px;
            width: {plot_width}px;
            height: {plot_height}px;
            overflow: hidden;
            position: relative;
            cursor: pointer;
        }}
        .insights {{
            display: none;
            position: absolute;
            right: 10px;
            top: 10px;
            background-color: rgba(0,0,0,0.7);
            padding: 5px;
            border-radius: 5px;
            color: #fff;
        }}
        .plot-container:hover .insights {{
            display: block;
        }}
    </style>
    <script>
        function openPlot(filename) {{
            window.open(filename, '_blank');
        }}
    </script>
</head>
<body>
    <div class="header">
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Logo_2013_Google.png/800px-Logo_2013_Google.png" alt="Google Logo">
        <h1>Google Play Store Reviews Analytics</h1>
        <img src="https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Google_Play_Store_badge_EN.svg/1024px-Google_Play_Store_badge_EN.svg.png" alt="Google Play Store Logo">
    </div>
    <div class="container">
        {plots_html}
    </div>
</body>
</html>
"""



In [263]:
import os
import webbrowser

dashboard_path = os.path.join(os.getcwd(), "final_dashboard.html")

with open(dashboard_path, "w", encoding="utf-8") as f:
    f.write(dashboard_html)

# Opening the dashboard in a browser
webbrowser.open('file://'+os.path.realpath(dashboard_path))

True