In [56]:
import pandas as pd
import numpy as np
import plotly.express as px  # usually imported as px, not pio
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import webbrowser
import os

In [57]:
# Both CSV files are read from the same folder.
# NOTE(review): the path looks like a Colab Google Drive mount - adjust it when running elsewhere.
dataset_dir = "/content/drive/MyDrive/googleplaystoreDataset"
apps_df = pd.read_csv(f"{dataset_dir}/googleplaystore.csv")
reviews_df = pd.read_csv(f"{dataset_dir}/googleplaystore_user_reviews.csv")

In [58]:
# Preview the first rows of the app metadata table.
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [59]:
# Preview the first rows of the user reviews table.
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3


Data Cleaning

In [60]:
# List the column names of apps_df.
apps_df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

In [61]:
# List the column names of reviews_df.
reviews_df.columns

Index(['App', 'Translated_Review', 'Sentiment', 'Sentiment_Polarity',
       'Sentiment_Subjectivity'],
      dtype='object')

In [62]:
import warnings

# Ignore all warnings
# NOTE(review): this filter has no category or module restriction, so it also
# hides any deprecation or chained-assignment warnings that later cells may emit.
warnings.filterwarnings('ignore')

In [63]:
# Keep only apps that have a rating.
apps_df = apps_df.dropna(subset=['Rating'])

# Fill the remaining gaps in each column with its most frequent value (mode).
# The filled column is assigned back explicitly: calling
# fillna(inplace=True) on the selection apps_df[column] is a chained
# assignment, which under pandas Copy-on-Write only changes a temporary
# object and leaves apps_df untouched.
for column in apps_df.columns:
    modes = apps_df[column].mode()
    # mode() is empty when a column holds no values at all; nothing to fill with.
    if not modes.empty:
        apps_df[column] = apps_df[column].fillna(modes.iloc[0])

# Remove rows that are exact duplicates of an earlier row.
apps_df = apps_df.drop_duplicates()

# Keep only rows whose 'Rating' is at most 5.
apps_df = apps_df[apps_df['Rating'] <= 5]

# Drop reviews that have no text in 'Translated_Review'.
reviews_df = reviews_df.dropna(subset=['Translated_Review'])

In [64]:
# Convert 'Installs' to integers by removing the ',' separators and the '+'.
# regex=False treats each pattern as a literal character. Without it the
# result depends on the pandas version: when patterns are interpreted as
# regular expressions, '+' is not a valid pattern and '$' matches the end of
# the string instead of the dollar sign.
apps_df['Installs'] = (
    apps_df['Installs']
    .str.replace(',', '', regex=False)
    .str.replace('+', '', regex=False)
    .astype(int)
)

# Convert 'Price' to floats after removing the literal '$' sign.
apps_df['Price'] = apps_df['Price'].str.replace('$', '', regex=False).astype(float)

In [65]:
# Check the column dtypes after converting 'Installs' and 'Price'.
apps_df.dtypes

Unnamed: 0,0
App,object
Category,object
Rating,float64
Reviews,object
Size,object
Installs,int64
Type,object
Price,float64
Content Rating,object
Genres,object


In [66]:
# Inner-join the app metadata with the user reviews on the app name, so only
# apps that appear in both tables are kept.
# NOTE(review): the merge runs before the later 'Size'/'Reviews'/date
# conversions and derived columns, so merged_df does not contain them.
merged_df = pd.merge(apps_df, reviews_df, on='App', how='inner')

In [67]:
# Preview the first rows of the merged apps + reviews table.
merged_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,A kid's excessive ads. The types ads allowed a...,Negative,-0.25,1.0
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,It bad >:(,Negative,-0.725,0.833333
2,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,like,Neutral,0.0,0.0
3,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I love colors inspyering,Positive,0.5,0.6
4,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up,I hate,Negative,-0.8,0.9


In [68]:
# List the column names of apps_df.
apps_df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

In [69]:
def convert_size(size):
    """Convert a textual app size into a number of megabytes.

    Parameters
    ----------
    size : str
        Size label ending in a unit letter, e.g. ``'19M'`` (megabytes) or
        ``'512k'`` (kilobytes). The kilobyte suffix is accepted in either
        case, so lower-case ``k`` labels are no longer turned into NaN.

    Returns
    -------
    float
        The size in megabytes (kilobytes are divided by 1024). ``np.nan`` is
        returned for anything that cannot be interpreted: non-string input
        (such as a missing value), labels without a unit suffix (such as
        ``'Varies with device'``) or a non-numeric amount.
    """
    # Missing values arrive as float NaN; a membership test on them would raise.
    if not isinstance(size, str):
        return np.nan

    label = size.strip()
    try:
        if label.endswith('M'):
            return float(label[:-1])
        if label.endswith(('k', 'K')):
            return float(label[:-1]) / 1024
    except ValueError:
        # The text before the unit is not a number.
        return np.nan
    return np.nan

# Replace each textual 'Size' label with the numeric value returned by convert_size.
# NOTE(review): the column is overwritten in place, so re-running this cell
# passes floats, not the original strings, to convert_size.
apps_df['Size'] = apps_df['Size'].apply(convert_size)

In [70]:
# Display apps_df after the 'Size' conversion.
apps_df

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10834,FR Calculator,FAMILY,4.0,7,2.6,500,Free,0.0,Everyone,Education,"June 18, 2017",1.0.0,4.1 and up
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [71]:
# List the column names of apps_df.
apps_df.columns

Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')

In [72]:
# Natural logarithm of the install counts.
# NOTE(review): np.log maps 0 to -inf - confirm that no remaining app has 0 installs.
apps_df['Log_Installs']=np.log(apps_df['Installs'])

In [73]:
# Cast the 'Reviews' column to integers so it can be used numerically.
apps_df['Reviews']= apps_df['Reviews'].astype(int)

In [74]:
# Natural logarithm of the review counts.
# NOTE(review): np.log maps 0 to -inf - confirm that no remaining app has 0 reviews.
apps_df['Log_Reviews']=np.log(apps_df['Reviews'])

In [75]:
# Check the column dtypes after the 'Size' and 'Reviews' conversions.
apps_df.dtypes

Unnamed: 0,0
App,object
Category,object
Rating,float64
Reviews,int64
Size,float64
Installs,int64
Type,object
Price,float64
Content Rating,object
Genres,object


In [76]:
def rating_group(rating):
    """Return the descriptive bucket for a numeric rating.

    The lower bounds are tested from highest to lowest and the first one the
    rating reaches decides the label; a rating that reaches none of them
    (anything below 2) is labelled 'Below average'.
    """
    lower_bounds = (
        (4, 'Top Rated app'),
        (3, 'Above average'),
        (2, 'Average'),
    )
    for bound, label in lower_bounds:
        if rating >= bound:
            return label
    return 'Below average'

# Label every app with the bucket returned by rating_group for its 'Rating'.
apps_df['Rating_Group'] = apps_df['Rating'].apply(rating_group)


In [77]:
# Revenue figure computed as price multiplied by the install count.
# NOTE(review): 'Installs' was parsed from labels such as '10,000+', so this is
# presumably a rough estimate rather than actual revenue - confirm before quoting it.
apps_df['Revenue']=apps_df['Price']*apps_df['Installs']

In [78]:
import nltk
# Download the 'vader_lexicon' resource needed by SentimentIntensityAnalyzer.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [79]:
# Create the VADER analyzer used to score review text in the following cells.
sia = SentimentIntensityAnalyzer()

In [80]:
# Sanity check: score a clearly positive sentence and print all polarity scores.
review = "This appp is amazing! I love new features"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.376, 'pos': 0.624, 'compound': 0.8516}


In [81]:
# Sanity check: score a clearly negative sentence and print all polarity scores.
review = "This appp is very bad! I hate the new features"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.535, 'neu': 0.465, 'pos': 0.0, 'compound': -0.8427}


In [82]:
# Sanity check: score a lukewarm sentence and print all polarity scores.
review = "This app is okay"
sentiment_score = sia.polarity_scores(review)
print(sentiment_score)

{'neg': 0.0, 'neu': 0.612, 'pos': 0.388, 'compound': 0.2263}


In [83]:
def _compound_score(text):
    """Return the 'compound' entry of the polarity scores for one review."""
    return sia.polarity_scores(text)['compound']


# Score every translated review with the analyzer created above.
reviews_df['Sentiment_Score'] = reviews_df['Translated_Review'].apply(_compound_score)

In [84]:
# Preview the reviews table with the new 'Sentiment_Score' column.
reviews_df.head()

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity,Sentiment_Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0,0.533333,0.9531
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25,0.288462,0.6597
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4,0.875,0.6249
4,10 Best Foods for You,Best idea us,Positive,1.0,0.3,0.6369
5,10 Best Foods for You,Best way,Positive,1.0,0.3,0.6369


In [85]:
# Parse 'Last Updated' into datetimes; errors='coerce' turns unparseable values into NaT.
apps_df['Last Updated']=pd.to_datetime(apps_df['Last Updated'], errors='coerce')

In [86]:
# Extract the year of the last update into its own column.
apps_df['Year']=apps_df['Last Updated'].dt.year

In [87]:
# Preview apps_df with the derived columns added above.
apps_df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver,Log_Installs,Log_Reviews,Rating_Group,Revenue,Year
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,Everyone,Art & Design,2018-01-07,1.0.0,4.0.3 and up,9.21034,5.068904,Top Rated app,0.0,2018
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,Everyone,Art & Design;Pretend Play,2018-01-15,2.0.0,4.0.3 and up,13.122363,6.874198,Above average,0.0,2018
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,Everyone,Art & Design,2018-08-01,1.2.4,4.0.3 and up,15.424948,11.379508,Top Rated app,0.0,2018
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,Teen,Art & Design,2018-06-08,Varies with device,4.2 and up,17.727534,12.281384,Top Rated app,0.0,2018
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,Everyone,Art & Design;Creativity,2018-06-20,1.1,4.4 and up,11.512925,6.874198,Top Rated app,0.0,2018


Plotly Graphs

In [88]:
# static visualization - fixed images or plots, non-interactive
# interactive visualization - hover over the data, zoom in, zoom out
# fig.write_html("Interactive_plot.html")

In [89]:
# Folder that receives the generated HTML files.
html_files_path = "./"
# exist_ok=True creates the folder only when it is missing, without the
# separate exists() check that could race with another process.
os.makedirs(html_files_path, exist_ok=True)

In [90]:
# Accumulates the HTML of every plot container; save_plot_as_html appends to it.
# NOTE(review): re-running plotting cells without re-running this cell appends duplicates.
plot_containers=""

In [91]:
def save_plot_as_html(fig, filename, insights):
    """Render a figure to HTML, save it to disk and register it for the dashboard.

    Parameters
    ----------
    fig
        Plotly figure to render.
    filename : str
        File name (inside ``html_files_path``) of the saved plot. It is also
        used as the container ``id`` and as the target opened on click.
    insights : str
        Text shown in the container's "insights" overlay. It is inserted into
        the markup as-is.

    Side effects
    ------------
    Appends one ``plot-container`` block to the global ``plot_containers``
    string and writes the rendered plot to ``html_files_path/filename``.
    """
    global plot_containers

    file_path = os.path.join(html_files_path, filename)

    # Render once; the same markup is embedded in the dashboard and saved to disk.
    html_content = pio.to_html(fig, full_html=False, include_plotlyjs='inline')

    # The click handler must be spelled exactly like the JavaScript function
    # defined in the dashboard template (openPlot); JavaScript names are
    # case-sensitive, so the previous 'openplot' never resolved.
    plot_containers += f"""
    <div class="plot-container" id="{filename}" onclick="openPlot('{filename}')">
        <div class="plot">{html_content}</div>
        <div class="insights">{insights}</div>
    </div>
    """

    # Save the standalone plot file.
    with open(file_path, "w", encoding="utf-8") as html_file:
        html_file.write(html_content)


In [92]:
# def save_plot_as_html(fig, filename,insight):
#   global plot_containers
#   file_path=os.path.join(html_files_path, filename)
#   html_content=pio.to_html(fig,full_html=False, include_plotlyjs='inline')
#   plot_containers +=f""<div class = "plot-conatiner" id="{filename}" onclick="openplot('{filename})">
#   <div class="plot">{html_content}</div> <div class="insights">{insights}</div></div>
#   fig.write_html(file_path, full_html=False,include_plotlyjs="inline")

In [93]:
# Shared look-and-feel settings for the plots and the dashboard containers.
plot_width, plot_height = 400, 300    # pixels
plot_bg_color = 'black'
text_color = "white"
title_font = dict(size=16)
axis_font = dict(size=12)

In [94]:
# Number of apps per category, limited to the ten largest categories.
category_counts = apps_df['Category'].value_counts().nlargest(10)

# Bar chart with one bar per category. Size, colours and fonts come from the
# shared plot settings (plot_width, plot_height, plot_bg_color, text_color,
# title_font, axis_font), which also size the dashboard containers.
fig1 = px.bar(
    x=category_counts.index,
    y=category_counts.values,
    labels={'x': "Category", 'y': 'Count'},
    title="Top Categories on Play Store",
    color=category_counts.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig1.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig1, "Category Graph 1.html", "The top categories on the Play Store are dominated by tools, entertainment, and productivity apps.")


Pie Chart

In [95]:
# fig2
# Number of apps per value of the 'Type' column.
type_counts = apps_df['Type'].value_counts()

# Pie chart of the app types, sized from the shared plot settings.
fig2 = px.pie(
    values=type_counts.values,
    names=type_counts.index,
    title="App Type Distribution",
    color_discrete_sequence=px.colors.sequential.RdBu,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig2.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard. The insight sentence was missing
# its predicate ("are free") and is completed here.
save_plot_as_html(fig2, "Type Graph 2.html", "Most apps on the Play Store are free, indicating a strategy to attract users first and monetize through ads or in-app purchases.")


Histogram chart Rating distribution

In [96]:
# Histogram of the 'Rating' column with 20 bins, sized from the shared plot
# settings. (The unused type_counts computation that was copied into this
# cell has been removed.)
fig3 = px.histogram(
    apps_df,
    x='Rating',
    nbins=20,
    title="Rating Distribution",
    color_discrete_sequence=['#636EFA'],
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig3.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig3, "Rating Graph 3.html", "Rating are skewed towards higher values, suggesting that most apps are rated favorably by users")


Bar chart of reviews dataset

In [97]:
def _sentiment_label(score):
    """Bucket a compound sentiment score into Positive / Negative / Neutral.

    Uses the cut-offs conventionally recommended for VADER compound scores:
    >= 0.05 is positive, <= -0.05 is negative, anything in between is neutral.
    """
    if score >= 0.05:
        return 'Positive'
    if score <= -0.05:
        return 'Negative'
    return 'Neutral'


# 'Sentiment_Score' is a continuous value, so counting its exact values (as
# the previous version did) only shows the ten most frequent individual
# scores. Bucket the scores first and count the reviews in each bucket.
sentiment_counts = reviews_df['Sentiment_Score'].apply(_sentiment_label).value_counts()

# Bar chart with one bar per sentiment bucket, sized from the shared plot settings.
fig4 = px.bar(
    x=sentiment_counts.index,
    y=sentiment_counts.values,
    labels={'x': 'Sentiment', 'y': 'Count'},
    title="Sentiment Distribution",
    color=sentiment_counts.index,
    color_discrete_sequence=px.colors.sequential.RdPu,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig4.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig4, "sentiment Graph 4.html", "sentiment in reviews show a mix of positive and negative  feedback with a sight lean towards positive")


Installs column

In [98]:
# Total installs per category, limited to the ten largest totals.
installs_by_category = apps_df.groupby('Category')['Installs'].sum().nlargest(10)

# Horizontal bar chart: with orientation='h' the numeric totals belong on x
# and the categories on y, which is also what the axis labels declare. The
# previous version passed them the other way round.
fig5 = px.bar(
    x=installs_by_category.values,
    y=installs_by_category.index,
    orientation='h',
    labels={'x': 'Installs', 'y': 'Category'},
    title="Installs by Category",
    color=installs_by_category.index,
    color_discrete_sequence=px.colors.sequential.Blues,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins, taken from the shared plot settings.
fig5.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig5, "Installs Graph 5.html", "The categories with the most installs are social and communication apps reflecting thier broad appeal and dailhy usage")


In [99]:
# Make sure 'Last Updated' is a datetime column (a no-op when already parsed).
apps_df['Last Updated'] = pd.to_datetime(apps_df['Last Updated'], errors='coerce')

# Year of each app's last update.
apps_df['Last Updated Year'] = apps_df['Last Updated'].dt.year

# Number of apps whose last update falls in each year, in chronological order.
update_per_year = apps_df['Last Updated Year'].value_counts().sort_index()

# Line chart of the yearly counts, sized from the shared plot settings.
fig6 = px.line(
    x=update_per_year.index,
    y=update_per_year.values,
    labels={'x': 'Year', 'y': 'Number of Updates'},
    title="Number of Updates over the Years",
    color_discrete_sequence=['#AB63FA'],
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig6.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig6, "Updates Graph 6.html", "Updates have been increasing over the years, showing that developers are actively maintaining and improving their apps.")


In [100]:
# Summed 'Revenue' per category, limited to the ten largest totals.
revenue_by_category = apps_df.groupby('Category')['Revenue'].sum().nlargest(10)

# Bar chart with one bar per category, sized from the shared plot settings.
fig7 = px.bar(
    x=revenue_by_category.index,
    y=revenue_by_category.values,
    labels={'x': 'Category', 'y': 'Revenue'},
    title="Revenue by Category",
    color=revenue_by_category.index,
    color_discrete_sequence=px.colors.sequential.Greens,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig7.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig7, "Revenue Graph 7.html", "Categories such as Business and Productivity lead in revenue generation, indicating their monetization potential.")


Scatter plot

In [101]:
# Fig 8
# An app can list several genres separated by ';'. Split them so every genre
# is counted on its own, then keep the ten most frequent genres.
genre_counts = apps_df['Genres'].str.split(';',expand=True).stack().value_counts().nlargest(10)

# Bar chart with one bar per genre, sized from the shared plot settings.
fig8 = px.bar(
    x=genre_counts.index,
    y=genre_counts.values,
    labels={'x': 'Genre', 'y': 'Count'},
    title="Top Genres",
    color=genre_counts.index,
    color_discrete_sequence=px.colors.sequential.OrRd,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig8.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig8, "Genres Graph 8.html", "ACtion and casual genres are the most common refelecting users preferance for enganing and eassy to  play games")


In [102]:
# Scatter plot of rating against the date of the last update, coloured by
# app type and sized from the shared plot settings.
fig9 = px.scatter(
    apps_df,
    x='Last Updated',
    y='Rating',
    color='Type',
    title='Impact of Last Update on Rating',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig9.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig9, "Update Graph 9.html", "The scatter plot shows a weak correlation between the last update and ratings, suggesting that more frequent updates don't always result in better ratings.")


Boxplot

In [103]:
# Box plot of the rating distribution for each app type, sized from the
# shared plot settings.
fig10 = px.box(
    apps_df,
    x='Type',
    y='Rating',
    color='Type',
    title='Rating for Paid vs free  App',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    width=plot_width,
    height=plot_height
)

# Dark theme and tight margins.
fig10.update_layout(
    plot_bgcolor=plot_bg_color,
    paper_bgcolor=plot_bg_color,
    font_color=text_color,
    title_font=title_font,
    xaxis=dict(title_font=axis_font),
    yaxis=dict(title_font=axis_font),
    margin=dict(l=10, r=10, t=30, b=30)
)

# Save the plot and add it to the dashboard.
save_plot_as_html(fig10, "Paid Free Graph 10.html", "Paid apps generally have higher ratings compared to free appssuggesting that users aspect higher quality from apps they pay for.")


In [104]:
# Split the accumulated container markup on the separator '/<div>'.
# NOTE(review): the container template in save_plot_as_html never emits
# '/<div>', so this presumably yields a single element; '</div>' may have been
# intended - confirm. The dashboard below is built from plot_containers directly.
plot_containers_split=plot_containers.split('/<div>')

In [105]:
# When the split produced several pieces, take the second-to-last one and
# re-append a closing tag; otherwise fall back to the complete markup.
final_plot = (
    plot_containers_split[-2] + '</div>'
    if len(plot_containers_split) > 1
    else plot_containers
)

Creating Dashboard

In [121]:
# Page template for the dashboard. It is filled with str.format, so literal
# CSS/JavaScript braces are doubled ({{ }}) and the placeholders are
# {plots}, {plot_width} and {plot_height}.
# Fixes: the opening <title> tag was misspelled (<titile>), and the flexbox
# properties used underscores (align_items, justify_content), which are not
# valid CSS property names.
dashboard_html = """
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset ="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Google Play Store Review Analytics</title>
  <style>
  body {{
    font-family: Arial, sans-serif;
    background-color:#333;
    color: #fff;
    margin: 0;
    padding: 0;
  }}
  .header {{
    display:flex;
    align-items:center;
    justify-content:center;
    padding: 20px;
    background-color:#444;
  }}
  .header img {{
    margin:0 10px;
    height:50px;
  }}
  .container {{
    display:flex;
    flex-wrap:wrap;
    justify-content:center;
    padding:20px;
  }}
  .plot-container {{
    border: 2px solid #555;
    margin: 10px;
    padding:10px;
    width: {plot_width}px;
    height: {plot_height}px;
    overflow: hidden;
    position: relative;
    cursor: pointer;
  }}
  .insights {{
    display : none;
    position: absolute;
    right: 10px;
    top: 10px;
    background-color: rgba(0,0,0,0.7);
    padding: 5px;
    border-radius: 5px;
    color: #fff;
  }}
  .plot-container:hover .insights {{
    display : block;
  }}
  </style>
  <script>
  function openPlot(filename) {{
    window.open(filename, '_blank');
  }}
  </script>
  </head>
  <body>
  <div class = "header">
   <img src="https://upload.wikimedia.org/wikipedia/commons/4/4a/Logo_2013_Google.png" alt="Google Logo">
    <h1>Google Play Store Review Analytics</h1>
    <img src="https://upload.wikimedia.org/wikipedia/commons/7/78/Google_Play_Store_badge_EN.svg" alt="Google Play Store Badge">
    </div>
    <div class="container">
    {plots}
    </div>
    </body>
    </html>
    """

In [122]:
# Fill the template placeholders with the collected plot markup and the container size.
final_html = dashboard_html.format(plots=plot_containers,plot_width=plot_width,plot_height=plot_height)


In [123]:
# Location of the generated dashboard page inside the output folder.
dashboard_path = os.path.join(html_files_path, "web page.html")

In [124]:
# Write the dashboard page to disk as UTF-8.
with open(dashboard_path, "w", encoding="utf-8") as f:
  f.write(final_html)

In [125]:
# Try to open the dashboard in a browser. The recorded output is False.
# NOTE(review): presumably no browser could be launched in this environment -
# open the saved file manually in that case.
webbrowser.open('file://'+os.path.realpath(dashboard_path))

False