### In this notebook, I created a race chart to show browser usage from 2020 to 2022.
### Summary of this notebook
- #### Scraped the data from https://www.w3schools.com/browsers/default.asp
- #### Transformed the data to the required format to plot the graph
- #### Used plotly for interactive data visualization

# Output

In [None]:
# Preview of the final horizontal race chart.
# NOTE: `px` and `final_df_copy` are only defined in cells further down, so this
# cell works only after those cells have been executed.
fig = px.bar(
    final_df_copy,
    x="Percentage",
    y="Browser",
    color="Browser",
    animation_frame="Month_year",
    range_x=[0, 100],
    title="Browsers usage % from 2020 to 2022",
)
fig.show()

# Importing the required libraries

In [None]:
# Importing Data Visualization libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Web scraping libraries
from bs4 import BeautifulSoup as soup
from requests import get
import requests
import re

# To print multiple outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Scrape the browser statistics page (a single page, so no paging loop is needed).
url = 'https://www.w3schools.com/browsers/default.asp'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
r = requests.get(url, timeout=30)
r.raise_for_status()
page_html = soup(r.text, 'html.parser')
table = page_html.find_all("div", class_="w3-responsive")

# Text of every <tr> found inside the "w3-responsive" <div> wrappers, in page order.
title = [row.text for wrapper in table for row in wrapper.find_all("tr")]

# Keep rows up to (not including) the first one whose text has '2019' at index 1,
# i.e. stop at the 2019 header row. Each kept row is split on newlines and the
# first and last pieces are discarded.
tr = []
for row_text in title:
    if row_text.find('2019') == 1:
        break
    tr.append(row_text.split('\n')[1:-1])
tr

### Creating a DataFrame 

In [None]:
# All scraped rows in one frame; the per-year header rows are still ordinary rows here.
data = pd.DataFrame(tr)

# Dividing it into separate dataframes based on year.
# The offsets are hard-coded for the table as scraped: rows 0, 11 and 24 are the
# header rows of the 2022, 2021 and 2020 tables respectively.
# .copy() makes every slice an independent frame, so later cells can relabel and
# drop rows without pandas raising SettingWithCopyWarning.
data_2022 = data.iloc[:11,:].copy()
data_2021 = data.iloc[11:24,:].copy()
data_2020 = data.iloc[24:,:].copy()

In [None]:
# Re-derive the 2022 slice from `data` (as the 2021 and 2020 cells do) so this cell
# can be re-run safely; .copy() detaches it from `data` to avoid SettingWithCopyWarning.
data_2022 = data.iloc[:11,:].copy()
data_2022  # raw slice: the header row still sits in row 0
data_2022.columns = data.iloc[0,:]   # promote the header row to column labels
data_2022 = data_2022.drop([0])      # the header row is not a data row
data_2022

### Remove the percentage symbol to plot the values

In [None]:
def remove_percentage(percentage):
    """Return ``percentage`` with every "%" character removed.

    Parameters
    ----------
    percentage : object
        A single cell value. Strings are cleaned; any other value (for example
        ``None`` or a number) is returned unchanged, so the function can be
        applied to every cell of a DataFrame without raising.

    Returns
    -------
    object
        The string without "%" characters, or the original value if it is not
        a string.
    """
    if not isinstance(percentage, str):
        return percentage
    # str.replace is already a no-op when "%" is absent, so no pre-check is needed.
    return percentage.replace("%", "")

In [None]:
# Strip the "%" signs from every cell, then turn each browser column into numbers.
data_2022 = data_2022.applymap(remove_percentage)
for browser_col in ["Chrome", "Edge", "Firefox", "Safari", "Opera"]:
    data_2022[browser_col] = pd.to_numeric(data_2022[browser_col])
data_2022
data_2022.dtypes

## Now the data is almost clean & we can plot the numbers.

## Repeating the same process for 2021 & 2020

In [None]:
# Rows 11-23 of the scraped table: the 2021 header row followed by its data rows.
# .copy() detaches the slice from `data`, so relabelling it and dropping a row
# does not trigger SettingWithCopyWarning.
data_2021 = data.iloc[11:24,:].copy()
data_2021.columns = data.iloc[11,:]   # promote the header row to column labels
data_2021 = data_2021.drop([11])      # the header row is not a data row
data_2021

In [None]:
# Strip the "%" signs from every cell, then turn each browser column into numbers.
data_2021 = data_2021.applymap(remove_percentage)
for browser_col in ["Chrome", "Edge", "Firefox", "Safari", "Opera"]:
    data_2021[browser_col] = pd.to_numeric(data_2021[browser_col])
data_2021
data_2021.dtypes

In [None]:
# Rows from 24 onwards: the 2020 header row followed by its data rows.
# .copy() detaches the slice from `data`, so relabelling it and dropping a row
# does not trigger SettingWithCopyWarning.
data_2020 = data.iloc[24:,:].copy()
data_2020.columns = data.iloc[24,:]   # promote the header row to column labels
data_2020 = data_2020.drop([24])      # the header row is not a data row
data_2020

In [None]:
# Strip the "%" signs from every cell, then turn each browser column into numbers.
# In the 2020 table the Edge column is still labelled "Edge/IE" at this point.
data_2020 = data_2020.applymap(remove_percentage)
for browser_col in ["Chrome", "Edge/IE", "Firefox", "Safari", "Opera"]:
    data_2020[browser_col] = pd.to_numeric(data_2020[browser_col])
data_2020
data_2020.dtypes

## Changing column name from Edge/IE to Edge to maintain consistency

In [None]:
# Use the label "Edge" for this column, matching the 2021 and 2022 frames.
data_2020 = data_2020.rename(columns={"Edge/IE": "Edge"})
data_2020

## Merging into 1 single DataFrame

In [None]:
# Each frame has a column labelled with its own year; give all three the shared
# label "Month" so the frames line up when they are concatenated.
data_2022 = data_2022.rename(columns={"2022": "Month"})
data_2021 = data_2021.rename(columns={"2021": "Month"})
data_2020 = data_2020.rename(columns={"2020": "Month"})

In [None]:
# Show each frame's column labels to confirm they match before concatenating.
data_2022.columns
data_2021.columns
data_2020.columns

In [None]:
# Stack the three yearly frames on top of each other, newest year first.
final_df = pd.concat(
    [
        data_2022,
        data_2021,
        data_2020,
    ]
)
final_df

## Adding year column

In [None]:
import itertools

# One year label per data row of each yearly frame. The counts are taken from the
# frames themselves rather than hard-coded, so the labels always match the number
# of rows (the 2022 frame covers only part of the year).
list_2022 = [2022] * len(data_2022)
list_2021 = [2021] * len(data_2021)
list_2020 = [2020] * len(data_2020)

# Preview of the combined label list, in the same order as the concatenated frame.
list(itertools.chain(list_2022, list_2021, list_2020))

In [None]:
# Year labels in the same order as the concatenated rows: 2022, then 2021, then 2020.
year_labels = list_2022 + list_2021 + list_2020
final_df["Year"] = year_labels
final_df

## Reshape the data from wide to long format (one row per year, month and browser), which the race chart requires

In [None]:
# Got it from https://stackoverflow.com/questions/28654047/convert-columns-into-rows-with-pandas 

# Wide -> long: "Year" and "Month" stay as identifier columns, while every other
# column becomes a (Browser, Percentage) pair on its own row.
final_df = final_df.melt(
    id_vars=["Year", "Month"],
    var_name="Browser",
    value_name="Percentage",
)
final_df

## Need to sort the values chronologically to plot the graph

In [None]:
# "Month" is sorted as a plain column here; the calendar-order sort happens
# further down, once the numeric "Month_no" column exists.
final_df = final_df.sort_values(by=["Year", "Month", "Browser"])

In [None]:
# English month names in calendar order; a name's 1-based position is its number.
_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)
_MONTH_NUMBERS = {name: number for number, name in enumerate(_MONTH_NAMES, start=1)}


def convert_month_to_number(month):
    """Map a full English month name to its calendar number (1-12).

    The match is exact (case-sensitive, no surrounding whitespace). Any value
    that is not one of the twelve month names yields ``None``.
    """
    if not isinstance(month, str):
        return None
    return _MONTH_NUMBERS.get(month)

In [None]:
# Numeric month, so rows can be ordered by calendar position instead of by name.
month_numbers = final_df["Month"].apply(convert_month_to_number)
final_df["Month_no"] = month_numbers
final_df

In [None]:
# Chronological order: by year, then calendar month, then browser name.
final_df = final_df.sort_values(by=["Year", "Month_no", "Browser"])
final_df

In [None]:
# Inspect the column dtypes of the reshaped, sorted frame.
final_df.dtypes

In [None]:
# Work on a copy so `final_df` itself is left without the extra label column.
final_df_copy = final_df.copy()

# "<Month> <Year>" label, used as the animation frame of the charts.
month_text = final_df_copy["Month"].map(str)
year_text = final_df_copy["Year"].map(str)
final_df_copy["Month_year"] = month_text + ' ' + year_text
final_df_copy

In [None]:
# Vertical bars: one bar per browser, one animation frame per "Month_year" label.
fig = px.bar(
    final_df_copy,
    x="Browser",
    y="Percentage",
    color="Browser",
    animation_frame="Month_year",
    range_y=[0, 100],
    title="Browsers usage % from 2020 to 2022",
)
fig.show()

## Plotting horizontally

In [None]:
# Horizontal bars: the same chart with the axes swapped.
fig = px.bar(
    final_df_copy,
    x="Percentage",
    y="Browser",
    color="Browser",
    animation_frame="Month_year",
    range_x=[0, 100],
    title="Browsers usage % from 2020 to 2022",
)
fig.show()

# Connect with me on
- ### LinkedIn : https://linkedin.com/in/bomma-pranay 
- ### Twitter : https://twitter.com/Pranaii1
- ### GitHub : https://github.com/Bomma-Pranay 
### --- by Pranay Bomma, a Data Science enthusiast