<a href="https://colab.research.google.com/github/mesavn04/my-first-binder/blob/master/Plotly_for_Hierarchical_Data_Visualization_Treemaps_and_More.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Download and Prepare the Data

In [None]:
# Environment setup: install the Selenium Python package with pip.
!pip install selenium
!apt-get update # to update ubuntu to correctly run apt install
# Install the chromium-chromedriver package with apt.
!apt install chromium-chromedriver
# Copy the chromedriver binary into /usr/bin (presumably so it can be found on PATH -- confirm).
!cp /usr/lib/chromium-browser/chromedriver /usr/bin

In [None]:
import sys
# NOTE(review): sys.path controls Python module lookup, not executable lookup,
# so this entry most likely has no effect on locating chromedriver. Kept as-is.
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from io import StringIO
from selenium import webdriver
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd

# Configure Chrome to run headless with the sandbox and /dev/shm usage disabled.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')

site = 'https://www.bls.gov/news.release/jolts.t04.htm'

# Pass the options through `options=`. The older positional driver path and the
# `chrome_options=` keyword are deprecated and rejected by recent Selenium
# releases; the driver binary is resolved from PATH instead.
wd = webdriver.Chrome(options=chrome_options)
try:
    wd.get(site)
    sleep(5)  # fixed pause before reading the page source (presumably to let the page finish loading)
    html = wd.page_source
finally:
    # Always shut the browser down, even if the page load fails, so no
    # Chrome/chromedriver process is left running.
    wd.quit()

# `df` is the LIST of DataFrames returned by read_html (one per <table> found).
# The markup is wrapped in StringIO because passing literal HTML text to
# read_html is deprecated in recent pandas versions.
df = pd.read_html(StringIO(html))
df[0].head(5)

In [None]:
# Write the first scraped table to a local workbook, keeping the index column.
df[0].to_excel(
    'quits_levels_and_rates.xlsx',
    index=True,
    sheet_name='By_Industry',
)

In [None]:
# Reading .xlsx files needs openpyxl (pip install openpyxl).
import plotly.graph_objects as go
import pandas as pd

# Columns to keep from the saved sheet, and the names they are given here.
source_cols = ['Industry and region', 'Oct.2021(p)', 'Oct.2021(p).1']
new_cols = ['Industry', 'Oct_2021_Quits', 'Oct_2021_Quit_Rates']

# Reload the workbook (skipping its first row), keep only the three columns
# of interest and rename them in one chain.
df0 = (
    pd.read_excel('quits_levels_and_rates.xlsx', sheet_name="By_Industry", skiprows=1)
    .loc[:, source_cols]
    .rename(columns=dict(zip(source_cols, new_cols)))
)
df0.head(5)

In [None]:
# Keep only rows where every column has a value.
# .copy() makes `df` an independent frame, so the column assignments made on it
# in later cells do not raise SettingWithCopyWarning.
# NOTE: this rebinds `df`, which earlier held the list returned by pd.read_html.
df = df0.dropna(how='any').copy()
df.head(5)

# Create a Treemap with go.Treemap

In [None]:
# Hierarchy definition: each parent label mapped to the industry labels directly beneath it.
# 'Total' sits under the empty-string parent.
_CHILDREN_BY_PARENT = {
    "": ('Total',),
    'Total': ('Total private', 'Government'),
    'Total private': (
        'Mining and logging', 'Construction', 'Manufacturing',
        'Trade, transportation, and utilities', 'Information',
        'Financial activities', 'Professional and business services',
        'Education and health services', 'Leisure and hospitality',
        'Other services',
    ),
    'Government': ('Federal', 'State and local'),
    'Manufacturing': ('Durable goods', 'Nondurable goods'),
    'Trade, transportation, and utilities': (
        'Wholesale trade', 'Retail trade',
        'Transportation, warehousing, and utilities',
    ),
    'Financial activities': ('Finance and insurance', 'Real estate and rental and leasing'),
    'Education and health services': ('Educational services', 'Health care and social assistance'),
    'Leisure and hospitality': ('Arts, entertainment, and recreation', 'Accommodation and food services'),
    'State and local': ('State and local education', 'State and local, excluding education'),
}

# Inverted view used for lookups: industry label -> parent label.
_PARENT_BY_INDUSTRY = {
    child: parent
    for parent, children in _CHILDREN_BY_PARENT.items()
    for child in children
}


def f(row):
    """Return the parent label for a row, based on its 'Industry' value.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must support ``row['Industry']``.

    Returns
    -------
    str
        The parent label; ``""`` for 'Total', and ``'NA'`` for any industry
        that is not listed in the hierarchy.
    """
    return _PARENT_BY_INDUSTRY.get(row['Industry'], 'NA')

# Add the 'parent' column, switch to the generic column names the chart cells
# use (id / value / color), and fix the column order.
# Written as one non-mutating chain: no column assignment on the dropna()
# result and no inplace=True, which avoids SettingWithCopyWarning.
# NOTE: 'Industry' is renamed away here, so re-running this cell without first
# re-running the cell that builds `df` fails inside f (it reads row['Industry']).
df = (
    df.assign(parent=df.apply(f, axis=1))
    .rename(columns={'Industry': 'id', 'Oct_2021_Quits': 'value', 'Oct_2021_Quit_Rates': 'color'})
    .loc[:, ['id', 'parent', 'value', 'color']]
)

In [None]:
# First treemap: rectangle area comes from the 'value' column, shading from the 'color' column.
treemap_trace = go.Treemap(
    name='',
    labels=df['id'],  # text shown in each rectangle
    parents=df['parent'],  # parent label of every node; this defines the hierarchy
    values=df['value'],  # area/size of each rectangle
    branchvalues='total',  # a node's value is taken to be the value of all its descendants
    textinfo="label",  # what to print inside each rectangle
    pathbar=dict(textfont=dict(size=15)),
    marker={
        'colors': df['color'],  # drives the continuous color scale
        'colorscale': 'oranges',
    },
    # Content and formatting of the hover text.
    hovertemplate='<b>%{label} </b> <br> <br> Quit Rate: %{color:.1f}%<br>',
)

fig = go.Figure(data=[treemap_trace])
fig.show()

# Fine-Tune the Treemap

In [None]:
# Same treemap, but without `values`: a category's area is then split equally
# among the subcategories inside its parent, which stops some industry sectors
# from being squeezed into extremely small rectangles.
equal_area_trace = go.Treemap(
    name='',
    labels=df['id'],
    parents=df['parent'],
    branchvalues='total',
    textinfo="label",
    pathbar=dict(textfont=dict(size=15)),
    marker={'colors': df['color'], 'colorscale': 'oranges'},
    hovertemplate='<b>%{label} </b> <br> <br> Quit Rate: %{color:.1f}%<br>',
)
fig = go.Figure(data=[equal_area_trace])

# uniformtext forces every text label to the same font size.
fig.update_layout(
    uniformtext_minsize=10,
    uniformtext_mode='show',
    margin={'t': 50, 'l': 25, 'r': 25, 'b': 25},
)

fig.show()

In [None]:
# Long labels rewritten with <br> line breaks so they wrap inside small tiles.
_WRAPPED_LABELS = {
    'Transportation, warehousing, and utilities': 'Transport-<br>ation/<br>utilities',
    'Finance and insurance': 'Finance/<br>Insurance',
    'Real estate and rental and leasing': 'Real estate/<br>rental/<br>leasing',
    'Arts, entertainment, and recreation': 'Arts/<br>entertainment/<br>recreation',
    'Accommodation and food services': 'Accommo-<br>dation/<br>food<br>services',
    'Mining and logging': 'Mining/logging',
    'Professional and business services': 'Professional/<br>business services',
    'State and local education': 'State/local<br>education',
    'State and local, excluding education': 'State and<br>local<br>(excluding<br>education)',
    'Educational services': 'Educational<br>services',
    'Health care and social assistance': 'Health Care/<br>social<br>assistance',
    'Durable goods': 'Durable<br>goods',
    'Nondurable goods': 'Nondurable<br>goods',
}


def f(row):
    """Return the display label for a row, based on its 'id' value.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must support ``row['id']``.

    Returns
    -------
    The shortened, ``<br>``-wrapped label when one is defined for this id;
    otherwise ``row['id']`` unchanged.
    """
    label = row['id']
    return _WRAPPED_LABELS.get(label, label)

# Store the wrapped display label for every row in a new 'id2' column.
wrapped_labels = df.apply(f, axis='columns')
df['id2'] = wrapped_labels

In [None]:
# Final, fine-tuned treemap: wrapped labels, equal-area tiles and a centered title.
final_trace = go.Treemap(
    name='',
    labels=df['id2'],  # 'id2' holds the wrapped text version of each label
    parents=df['parent'],
    # `values` is left out on purpose (equal-area tiles).
    branchvalues='total',
    textinfo="label",
    pathbar=dict(textfont=dict(size=15)),
    marker={'colors': df['color'], 'colorscale': 'oranges'},
    hovertemplate='<b>%{label} </b> <br> <br> Quit Rate: %{color:.1f}%<br>',
)
fig = go.Figure(data=[final_trace])

fig.update_layout(
    uniformtext_minsize=10,
    uniformtext_mode='show',
    margin={'t': 50, 'l': 25, 'r': 25, 'b': 25},
    title=dict(
        text='Oct 2021 Quit Rates by Industry',
        x=0.5,
        font=dict(family="Arial", size=22, color="black"),
    ),
)

fig.show()

# Bonus: Sunburst Chart and Icicle Chart

In [None]:
# Sunburst chart built from the same hierarchy, using the wrapped labels.
sunburst_trace = go.Sunburst(
    name='',
    labels=df['id2'],
    parents=df['parent'],
    # `values` is left out on purpose, as in the fine-tuned treemap.
    textinfo="label",
    marker={'colors': df['color'], 'colorscale': 'oranges'},
    hovertemplate='<b>%{label} </b> <br> <br> Quit Rate: %{color:.1f}%<br>',
)
fig = go.Figure(data=[sunburst_trace])

fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='show',
    margin={'t': 50, 'l': 25, 'r': 25, 'b': 25},
    title=dict(
        text='Oct 2021 Quit Rates by Industry',
        x=0.5,
        font=dict(family="Arial", size=22, color="black"),
    ),
)

fig.show()

In [None]:
# Icicle chart built from the same hierarchy.
icicle_trace = go.Icicle(
    name='',
    labels=df['id'],  # plain 'id' labels: for this chart the unwrapped text reads better
    parents=df['parent'],
    # `values` is left out on purpose, as in the fine-tuned treemap.
    textinfo="label",
    marker={'colors': df['color'], 'colorscale': 'oranges'},
    hovertemplate='<b>%{label} </b> <br> <br> Quit Rate: %{color:.1f}%<br>',
    tiling={
        'orientation': 'h',  # lay the levels out horizontally
        'flip': 'y',  # flip the computed positions on the y axis
    },
)
fig = go.Figure(data=[icicle_trace])

fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='show',
    margin={'t': 50, 'l': 25, 'r': 25, 'b': 25},
    title=dict(
        text='Oct 2021 Quit Rates by Industry',
        x=0.5,
        font=dict(family="Arial", size=22, color="black"),
    ),
)

fig.show()