In [34]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

#### Data

In [49]:

# Read the establishments-and-jobs extract from the repository's data folder.
# The path is relative to the directory the notebook is run from.
jobs_csv_name = "business-establishments-and-jobs-data-by-business-size-and-industry.csv"
jobs_csv_path = os.path.join("..", "data", jobs_csv_name)

jobs_data = pd.read_csv(jobs_csv_path)

# Preview the first rows to confirm the columns parsed as expected.
jobs_data.head()

Unnamed: 0,Census year,CLUE small area,ANZSIC indusrty,CLUE industry,Business size,Total establishments,Total jobs
0,2007,West Melbourne (Residential),"Transport, Postal and Warehousing","Transport, Postal and Storage",Small business,20,132.0
1,2006,Carlton,Accommodation and Food Services,Accommodation,Small business,17,102.0
2,2006,Carlton,Administrative and Support Services,Admin and Support Services,Non employing,1,
3,2006,Carlton,Arts and Recreation Services,Arts and Recreation Services,Medium business,4,207.0
4,2006,Carlton,Construction,Construction,Medium business,5,196.0


In [36]:
# Show the column labels of the loaded frame. Note that the data set itself
# spells the industry column 'ANZSIC indusrty'; every later cell uses that spelling.
jobs_data.columns

Index(['Census year', 'CLUE small area', 'ANZSIC indusrty', 'CLUE industry',
       'Business size', 'Total establishments', 'Total jobs'],
      dtype='object')

In [50]:
# Collapse rows that are identical across all key columns and record, in a
# "counts" column, how many source rows fell into each combination.
# NOTE: groupby excludes rows whose key contains NaN by default, so rows with a
# missing 'Total jobs' value do not survive this step.
group_keys = [
    "Census year",
    "ANZSIC indusrty",
    "CLUE small area",
    "Total establishments",
    "Business size",
    "Total jobs",
]
jobs_data = jobs_data.groupby(group_keys).size().reset_index(name="counts")
jobs_data

Unnamed: 0,Census year,ANZSIC indusrty,CLUE small area,Total establishments,Business size,Total jobs,counts
0,2002,Accommodation and Food Services,Carlton,3,Non employing,0.0,1
1,2002,Accommodation and Food Services,Carlton,4,Medium business,139.0,1
2,2002,Accommodation and Food Services,Carlton,7,Non employing,0.0,1
3,2002,Accommodation and Food Services,Carlton,11,Medium business,327.0,1
4,2002,Accommodation and Food Services,Carlton,16,Small business,106.0,1
...,...,...,...,...,...,...,...
10356,2022,Wholesale Trade,Southbank,3,Medium business,222.0,1
10357,2022,Wholesale Trade,Southbank,9,Small business,69.0,1
10358,2022,Wholesale Trade,West Melbourne (Industrial),6,Medium business,299.0,1
10359,2022,Wholesale Trade,West Melbourne (Industrial),24,Small business,151.0,1


#### Trial 1 (Z): jobs by industry in 2022 (horizontal bar chart)

In [38]:
# Restrict the grouped data to the census year(s) of interest.
wanted_year = [2022]
wanted_job = ["---"]  # placeholder; not read by any cell shown in this notebook
jobs_data_idz = jobs_data["Census year"].isin(wanted_year)
jobs_data_z = jobs_data[jobs_data_idz]

# Sum the jobs for every (area, industry) pair and return to a flat frame.
summarized_z = (
    jobs_data_z
    .groupby(['CLUE small area', 'ANZSIC indusrty'])['Total jobs']
    .sum()
    .reset_index()
)
summarized_z

Unnamed: 0,CLUE small area,ANZSIC indusrty,Total jobs
0,Carlton,Accommodation and Food Services,2353.0
1,Carlton,Administrative and Support Services,68.0
2,Carlton,Arts and Recreation Services,327.0
3,Carlton,Construction,52.0
4,Carlton,Education and Training,3531.0
...,...,...,...
202,West Melbourne (Residential),Public Administration and Safety,28.0
203,West Melbourne (Residential),"Rental, Hiring and Real Estate Services",31.0
204,West Melbourne (Residential),Retail Trade,173.0
205,West Melbourne (Residential),"Transport, Postal and Warehousing",51.0


In [39]:

# Exclude rows labelled "All ANZSIC" (presumably an all-industry aggregate;
# confirm against the data dictionary) and renumber the remaining rows from 0.
is_specific_industry = summarized_z["ANZSIC indusrty"].ne("All ANZSIC")
summarized_z = summarized_z.loc[is_specific_industry].reset_index(drop=True)


In [40]:
import pandas as pd
from bokeh.core.properties import value
from bokeh.plotting import ColumnDataSource, figure, show
from bokeh.transform import cumsum, factor_cmap

# Horizontal bar chart of total jobs per ANZSIC industry for the selected census year.
#
# `summarized_z` holds one row per (area, industry). Taking only the first row per
# industry would chart whichever area happens to sort first, so the jobs are summed
# across areas instead. The result gets its own name so that re-running this cell
# never changes its own input.
# NOTE(review): "City of Melbourne (total)" appears as an area in this data and is
# presumably the city-wide aggregate; it is left out so jobs are not counted twice.
# Confirm against the data dictionary.
CITY_TOTAL_AREA = "City of Melbourne (total)"

# One color per industry, kept in step with the palette used by the donut chart so
# an industry has the same color in both figures. Every key appears exactly once.
industry_colors = {
    "Accommodation and Food Services": "orchid",
    "Administrative and Support Services": "firebrick",
    "Agriculture, Forestry and Fishing": "skyblue",
    "Arts and Recreation Services": "lightcoral",
    "Construction": "gold",
    "Education and Training": "mediumpurple",
    "Electricity, Gas, Water and Waste Services": "olive",
    "Financial and Insurance Services": "darkorange",
    "Health Care and Social Assistance": "cadetblue",
    "Information Media and Telecommunications": "red",
    "Manufacturing": "lightseagreen",
    "Mining": "darkslateblue",
    "Other Services": "indianred",
    "Professional, Scientific and Technical Services": "khaki",
    "Public Administration and Safety": "palegreen",
    "Rental, Hiring and Real Estate Services": "sienna",
    "Retail Trade": "lightskyblue",
    "Transport, Postal and Warehousing": "lightsteelblue",
    "Wholesale Trade": "thistle",
}
# Used for any industry label missing from the mapping, instead of raising KeyError.
FALLBACK_COLOR = "lightgray"

small_area_rows = summarized_z[summarized_z["CLUE small area"] != CITY_TOTAL_AREA]
industry_totals_z = (
    small_area_rows
    .groupby("ANZSIC indusrty", as_index=False)["Total jobs"]
    .sum()
)
industry_totals_z["color"] = [
    industry_colors.get(name, FALLBACK_COLOR)
    for name in industry_totals_z["ANZSIC indusrty"]
]
# The text glyph is given strings rather than floats so the labels render as
# whole numbers regardless of how the plotting backend formats numeric columns.
industry_totals_z["jobs_label"] = industry_totals_z["Total jobs"].map("{:.0f}".format)

source = ColumnDataSource(industry_totals_z)

industries = industry_totals_z["ANZSIC indusrty"].tolist()
total_jobs = industry_totals_z["Total jobs"]

# Leave 10% headroom to the right of the longest bar for its label; fall back to a
# unit-wide range when there is no data or every total is zero.
has_jobs = len(total_jobs) > 0 and total_jobs.max() > 0
x_upper = total_jobs.max() * 1.1 if has_jobs else 1

p = figure(y_range=industries, x_range=(0, x_upper), height=400, width=700, tools="",
           x_axis_location=None, toolbar_location=None, outline_line_color=None)
p.grid.grid_line_color = None
p.yaxis.fixed_location = 0
p.axis.major_tick_line_color = None
# NOTE(review): this hides the industry names on the y axis, so bars can only be
# told apart by color. Confirm that is intended.
p.axis.major_label_text_color = None
p.axis.axis_line_color = "#4a4a4a"
p.axis.axis_line_width = 6

# One horizontal bar per industry, from 0 to its total number of jobs.
p.hbar(y='ANZSIC indusrty', right='Total jobs', height=0.9, source=source,
       color='color')

# Print the total next to the end of each bar.
p.text(y='ANZSIC indusrty', x='Total jobs', text='jobs_label', source=source,
       x_offset=5, text_baseline="middle", text_font_size="15px", text_color="black")

show(p)

#### Most popular jobs (X and DF): 2022 jobs by industry (donut chart)

In [41]:
from bokeh.io import show
from bokeh.models import (AnnularWedge, ColumnDataSource,
                          Legend, LegendItem, Plot, Range1d)
from bokeh.core.properties import value
from bokeh.plotting import ColumnDataSource, figure, show
from bokeh.transform import cumsum, factor_cmap
from math import pi

# Build `summarized_df`: total jobs per ANZSIC industry for the selected census
# year, one row per industry, indexed 0..n-1.
wanted_year = [2022]
wanted_job = ["---"]  # placeholder; not read by any cell shown in this notebook
jobs_data_idx = jobs_data["Census year"].isin(wanted_year)
jobs_data_x = jobs_data[jobs_data_idx]

# Rows that must not enter the per-industry sums:
#  * "All ANZSIC" is removed by label (the same filter the Z trial uses) rather
#    than by row position, which silently drops the wrong industry whenever the
#    set of industries present changes.
#  * NOTE(review): "City of Melbourne (total)" appears as an area in this data and
#    is presumably the city-wide aggregate; it is left out so jobs are not counted
#    twice. Confirm against the data dictionary.
is_detail_row = (
    jobs_data_x["ANZSIC indusrty"].ne("All ANZSIC")
    & jobs_data_x["CLUE small area"].ne("City of Melbourne (total)")
)

# Sum every remaining row. The rows are deliberately NOT de-duplicated on their
# 'Total jobs' value first: two different areas or business sizes can report the
# same job count, and collapsing them would undercount the industry.
summarized_df = (
    jobs_data_x[is_detail_row]
    .groupby('ANZSIC indusrty')['Total jobs']
    .sum()
    .reset_index()
)

In [42]:
# Donut chart: each industry's share of the total jobs in `summarized_df`.

# Narrow the frame to the two columns the chart needs and add the legend text,
# e.g. "<industry> (<total jobs>)".
summarized_df = summarized_df[['ANZSIC indusrty', 'Total jobs']].assign(
    Industry_with_total_jobs=lambda frame: (
        frame['ANZSIC indusrty'] + ' (' + frame['Total jobs'].astype(str) + ')'
    )
)

# Lookup from industry name to the same kind of label.
industry_labels = {
    name: f"{name} ({jobs})"
    for name, jobs in zip(summarized_df['ANZSIC indusrty'], summarized_df['Total jobs'])
}

# Fixed color per industry.
colors = {
    "Accommodation and Food Services": "orchid",
    "Administrative and Support Services": "firebrick",
    "Agriculture, Forestry and Fishing": "skyblue",
    "Arts and Recreation Services": "lightcoral",
    "Construction": "gold",
    "Education and Training": "mediumpurple",
    "Electricity, Gas, Water and Waste Services": "olive",
    "Financial and Insurance Services": "darkorange",
    "Health Care and Social Assistance": "cadetblue",
    "Information Media and Telecommunications": "red",
    "Manufacturing": "lightseagreen",
    "Mining": "darkslateblue",
    "Other Services": "indianred",
    "Professional, Scientific and Technical Services": "khaki",
    "Public Administration and Safety": "palegreen",
    "Rental, Hiring and Real Estate Services": "sienna",
    "Retail Trade": "lightskyblue",
    "Transport, Postal and Warehousing": "lightsteelblue",
    "Wholesale Trade": "thistle",
}

# Cumulative angle (radians) at which each wedge ends. A wedge starts where the
# previous one ended; the first starts at 0 and the last is closed at exactly 2*pi.
wedge_ends = (
    summarized_df['Total jobs'] / summarized_df['Total jobs'].sum() * 2 * pi
).cumsum().tolist()
angles = [0, *wedge_ends[:-1]]

browsers_source = ColumnDataSource(dict(
    start=angles,
    end=[*wedge_ends[:-1], 2 * pi],
    colors=[colors[name] for name in summarized_df['ANZSIC indusrty']],
    industry=summarized_df['ANZSIC indusrty'],
))

plot = Plot(
    title="Jobs in Melbourne distributed by industry (Total jobs)",
    toolbar_location=None,
    x_range=Range1d(-2, 2),
    y_range=Range1d(-2, 2),
)

glyph = AnnularWedge(
    x=0, y=0, inner_radius=0.9, outer_radius=1.8,
    start_angle="start", end_angle="end",
    line_color="white", line_width=3, fill_color="colors",
)
r = plot.add_glyph(browsers_source, glyph)

# One legend entry per wedge; `index` ties the entry to its row in the data source.
legend = Legend(location="right")
legend.items.extend(
    LegendItem(label=legend_text, renderers=[r], index=row_number)
    for row_number, legend_text in enumerate(summarized_df['Industry_with_total_jobs'])
)
plot.add_layout(legend, "right")

show(plot)


#### Arts and Recreation Services jobs over the years, by area

In [63]:
# Bind a second name to the same frame (plain assignment, so no copy is made)
# and display it as the cell output.
jobs_data_y = jobs_data
jobs_data_y

Unnamed: 0,Census year,ANZSIC indusrty,CLUE small area,Total establishments,Business size,Total jobs,counts
0,2002,Accommodation and Food Services,Carlton,3,Non employing,0.0,1
1,2002,Accommodation and Food Services,Carlton,4,Medium business,139.0,1
2,2002,Accommodation and Food Services,Carlton,7,Non employing,0.0,1
3,2002,Accommodation and Food Services,Carlton,11,Medium business,327.0,1
4,2002,Accommodation and Food Services,Carlton,16,Small business,106.0,1
...,...,...,...,...,...,...,...
10356,2022,Wholesale Trade,Southbank,3,Medium business,222.0,1
10357,2022,Wholesale Trade,Southbank,9,Small business,69.0,1
10358,2022,Wholesale Trade,West Melbourne (Industrial),6,Medium business,299.0,1
10359,2022,Wholesale Trade,West Melbourne (Industrial),24,Small business,151.0,1


In [100]:

# Keep only the rows for the industry of interest.
wanted_industry = ["Arts and Recreation Services"]
jobs_data_idy = jobs_data_y["ANZSIC indusrty"].isin(wanted_industry)
jobs_data_y = jobs_data_y[jobs_data_idy]

# Total jobs per (year, industry, area).
yearly_area_jobs = (
    jobs_data_y
    .groupby(["Census year", "ANZSIC indusrty", "CLUE small area"])['Total jobs']
    .sum()
    .reset_index()
)

# Leave the two "Melbourne (...)" areas out of the comparison and renumber the rows.
excluded_areas = ["Melbourne (CBD)", "Melbourne (Remainder)"]
is_kept_area = ~yearly_area_jobs["CLUE small area"].isin(excluded_areas)
summarized_y = yearly_area_jobs[is_kept_area].reset_index(drop=True)

summarized_y

Unnamed: 0,Census year,ANZSIC indusrty,CLUE small area,Total jobs
0,2002,Arts and Recreation Services,Carlton,127.0
1,2002,Arts and Recreation Services,City of Melbourne (total),18913.0
2,2002,Arts and Recreation Services,Docklands,24.0
3,2002,Arts and Recreation Services,East Melbourne,408.0
4,2002,Arts and Recreation Services,Kensington,395.0
...,...,...,...,...
247,2022,Arts and Recreation Services,Port Melbourne,30.0
248,2022,Arts and Recreation Services,South Yarra,0.0
249,2022,Arts and Recreation Services,Southbank,10177.0
250,2022,Arts and Recreation Services,West Melbourne (Industrial),0.0


In [102]:
import pandas as pd
from bokeh.palettes import Category20
from bokeh.plotting import figure, show

# Line chart: one line per CLUE small area showing its total jobs in each census
# year, drawn from `summarized_y`.
p = figure(width=800, height=400, x_axis_label="Year", y_axis_label="Total Jobs")

# Hand-picked line colors; reused from the start if there are more areas than colors.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
          '#7f7f7f', '#bcbd22', '#17becf', 'blue', '#98df8a', 'red', '#ff9896']

area_groups = summarized_y.groupby('CLUE small area')
for position, (area, area_rows) in enumerate(area_groups):
    p.line(
        area_rows['Census year'],
        area_rows['Total jobs'],
        line_width=4,
        color=colors[position % len(colors)],
        alpha=0.8,
        legend_label=area,
    )

# Legend in the top-left corner; clicking an entry mutes the matching line.
p.legend.location = "top_left"
p.legend.click_policy = "mute"

# Put a tick on every census year present in the data.
p.xaxis.ticker = summarized_y['Census year'].unique()

show(p)

### Old Bokeh code (legacy scratch cell)

In [None]:
# Legacy line-plot cell: draws one line per 'DayOfWeek' group with yearly counts.
# NOTE(review): this cell has no execution count, and neither `Category10` nor
# `crimeDataDay` is imported or defined in any cell shown in this notebook, so it
# looks like a leftover from a different analysis and would raise NameError on a
# fresh "Restart & Run All". Consider deleting it once it is no longer needed
# as a reference.
# TODO: create bokeh plot
p = figure(width=800, height=400, x_axis_label="Year", y_axis_label="Counts")

# Define a palette (index 7 selects the seven-color variant, one per weekday)
palette = Category10[7]

# Group by DayOfWeek and iterate over each group
for i, (day, group) in enumerate(crimeDataDay.groupby('DayOfWeek')):
    color = palette[i % len(palette)]  # Select color from palette cyclically
    p.line(group['Year'], group['counts'], line_width=2, color=color, alpha=0.8, legend_label=day)

# Set legend position and click policy
p.legend.location = "top_left"
p.legend.click_policy = "mute"

# Show all years: one tick per distinct year in the data
p.xaxis.ticker = crimeDataDay['Year'].unique()

# Display the figure (currently disabled)
#show(p)
