# Replicating Previous Visualizations in Dash

In [1]:
from jupyter_dash import JupyterDash
from dash import Dash, html, dcc
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import pandas as pd
import numpy as np
from dateutil.parser import parse

## Data Loading and Preprocessing

In [2]:
with open("C:/Users/mjwan/Documents/Jobs/applied_8_9_2022.txt", "r", encoding="utf-8") as file:
    text = file.readlines()

# filter job lines
date_dict = {}

for line in text:
    try: 
        date = parse(line)
        date_dict[date] = []
    except:
        if ":" in line:
            date_dict[date].append(line)

jobs_by_date = [item for sublist in [list(map(lambda vv: (k, vv), v)) for k, v in date_dict.items()] for item in sublist]

In [3]:
df = pd.DataFrame(jobs_by_date, columns=["Date_Applied", "Line"])
df["Company"] = df["Line"].str.strip().str.split(":").apply(lambda l: l[0].strip())
df["Title"] = df["Line"].str.strip().str.split(":|w.+\$$|==>", regex=True).apply(lambda l: l[1].strip().title())
df["Result"] = df["Line"].str.strip().str.split("==>").apply(lambda l: l[1].strip().title() if len(l) > 1 else "No Response")
df["DOW"] = df["Date_Applied"].apply(lambda d: d.day_name())

df_title = df["Title"]

dse_match = "(?i)Data Scientist|Data Science|Science|Scientist"
da_match = "(?i)Data Analyst|Data Analytics|Analytics|Analyst"
ml_match = "(?i)Machine Learning Engineer|Machine Learning"
de_match = "(?i)Data Engineer|Engineer|Engineering|Database"

# fixing overlaps
dse_series = df_title[df_title.str.contains(dse_match)]

ml_series = df_title[df_title.str.contains(ml_match)]
de_series = df_title[df_title.str.contains(de_match)]
de_series = de_series[(~de_series.isin(ml_series)) & (~de_series.isin(dse_series))]

da_series = df_title[df_title.str.contains(da_match)]
da_series = da_series[(~da_series.isin(dse_series)) & (~da_series.isin(de_series)) & (~da_series.isin(ml_series))]

other_series = df_title[~((df_title.str.contains(dse_match)) | (df_title.str.contains(da_match)) | (df_title.str.contains(ml_match)) | (df_title.str.contains(de_match)))]

df["Broad_Role"] = pd.Series("Data Scientist", dse_series.index).combine_first(pd.Series("Data Analyst", da_series.index)) \
    .combine_first(pd.Series("Data Engineer", de_series.index)) \
    .combine_first(pd.Series("ML Engineer", ml_series.index)) \
    .combine_first(pd.Series("Other", other_series.index))

In [4]:
df.head()

Unnamed: 0,Date_Applied,Line,Company,Title,Result,DOW,Broad_Role
0,2022-08-09,NYT: Data Insights (Games) - wordleBitch123$\n,NYT,Data Insights (Games) -,No Response,Tuesday,Other
1,2022-08-09,"SPOTIFY: Data Scientist, Spotify for Artists =...",SPOTIFY,"Data Scientist, Spotify For Artists",Rejected,Tuesday,Data Scientist
2,2022-08-09,DOW JONES: Data Analyst - wordleBitch123$\n,DOW JONES,Data Analyst -,No Response,Tuesday,Data Analyst
3,2022-08-09,MANTECH: Data Science/Analytics New Graduate -...,MANTECH,Data Science/Analytics Ne,No Response,Tuesday,Data Scientist
4,2022-08-09,OSCAR HEALTH: Data Scientist\n,OSCAR HEALTH,Data Scientist,No Response,Tuesday,Data Scientist


## Constructing Dash Application

In [5]:
external_stylesheets = ['']

date_count = df.groupby("Date_Applied").size().reindex(pd.date_range(df["Date_Applied"].min(), df["Date_Applied"].max()), fill_value=0)

app = JupyterDash(__name__) #external_stylesheets=external_stylesheets)

# Result color map
result_colors = {
              "No Response": "salmon",  
              "Rejected": "crimson",
              "Contacted": "gold",
              "First Interview": "seagreen",
              "Second Intervew": "mediumseagreen",
              "Third Interview": "mediumaquamarine",
              "Offer": "lime",
              "Scam": "purple"
}

# bar graph role groupby
role_group = df.groupby(["Broad_Role", "Result"]).size().unstack()

role_group_to_bar = role_group.assign(temp_sum=role_group.sum(axis=1)) \
    .sort_values(by="temp_sum", ascending=False).iloc[:,:-1] \
    .reindex(role_group.mean(axis=0).sort_values(ascending=False).index, axis=1)

role_group_fig = px.bar(role_group_to_bar,
            barmode="group",
            color_discrete_map=result_colors,
            labels=dict(Broad_Role="Broad Role Category", 
                        value="# of Applications"),
            title="Application Results by Role Category"
            )

time_series_fig = make_subplots(specs=[[{"secondary_y": True}]])

time_series_fig.add_bar(x=date_count.index, y=date_count.values, name="Daily Count", secondary_y=False)

time_series_fig.add_scatter(x=date_count.cumsum().index, y=date_count.cumsum().values, name="Cumulative Count", fillcolor="orange", secondary_y=True)

time_series_fig.update_layout(yaxis_range=[date_count.min(),date_count.max()+1], title_text="Applications Over Time")
time_series_fig.update_yaxes(title_text="# of Applications Per Day", secondary_y=False)
time_series_fig.update_yaxes(title_text="Cumulative # of Applications", secondary_y=True)

# adding vertical lines
resume_dates = ["2022-08-12", "2022-08-19", "2022-08-31", "2022-09-05"]
interview_dates = df["Date_Applied"][df["Result"].str.contains("(?i)Interview")].values
offer_dates = df["Date_Applied"][df["Result"].str.contains("Offer")].values

for i, x in enumerate(resume_dates):
    #time_series_fig.add_vline(x=str(x), line_width=1, line_dash="dash", line_color="red")
    time_series_fig.add_trace(go.Scatter(x=[str(x),str(x)], 
                         y=[date_count.min(),date_count.max()+1], 
                         mode='lines', 
                         line=dict(color='red', width=2, dash='dash'),
                         name="Resume Updated",
                         legendgroup="Resume Updated",
                         showlegend=False if i > 0 else True))

for i, x in enumerate(interview_dates):
    #time_series_fig.add_vline(x=str(x), line_width=1, line_dash="dashdot", line_color="green")
    time_series_fig.add_trace(go.Scatter(x=[str(x),str(x)], 
                         y=[date_count.min(),date_count.max()+1], 
                         mode='lines', 
                         line=dict(color='green', width=1, dash='dashdot'),
                         name="Application(s) Led to Interview",
                         legendgroup="Application(s) Led to Interview",
                         showlegend=False if i > 0 else True))
    
for i, x in enumerate(offer_dates):
    #time_series_fig.add_vline(x=str(x), line_width=1, line_dash="longdashdot", line_color="black")
    time_series_fig.add_trace(go.Scatter(x=[str(x),str(x)], 
                     y=[date_count.min(),date_count.max()+1], 
                     mode='lines', 
                     line=dict(color='black', width=1, dash='longdashdot'),
                     name="Application(s) Led to Offer",
                     legendgroup="Application(s) Led to Offer",
                     showlegend=False if i > 0 else True))
    
pie_fig = px.pie(df, values=df["Result"].value_counts(), names=df["Result"].value_counts().index,
                color=df["Result"].value_counts().index,
                color_discrete_map=result_colors)    
    
app.layout = html.Div(children=[
    html.H1(children='Journey to an Entry-Level Job'),

    html.Div(children=[
        html.H2("""Background"""),
        
        html.P("""bigman""")
    
    ]),
    
    html.Div(children=[
        html.H2("""Examining Applications over Time"""),
        
        html.P("""The date range of applications I will look at is 08/09/2022 to 12/31/2022, bigman"""),
        
        dcc.Graph(
            id='time-series',
            figure=time_series_fig
        )
    
    ]),
    
    html.Div(children=[
        html.H2("""Overall Results of Applications"""),
        
        html.P(""""""),
        
        dcc.Graph(
            id='results-pie',
            figure=pie_fig
        )
    ]),
    
    html.Div(children=[
        html.H2("""Examining Applications by Role"""),

        dcc.Graph(
            id='role-groups',
            figure=role_group_fig
        )
    ])
])

# when in script
#if __name__ == '__main__':
#    app.run_server(debug=True)

In [6]:
app.run_server(mode='jupyterlab', debug=False)

 * Running on http://127.0.0.1:8050
Press CTRL+C to quit
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET /_alive_cb2010eb-433c-491c-ad43-dc86b0cbbb16 HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 304 -
127.0.0.1 - - [07/Jan/2023 16:11:58] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 304 -
