In [1]:
import pandas as pd
import plotly.graph_objects as go

In [187]:
# Load the application log: one row per application, each to a different company.
# Company names and other identifying details have been redacted for privacy.
# Missing cells are replaced with 0 (presumably a stage the application never
# reached; the counting code below treats 1 as "reached" -- confirm against the CSV).
df = pd.read_csv('job-applications-data.csv').fillna(0)
df.head(10)

Unnamed: 0,Initial screen,Technical interview,Take home task,Final Interview,Offer
0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0
8,1.0,1.0,1.0,1.0,0.0
9,0.0,0.0,0.0,0.0,0.0


In [188]:
# Tally how many applications reached each stage (a stage column equal to 1
# counts as reached).
# NOTE(review): the +10 / +7 / +2 offsets are not derived from the CSV --
# presumably applications that were never logged in the file; confirm and
# consider naming them as constants.
n_apps = df.shape[0] + 10
n_screens = int(df['Initial screen'].eq(1).sum()) + 7
n_noresponse = n_apps - n_screens  # applications that never got a first screen
n_task = int(df['Take home task'].eq(1).sum())
n_tech = int(df['Technical interview'].eq(1).sum()) + 2
n_final = int(df['Final Interview'].eq(1).sum())
# NOTE(review): the two outcomes below are hard-coded rather than read from
# the data -- confirm they still match the CSV when it changes.
n_offer = 1
n_no_offer = 3

# Print one "<caption> <count>" line per stage.
print("\n".join(
    f"{caption} {count}"
    for caption, count in [
        ('# Number of applications:', n_apps),
        ('# Number of initial screens:', n_screens),
        ('# Number of take home tasks:', n_task),
        ('# Number of technical interviews:', n_tech),
        ('# Number of final interview: ', n_final),
        ('# Number of offers: ', n_offer),
        ('# Number of rejections after final interview: ', n_no_offer),
    ]
))

# Number of applications: 58
# Number of initial screens: 25
# Number of take home tasks: 5
# Number of technical interviews: 10
# Number of final interview:  4
# Number of offers:  1
# Number of rejections after final interview:  3


In [185]:
# Node labels for the Sankey diagram. Links refer to nodes by their position
# in this list, so the order matters.
stages = [
    f"Applied: {n_apps}",              # 0
    f"Initial Screen: {n_screens}",    # 1
    "Take Home Task",                  # 2 (no link attaches to this node)
    f"Technical Interview: {n_tech}",  # 3
    f"Final Interview: {n_final}",     # 4
    f"Offer: {n_offer}",               # 5
    f"No Offer: {n_no_offer}",         # 6
    f"No Response: {n_noresponse}",    # 7
]

# A link's value is the quantity flowing from source to target, i.e. the
# number of applications that arrived at the *target* stage. Using the source
# stage's total instead draws every downstream node one stage too large
# (e.g. "Offer: 1" rendered thicker than "No Offer: 3").
# The Applied -> No Response link carries the applications that never reached
# a first screen, so the "Applied" node is sized to n_apps.
fig = go.Figure(data=[go.Sankey(
    arrangement="snap",
    node=dict(
        label=stages,
        pad=15,
        color=["darkorange", "mediumturquoise", "red", "darkkhaki",
               "fuchsia", "mediumspringgreen", "firebrick", "silver"],
    ),
    link=dict(
        source=[0, 0, 1, 3, 4, 4],
        target=[1, 7, 3, 4, 5, 6],
        value=[n_screens, n_noresponse, n_tech, n_final, n_offer, n_no_offer],
        color=["bisque", "gainsboro", "paleturquoise", "palegoldenrod",
               "mediumspringgreen", "palevioletred"],
    ),
)])

# Figure-wide styling: headline, base font and canvas size.
# NOTE(review): xanchor="right" with x=0.5 anchors the title's right edge at
# the horizontal midpoint, so the title sits left of centre; xanchor="center"
# may have been intended -- confirm.
fig.update_layout(
    title={
        "text": "<b>Data Science Job Search UK</b>",
        "font": {"size": 22},
        "x": 0.5,
        "y": 0.95,
        "xanchor": "right",
        "yanchor": "top",
    },
    font={"size": 14, "family": 'Arial, sans-serif', "color": 'black'},
    width=1000,
    height=600,
)

# Caveats about how the numbers were collected, shown as a left-aligned text
# block with no arrow.
fig.add_annotation(
    text="""
    <b>Notes</b><br>
    1. This job search took place over 5 months from July 2021 to December 2021.<br>
    2. I dropped out of some roles at various stages due having the luxury of being picky.<br>
    3. I did about 5 take home tasks and was due to do more.<br>
    4. I was in several interview processes that I dropped out of once I got an offer.<br>
    5. Most of the jobs were Data Scientist but some were a form of Data/Analytics/ML Engineer.<br>
    6. Assume a +/-5 jobs uncertainty on "Applied" and "Initial Screen" as I didn't always track every job.
    """,
    x=0.25,
    y=0,
    align='left',
    showarrow=False,
    font={"size": 12},
)
fig.show()