# Interactive Chart of Job Compensation Offers in the San Francisco Bay Area

by Max Woolf (@minimaxir)

*This notebook is licensed under the MIT License. If you use the code or data visualization designs contained within this notebook, it would be greatly appreciated if proper attribution is given back to this notebook and/or myself. Thanks! :)*

In [1]:
# Project-local startup script (not shown here). Judging by the attach
# messages printed below, it presumably loads dplyr, readr, ggplot2 and scales
# and defines helpers such as fte_theme() and max_save() -- TODO confirm.
source("Rstart.R")

# Packages for the interactive chart: plotly (ggplotly/layout/config),
# viridis (scale_color_viridis) and htmlwidgets (saveWidget).
library(plotly)
library(viridis)
library(htmlwidgets)

# Print the R version and attached package versions for reproducibility.
sessionInfo()


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Registering fonts with R

Attaching package: ‘scales’

The following objects are masked from ‘package:readr’:

    col_factor, col_numeric


Attaching package: ‘plotly’

The following object is masked _by_ ‘.GlobalEnv’:

    subplot

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:graphics’:

    layout



R version 3.3.0 (2016-05-03)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.11.4 (El Capitan)

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] htmlwidgets_0.6    viridis_0.3.4      plotly_3.6.0       stringr_1.0.0     
 [5] digest_0.6.9       RColorBrewer_1.1-2 scales_0.4.0       extrafont_0.17    
 [9] ggplot2_2.1.0      dplyr_0.4.3        readr_0.2.2       

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.4      Rttf2pt1_1.3.3   magrittr_1.5     munsell_0.4.3   
 [5] uuid_0.1-2       colorspace_1.2-6 R6_2.1.2         httr_1.1.0      
 [9] plyr_1.8.3       tools_3.3.0      parallel_3.3.0   gtable_0.2.0    
[13] DBI_0.4          extrafontdb_1.0  htmltools_0.3.5  assertthat_0.1  
[17] gridExtra_2.2.1  IRdisplay_0.3    tidyr_0.4.1      repr_0.4        
[21] base64enc_0.1-3  IRkernel_0.5    

In [2]:
# Read the job listings CSV and add two derived columns: the midpoint of the
# offered salary range and the midpoint of the offered equity range.
df <- mutate(
    read_csv("sfba_angelist_jobs.csv"),
    salary_mid = (salary_min + salary_max) / 2,
    equity_mid = (equity_min + equity_max) / 2
)

# Preview the first rows.
print(head(df))

Source: local data frame [6 x 14]

  job_id                            job_title  job_type salary_min salary_max
   (int)                                (chr)     (chr)      (int)      (int)
1 120562                      Growth Engineer full-time     100000     140000
2  18839                             Designer full-time      80000     120000
3  88730 Applications (Unreal/Unity) Engineer full-time     100000     130000
4 104888                       Data Scientist full-time     100000     180000
5 135728              Infrastructure Engineer full-time     100000     130000
6  57695           Senior Full Stack Engineer full-time     100000     150000
Variables not shown: equity_cliff (dbl), equity_vest (dbl), equity_min (dbl),
  equity_max (dbl), roles (chr), skills (chr), updated_at (time), salary_mid
  (dbl), equity_mid (dbl)


In [22]:
# Format dollar amounts in thousands, e.g. 100000 -> "$100k".
#
# x: numeric vector of dollar amounts.
# Returns a character vector of the form "$<x/1000>k".
salaryFormat <- function(x) {
    thousands <- x / 1000
    paste0("$", thousands, "k")
}

# Format equity values as percentages, e.g. 0.2 -> "0.2%".
#
# x: vector of equity values already expressed in percent.
# Returns a character vector with "%" appended to each value.
equityFormat <- function(x) {
    percent_suffix <- "%"
    paste0(x, percent_suffix)
}

# Classify job titles into "Engineer", "Designer" or "Other".
#
# job_title: character vector of job titles.
# Returns a character vector of the same length. A title mentioning both
# words is labelled "Engineer", because that test is applied first.
#
# Matching ignores case so that titles such as "software engineer" or
# "UI DESIGNER" are classified the same way as their capitalised forms,
# in line with ifEngineerNonenginner(), which lower-cases titles first.
ifEngineerDesigner <- function(job_title) {
    is_engineer <- grepl("engineer", job_title, ignore.case = TRUE)
    is_designer <- grepl("designer", job_title, ignore.case = TRUE)
    ifelse(is_engineer, "Engineer", ifelse(is_designer, "Designer", "Other"))
}

# Two-way classification of job titles: "Engineer" when the title contains
# "engineer" or "developer" (in any letter case), otherwise "Non-Engineer".
#
# job_title: character vector of job titles.
# Returns a character vector of the same length.
ifEngineerNonenginner <- function(job_title) {
    title_lower <- tolower(job_title)
    is_engineering <- grepl("engineer|developer", title_lower)
    ifelse(is_engineering, "Engineer", "Non-Engineer")
}



# Build the HTML tooltip shown for each listing in the interactive chart.
#
# df: data frame with columns job_title, salary_min, salary_max,
#     equity_min, equity_max and roles.
# Returns one string per row, with <br>-separated lines for the title,
# the salary range, the equity range and the roles.
plot_tooltip <- function(df) {
    salary_low <- salaryFormat(df$salary_min)
    salary_high <- salaryFormat(df$salary_max)
    equity_low <- equityFormat(df$equity_min)
    equity_high <- equityFormat(df$equity_max)

    sprintf("Title: %s<br>Salary: %s — %s<br>Equity: %s — %s<br>Roles: %s",
            df$job_title, salary_low, salary_high,
            equity_low, equity_high, df$roles)
}

# Store the tooltip text for every listing as a new column, then preview it.
df[["tooltip"]] <- plot_tooltip(df)

print(head(df[["tooltip"]]))

[1] "Title: Growth Engineer<br>Salary: $100k — $140k<br>Equity: 0% — 1%<br>Roles: Full Stack Developer"              
[2] "Title: Designer<br>Salary: $80k — $120k<br>Equity: 0.2% — 0.8%<br>Roles: Designer, UI/UX Designer"              
[3] "Title: Applications (Unreal/Unity) Engineer<br>Salary: $100k — $130k<br>Equity: 0.1% — 0.2%<br>Roles: Developer"
[4] "Title: Data Scientist<br>Salary: $100k — $180k<br>Equity: 0.01% — 0.1%<br>Roles: Data Scientist"                
[5] "Title: Infrastructure Engineer<br>Salary: $100k — $130k<br>Equity: 0.1% — 0.35%<br>Roles: DevOps"               
[6] "Title: Senior Full Stack Engineer<br>Salary: $100k — $150k<br>Equity: 0% — 1%<br>Roles: Full Stack Developer"   


In [28]:
# Label every listing with ifEngineerNonenginner() and keep full-time jobs.
#
# Disabled alternative using the three-way Engineer/Designer/Other split:
#   mutate(job_class = factor(ifEngineerDesigner(job_title), levels=c("Engineer", "Designer")))
#
# The job_class != "Other" condition is kept unchanged; with the two-way
# classifier, which only yields "Engineer" / "Non-Engineer", it drops nothing.
df_jobs <- filter(
    mutate(df, job_class = factor(ifEngineerNonenginner(job_title))),
    job_class != "Other",
    job_type == "full-time"
)

print(head(df_jobs))

Source: local data frame [6 x 16]

  job_id                            job_title  job_type salary_min salary_max
   (int)                                (chr)     (chr)      (int)      (int)
1 120562                      Growth Engineer full-time     100000     140000
2  18839                             Designer full-time      80000     120000
3  88730 Applications (Unreal/Unity) Engineer full-time     100000     130000
4 104888                       Data Scientist full-time     100000     180000
5 135728              Infrastructure Engineer full-time     100000     130000
6  57695           Senior Full Stack Engineer full-time     100000     150000
Variables not shown: equity_cliff (dbl), equity_vest (dbl), equity_min (dbl),
  equity_max (dbl), roles (chr), skills (chr), updated_at (time), salary_mid
  (dbl), equity_mid (dbl), tooltip (chr), job_class (fctr)


In [39]:
# Per-class summary: number of listings, share of all rows in df_jobs,
# median salary midpoint and median minimum equity.
#
# NOTE(review): the avg_* columns hold medians, not means, and avg_equity is
# based on equity_min while avg_salary is based on salary_mid -- confirm that
# this asymmetry is intended.
df_jobs_agg <- summarize(
    group_by(df_jobs, job_class),
    count = n(),
    perc = count / nrow(df_jobs),
    avg_salary = median(salary_mid),
    avg_equity = median(equity_min)
)

print(df_jobs_agg)

Source: local data frame [2 x 5]

     job_class count      perc avg_salary avg_equity
        (fctr) (int)     (dbl)      (dbl)      (dbl)
1     Engineer  2965 0.4990742     115000       0.10
2 Non-Engineer  2976 0.5009258      90000       0.02


In [30]:
# Static scatter plot of salary midpoint against equity midpoint, one facet
# per job class, with a horizontal line at each class's median salary.
# Built inside local() so the helper values do not become global variables.
plot <- local({
    # Listings with equity midpoint <= 10 and salary midpoint <= 400000.
    plotted_jobs <- filter(df_jobs, equity_mid <= 10, salary_mid <= 400000)

    equity_breaks <- seq(0, 10, by = 2.5)
    salary_breaks <- seq(0, 350000, by = 50000)

    ggplot(plotted_jobs, aes(x = equity_mid, y = salary_mid, color = salary_mid)) +
        geom_point(alpha = 0.2, stroke = 0, size = 1) +
        geom_hline(data = df_jobs_agg, aes(yintercept = avg_salary)) +
        #geom_smooth(size=0.5, alpha=0.2, color="#1a1a1a", fill="#1a1a1a") +
        fte_theme() +
        scale_x_continuous(limits = c(0, 10),
                           breaks = equity_breaks,
                           labels = equityFormat(equity_breaks)) +
        # The y limit (350000) is below the salary filter (400000), so points
        # in between are dropped by the scale when the plot is drawn.
        scale_y_continuous(limits = c(0, 350000),
                           breaks = salary_breaks,
                           labels = salaryFormat(salary_breaks)) +
        scale_color_viridis() +
        labs(x = "Equity (Midpoint of Offered Range)",
             y = "Salary (Midpoint of Offered Range)",
             title = "Full-Time Job Compensation Offerings in San Francisco Bay Area") +
        facet_wrap(~ job_class, ncol = 2)
})

max_save(plot, "engineer-sfba-1", "AngelList", w = 5)

: Removed 1 rows containing missing values (geom_point).

![](engineer-sfba-1.png)

In [38]:
# Font and axis settings reused by the plotly layout step.
f <- list(family = "Source Sans Pro", color = "#7f7f7f")
a <- list(tickfont = f)

# ggplot source for the interactive chart. Same data, scales and facets as the
# static plot, plus a `text` aesthetic that carries the tooltip strings.
# Built inside local() so the helper values do not become global variables.
i_plot <- local({
    plotted_jobs <- filter(df_jobs, equity_mid <= 10, salary_mid <= 400000)

    equity_breaks <- seq(0, 10, by = 2.5)
    salary_breaks <- seq(0, 350000, by = 50000)

    ggplot(plotted_jobs, aes(text = tooltip)) +
        geom_point(aes(x = equity_mid, y = salary_mid, color = salary_mid), alpha = 0.2) +
        #geom_smooth(aes(x=equity_mid, y=salary_mid), method=lm, size=0.5, alpha=0.2, color="#1a1a1a", fill="#1a1a1a") +
        # The median line gets its own tooltip text.
        geom_hline(data = df_jobs_agg,
                   aes(yintercept = avg_salary,
                       text = sprintf("Median Salary: %s", salaryFormat(avg_salary)))) +
        fte_theme() +
        scale_x_continuous(limits = c(0, 10),
                           breaks = equity_breaks,
                           labels = equityFormat(equity_breaks)) +
        scale_y_continuous(limits = c(0, 350000),
                           breaks = salary_breaks,
                           labels = salaryFormat(salary_breaks)) +
        scale_color_viridis() +
        labs(x = "Equity (Midpoint of Offered Range)",
             y = "Salary (Midpoint of Offered Range)",
             title = "Full-Time Job Compensation Offerings in San Francisco Bay Area") +
        #theme(panel.margin = unit(c(0.5), "cm")) +
        facet_wrap(~ job_class, ncol = 2)
})


## plot.ly settings

# Convert the ggplot to a plotly widget and save it as an HTML file.
#   1. ggplotly(): show only the `text` aesthetic in tooltips.
#   2. layout(): apply the shared tick font to the y axis and to the x axis of
#      both facets (xaxis, xaxis2). The widget arrives through the pipe, so
#      only named layout attributes are passed here.
#   3. config(): hide the plotly logo, enable scroll zoom and drop the
#      "send to cloud" / "save image" mode-bar buttons.
#   4. saveWidget(): write the page with its dependencies kept external.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
i_plot %>%
    ggplotly(tooltip = c("text"), width = "100%", height = "400px") %>%
    layout(yaxis = a, xaxis = a, xaxis2 = a) %>%
    config(displaylogo = FALSE, scrollZoom = TRUE,
           modeBarButtonsToRemove = list('sendDataToCloud', 'toImage')) %>%
    as.widget() %>%
    saveWidget("engineer-sfba-1.html", selfcontained = FALSE)