# Example 02-02: Salaries for Professors

[R documentation](http://finzi.psych.upenn.edu/R/library/car/html/Salaries.html)

In [None]:
using DataFramesMeta
using Gadfly
using KernelDensity
using RDatasets

## Exploring the contents of the RDatasets package

In [None]:
# Ask RDatasets which packages it knows about (presumably one row per R package).
RDatasets.packages()

In [None]:
# Full dataset catalogue; the result carries :Package and :Dataset columns,
# which the filtering cells in this section rely on.
RDatasets.datasets()

In [None]:
# Same call with a package name; presumably limits the listing to the "car" package.
RDatasets.datasets("car")

In [None]:
# Filter the full catalogue down to the rows whose :Package equals "car".
@where(RDatasets.datasets(), :Package .== "car")

In [None]:
# Two conditions passed to @where: keep only rows matching both
# package "car" and dataset "Salaries".
@where(RDatasets.datasets(), :Package .== "car", :Dataset .== "Salaries")

## Loading a dataset from RDatasets

In [None]:
# Load the "Salaries" dataset of the "car" package and bind it to SalariesFrame,
# the table used by the cells that follow.
SalariesFrame = dataset("car", "Salaries")

## Getting basic information about a DataFrame

In [None]:
# Dimensions of the table.
size(SalariesFrame)

In [None]:
# Column names of the table.
names(SalariesFrame)

In [None]:
# Peek at the first 10 rows.
head(SalariesFrame, 10)

In [None]:
# Peek at the last rows; no count is given, so the library default applies.
tail(SalariesFrame)

In [None]:
# Summary of every column of the table.
describe(SalariesFrame)

## Basic manipulation of a DataFrame

In [None]:
# Sort the table in place (note the `!`): order by :YrsService first,
# then by :YrsSincePhD.
sort!(SalariesFrame, cols=[:YrsService, :YrsSincePhD])

In [None]:
# pool!(SalariesFrame, [:Discipline, :Sex])

In [None]:
# Distinct levels of the :Sex column.
levels(SalariesFrame[:Sex])

## Exploratory statistical analysis of professorial salaries

In [None]:
# Salary summary per academic rank: group size, mean and standard deviation
# (both rounded to 3 digits).
by(SalariesFrame, :Rank) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary per discipline: group size, mean and standard deviation
# (both rounded to 3 digits).
by(SalariesFrame, :Discipline) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary per sex: group size, mean and standard deviation
# (both rounded to 3 digits).
by(SalariesFrame, :Sex) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary for every (rank, discipline) combination: group size, mean and
# standard deviation (both rounded to 3 digits).
by(SalariesFrame, [:Rank, :Discipline]) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary for every (rank, sex) combination: group size, mean and
# standard deviation (both rounded to 3 digits).
by(SalariesFrame, [:Rank, :Sex]) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary for every (discipline, sex) combination: group size, mean and
# standard deviation (both rounded to 3 digits).
by(SalariesFrame, [:Discipline, :Sex]) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Salary summary for every (rank, discipline, sex) combination: group size, mean
# and standard deviation (both rounded to 3 digits).
by(SalariesFrame, [:Rank, :Discipline, :Sex]) do group
  salary = group[:Salary]
  DataFrame(
    n = length(salary),
    mean = round(mean(salary), 3),
    std = round(std(salary), 3)
  )
end

In [None]:
# Box plot of Salary, one box per Rank.
plot(SalariesFrame, x="Rank", y="Salary", Geom.boxplot)

In [None]:
# Box plot of Salary, one box per Discipline.
plot(SalariesFrame, x="Discipline", y="Salary", Geom.boxplot)

In [None]:
# Box plot of Salary, one box per Sex.
plot(SalariesFrame, x="Sex", y="Salary", Geom.boxplot)

In [None]:
# Place the three salary box plots (by Rank, Discipline and Sex) side by side.
# One plot is built per grouping column, then the plots are splatted into hstack.
hstack(
  map(
    column -> plot(SalariesFrame, x=column, y="Salary", Geom.boxplot),
    ["Rank", "Discipline", "Sex"]
  )...
)

In [None]:
# Salary against years since PhD; no geometry is given, so Gadfly's default is used.
plot(SalariesFrame, x="YrsSincePhD", y="Salary")

In [None]:
# Salary against years of service; no geometry is given, so Gadfly's default is used.
plot(SalariesFrame, x="YrsService", y="Salary")

In [None]:
# Salary against years since PhD, colored by Rank.
plot(SalariesFrame, x="YrsSincePhD", y="Salary", color="Rank")

In [None]:
# Salary against years of service, colored by Rank.
plot(SalariesFrame, x="YrsService", y="Salary", color="Rank")

In [None]:
# Salary against years since PhD, colored by Discipline.
plot(SalariesFrame, x="YrsSincePhD", y="Salary", color="Discipline")

In [None]:
# Salary against years of service, colored by Discipline.
plot(SalariesFrame, x="YrsService", y="Salary", color="Discipline")

In [None]:
# Salary against years since PhD, colored by Sex.
plot(SalariesFrame, x="YrsSincePhD", y="Salary", color="Sex")

In [None]:
# Salary against years of service, colored by Sex.
plot(SalariesFrame, x="YrsService", y="Salary", color="Sex")

In [None]:
# Histogram of Salary using 50 bins, with density=true requested instead of raw counts.
plot(SalariesFrame, x="Salary", Geom.histogram(bincount=50, density=true))

In [None]:
# Kernel density estimate of the Salary column; the result is kept in `salaries`.
salaries = kde(SalariesFrame[:Salary])

# Draw the estimate as a line: evaluation points on x, estimated density on y.
# The y label is deliberately left empty.
plot(
  x=salaries.x,
  y=salaries.density,
  Geom.line,
  Guide.xlabel("Salary"),
  Guide.ylabel(""),
  Guide.title("Kernel density estimator")
)

In [None]:
# Salary histogram (50 bins, density=true) with bars colored by Discipline.
plot(SalariesFrame, x="Salary", color="Discipline", Geom.histogram(bincount=50, density=true))

In [None]:
# Salary histogram (50 bins, density=true) with bars colored by Rank.
plot(SalariesFrame, x="Salary", color="Rank", Geom.histogram(bincount=50, density=true))

In [None]:
# One kernel density estimate of Salary per rank, stored in `kdes` in the order
# returned by levels(SalariesFrame[:Rank]).
kdes = map(levels(SalariesFrame[:Rank])) do lvl
  kde(@where(SalariesFrame, :Rank .== lvl)[:Salary])
end

In [None]:
# Overlay the per-rank salary density curves held in `kdes`, one color per rank.
#
# `kdes` is built by iterating over levels(SalariesFrame[:Rank]), so curve i
# belongs to the i-th level. The legend labels are therefore taken from that same
# call instead of being typed by hand: a hard-coded list silently mislabels the
# curves whenever the level order differs from the typed order.
# Still assumes exactly three ranks (three layers and three colors).
plot(
  layer(x=kdes[1].x, y=kdes[1].density, Geom.line, Theme(default_color=colorant"blue")),
  layer(x=kdes[2].x, y=kdes[2].density, Geom.line, Theme(default_color=colorant"red")),
  layer(x=kdes[3].x, y=kdes[3].density, Geom.line, Theme(default_color=colorant"green")),
  Guide.manual_color_key(
    "Rank",
    map(string, levels(SalariesFrame[:Rank])),  # same order as the curves in `kdes`
    ["blue", "red", "green"]
  ),
  Guide.xlabel("Salary"), Guide.ylabel(""), Guide.title("Kernel density estimators")
)

In [None]:
# Two-dimensional histogram of Salary against YrsService, 30 bins along each axis.
plot(SalariesFrame, x="YrsService", y="Salary", Geom.histogram2d(xbincount=30, ybincount=30))