# Regression and Other Stories: Earnings

Predict respondents' yearly earnings using survey data from 1990. See Chapter 15 in Regression and Other Stories.

---

### Load packages

In [None]:
using DataFrames, StatsPlots, CSV, HTTP, StatsBase
using Distributions, Turing, MCMCChains
using StatsFuns: logistic

### Load data

In [None]:
# Download the ROS earnings survey CSV and parse it into a DataFrame.
# "NA" cells are read as `missing`.
data = "https://raw.githubusercontent.com/avehtari/ROS-Examples/master/Earnings/data/earnings.csv"
earnings = DataFrame(CSV.File(HTTP.get(data).body; delim=",", missingstring="NA"))
first(earnings, 6)

## Compound discrete-continuous model

### Logistic regression for whether earnings are non-zero

In [None]:
# Logistic regression for the indicator "respondent has positive earnings".
#
# Arguments
#   height, male : predictors, one entry per respondent
#   binary_earn  : observed outcome per respondent (true/false)
#
# Priors: α ~ Normal(0, 10); βₕ, βₘ ~ Normal(0, 5).
@model function m1(height, male, binary_earn)

    α ~ Normal(0, 10)
    βₕ ~ Normal(0, 5)
    βₘ ~ Normal(0, 5)

    # `eachindex` over all three inputs throws if their indices do not line up,
    # instead of silently reading only a prefix of the predictors.
    for i in eachindex(binary_earn, height, male)
        η = α + βₕ * height[i] + βₘ * male[i]
        # Parameterise on the logit scale: `Bernoulli(logistic(η))` rounds the
        # probability to exactly 0 or 1 for large |η|, giving a -Inf log density.
        binary_earn[i] ~ BernoulliLogit(η)
    end
end;

In [None]:
# Outcome for the logistic model: does the respondent report any earnings?
binary_earn = Vector(earnings[!, :earn] .> 0)

# Condition the model on the data, draw 5000 NUTS samples, and summarise the posterior.
model_logistic = m1(earnings[!, :height], earnings[!, :male], binary_earn)
fit_2a = sample(model_logistic, NUTS(0.50), 5000)
summarystats(fit_2a)

### Linear regression on the log scale (respondents with non-zero earnings)

In [None]:
# Linear regression of log earnings on height and sex.
#
# Arguments
#   height, male : predictors, one entry per respondent
#   earn         : earnings per respondent; the model takes `log.(earn)`, so
#                  every entry must be strictly positive
#
# Priors: σ ~ Normal(0, 15) truncated to [0, ∞); α ~ Normal(0, 10);
#         βₕ, βₘ ~ Normal(0, 5).
@model function m2(height, male, earn)

    σ ~ truncated(Normal(0, 15), 0, Inf)

    α ~ Normal(0, 10)
    βₕ ~ Normal(0, 5)
    βₘ ~ Normal(0, 5)

    μ = α .+ βₕ .* height .+ βₘ .* male
    log_earn = log.(earn)

    # `log_earn` is a local variable, not a model argument, so writing
    # `log_earn ~ MvNormal(μ, σ)` would make Turing treat it as a latent
    # parameter to be sampled and the data would never enter the posterior.
    # Add the log-likelihood of the observed values explicitly instead.
    Turing.@addlogprob! logpdf(MvNormal(μ, σ), log_earn)
end;

In [None]:
# Keep only respondents with non-zero earnings so the log transform is finite.
valid = earnings[!, :earn] .!= 0.0

# Fit the log-scale regression on that subset and summarise the posterior draws.
model_log = m2(earnings[valid, :height], earnings[valid, :male], earnings[valid, :earn])
fit_2b = sample(model_log, NUTS(0.50), 5000)
summarystats(fit_2b)

In [None]:
# Element-wise natural log of every respondent's earnings.
# NOTE(review): this is the unfiltered column; any zero entries map to -Inf. Confirm
# that downstream code filters them out.
log_earn = map(log, earnings.earn)
