# Create weights for sample

1. We read the data
2. We create the categories needed to define weights
3. We use the autumn package to create raking weights
4. We inspect the weights to check that they behave as expected
5. We estimate the proportion of drug use using the raking weights
6. We estimate the design effect (DEFF) of the weights

In [1]:
# install packages (uncomment and run once if they are not installed yet)

# install.packages('data.table')
# install.packages('simstudy')
# install.packages('devtools')
# install.packages('weights')
# devtools::install_github("sdaza/autumn-adjustments", force=TRUE)

In [9]:
library(data.table)
library(weights)
library(autumn)

# Reading data 

In [10]:
# Read the student-level data into a data.table.
dat = fread("../homeworks/final-assignment.csv")

# Create age categories.
# Age is continuous, so the bins are left-closed / right-open:
#   [9, 13) -> "9-12", [13, 16) -> "13-15", [16, 19) -> "16-18".
# With cut()'s default (right = TRUE) an age of exactly 13 would be labelled
# "9-12" and an age of exactly 16 would be labelled "13-15".
# Ages outside [9, 19) become NA.
dat[, age_group := cut(
    age,
    breaks = c(9, 13, 16, 19),
    labels = c("9-12", "13-15", "16-18"),
    right = FALSE
)]

# Transform female to a factor so its levels ("0", "1") can be matched
# against the names of the weighting targets.
dat[, female := factor(female)]

In [11]:
# Sanity check: observed age range inside each age category.
dat[, .(min = min(age), max = max(age)), by = age_group]

age_group,min,max
<fct>,<dbl>,<dbl>
13-15,13.0016,15.99923
16-18,16.00235,17.99953
9-12,9.00027,12.99976


# Explore data

In [12]:
# Descriptive summary of every column (numeric ranges and factor counts).
summary(dat)

   id_school        id_student    female        age             drug       
 Min.   :  1.00   Min.   :    1   0:5151   Min.   : 9.00   Min.   :0.0000  
 1st Qu.: 25.00   1st Qu.: 3434   1:8584   1st Qu.:11.26   1st Qu.:0.0000  
 Median : 49.00   Median : 6868            Median :13.51   Median :0.0000  
 Mean   : 50.21   Mean   : 6868            Mean   :13.52   Mean   :0.1265  
 3rd Qu.: 76.00   3rd Qu.:10302            3rd Qu.:15.79   3rd Qu.:0.0000  
 Max.   :100.00   Max.   :13735            Max.   :18.00   Max.   :1.0000  
 age_group   
 9-12 :6064  
 13-15:4554  
 16-18:3117  
             
             
             

In [13]:
# Unweighted sample share of each age group (no weight argument is passed).
wpct(dat$age_group)

In [16]:
# Unweighted sample share of each level of female (no weight argument is passed).
wpct(dat$female)

In [15]:
# Summary of the data again (repeats the earlier summary(dat) call).
summary(dat)

   id_school        id_student    female        age             drug       
 Min.   :  1.00   Min.   :    1   0:5151   Min.   : 9.00   Min.   :0.0000  
 1st Qu.: 25.00   1st Qu.: 3434   1:8584   1st Qu.:11.26   1st Qu.:0.0000  
 Median : 49.00   Median : 6868            Median :13.51   Median :0.0000  
 Mean   : 50.21   Mean   : 6868            Mean   :13.52   Mean   :0.1265  
 3rd Qu.: 76.00   3rd Qu.:10302            3rd Qu.:15.79   3rd Qu.:0.0000  
 Max.   :100.00   Max.   :13735            Max.   :18.00   Max.   :1.0000  
 age_group   
 9-12 :6064  
 13-15:4554  
 16-18:3117  
             
             
             

# Weighting 

In [17]:
# Raking with autumn.
# Target margins: the share each level should have after weighting.
# The names must match the levels of the corresponding columns in `dat`.
target = list(
    age_group = c("9-12" = 0.40, "13-15" = 0.32, "16-18" = 0.28),
    female = c("0" = 0.48, "1" = 0.52)
)

# Rake the sample to the targets; the weights are read from result$weights.
result = harvest(dat, target)

# Compare original and weighted proportions against each target.
print(
    diagnose_weights(
        data = result,
        target = target,
        weights = result$weights
    )
)

   variable level prop_original prop_weighted target error_original
1 age_group  9-12     0.4414998          0.40   0.40     0.04149982
2 age_group 13-15     0.3315617          0.32   0.32     0.01156170
3 age_group 16-18     0.2269385          0.28   0.28     0.05306152
4    female     0     0.3750273          0.48   0.48     0.10497270
5    female     1     0.6249727          0.52   0.52     0.10497270
  error_weighted
1   0.000000e+00
2   0.000000e+00
3   1.110223e-16
4   5.551115e-17
5   0.000000e+00


In [20]:
# DEFF of weights: design effect computed from the raking weights alone.
# NOTE(review): presumably Kish's approximation, where 1 means no loss of
# precision from weighting -- confirm against the autumn documentation.
design_effect(result$weights)

In [21]:
# Weighted proportion of each level of drug, using the raking weights.
# drug looks 0/1-coded (min 0, max 1 in the summary), so the weighted mean
# would give the weighted share with drug == 1 directly.
wpct(result$drug, result$weights) # or weighted.mean(result$drug, result$weights)
