# 3. Sample Weighting

## Table of Contents

- [Objective](#section0)
- [Balanced Repeated Replication (BRR)](#section1)
- [Bootstrap](#section2)
- [Jackknife](#section3)

## Objective <a name="section0"></a>

In this tutorial, we will create replicate weights using the class *ReplicateWeight*. Three replication methods are implemented: balanced repeated replication (BRR), including its Fay variant (Fay-BRR); bootstrap; and jackknife.

In [1]:
import numpy as np
import pandas as pd
import scipy.linalg

import samplics
from samplics.weighting import ReplicateWeight

In [2]:
# Read the cluster-level and household-level sample files.
psu_sample = pd.read_csv("psu_sample.csv")
ssu_sample = pd.read_csv("ssu_sample.csv")

# Join the two files on "cluster", then derive:
#   inclusion_prob = psu_prob * ssu_prob
#   design_weight  = 1 / inclusion_prob
full_sample = (
    psu_sample[["cluster", "region", "psu_prob"]]
    .merge(ssu_sample[["cluster", "household", "ssu_prob"]], on="cluster")
    .assign(inclusion_prob=lambda df: df["psu_prob"] * df["ssu_prob"])
    .assign(design_weight=lambda df: 1 / df["inclusion_prob"])
)

full_sample.head(15)

Unnamed: 0,cluster,region,psu_prob,household,ssu_prob,inclusion_prob,design_weight
0,7,North,0.187726,72,0.115385,0.021661,46.166667
1,7,North,0.187726,73,0.115385,0.021661,46.166667
2,7,North,0.187726,75,0.115385,0.021661,46.166667
3,7,North,0.187726,715,0.115385,0.021661,46.166667
4,7,North,0.187726,722,0.115385,0.021661,46.166667
5,7,North,0.187726,724,0.115385,0.021661,46.166667
6,7,North,0.187726,755,0.115385,0.021661,46.166667
7,7,North,0.187726,761,0.115385,0.021661,46.166667
8,7,North,0.187726,764,0.115385,0.021661,46.166667
9,7,North,0.187726,782,0.115385,0.021661,46.166667


## Balanced Repeated Replication (BRR) <a name="section1"></a>



In [3]:
# Order-4 Hadamard matrix (entries are +1 / -1). `scipy.linalg` is imported
# explicitly in the imports cell: a bare `import scipy` is not guaranteed to
# expose the `linalg` submodule on a fresh kernel.
# NOTE(review): presumably shown to illustrate the +/-1 pattern BRR uses to
# build its half-samples -- confirm and say so in the surrounding markdown.
had = scipy.linalg.hadamard(4)

# Keep columns 1-3 (skipping column 0) and transpose them into rows.
np.transpose(had[:, 1:4])

array([[ 1, -1,  1, -1],
       [ 1,  1, -1, -1],
       [ 1, -1, -1,  1]])

In [4]:
# BRR replicate weights built from the design weights, with clusters as PSUs.
brr_method = ReplicateWeight(stratification=False, method="brr")

weights, psus = full_sample["design_weight"], full_sample["cluster"]
brr_wgt = brr_method.replicate(weights, psus)

# Drop fully identical rows before display; the first ten remaining rows are shown.
brr_unique = brr_wgt.drop_duplicates()
brr_unique.head(10)

Unnamed: 0,_stratum,_psu,_samp_weight,_brr_wgt_1,_brr_wgt_2,_brr_wgt_3,_brr_wgt_4,_brr_wgt_5,_brr_wgt_6,_brr_wgt_7,...,_brr_wgt_503,_brr_wgt_504,_brr_wgt_505,_brr_wgt_506,_brr_wgt_507,_brr_wgt_508,_brr_wgt_509,_brr_wgt_510,_brr_wgt_511,_brr_wgt_512
0,1,7,46.166667,0.0,92.333333,0.0,92.333333,0.0,92.333333,0.0,...,0.0,92.333333,0.0,92.333333,0.0,92.333333,0.0,92.333333,0.0,92.333333
15,1,10,50.783333,101.566667,0.0,101.566667,0.0,101.566667,0.0,101.566667,...,101.566667,0.0,101.566667,0.0,101.566667,0.0,101.566667,0.0,101.566667,0.0
30,2,16,62.149123,0.0,0.0,124.298246,124.298246,0.0,0.0,124.298246,...,124.298246,124.298246,0.0,0.0,124.298246,124.298246,0.0,0.0,124.298246,124.298246
45,2,24,58.940741,117.881481,117.881481,0.0,0.0,117.881481,117.881481,0.0,...,0.0,0.0,117.881481,117.881481,0.0,0.0,117.881481,117.881481,0.0,0.0
60,3,29,65.702778,0.0,131.405556,131.405556,0.0,0.0,131.405556,131.405556,...,131.405556,0.0,0.0,131.405556,131.405556,0.0,0.0,131.405556,131.405556,0.0
75,3,34,75.661566,151.323133,0.0,0.0,151.323133,151.323133,0.0,0.0,...,0.0,151.323133,151.323133,0.0,0.0,151.323133,151.323133,0.0,0.0,151.323133
90,4,45,85.398025,0.0,0.0,0.0,0.0,170.796049,170.796049,170.796049,...,170.796049,170.796049,0.0,0.0,0.0,0.0,170.796049,170.796049,170.796049,170.796049
105,4,52,85.520635,171.04127,171.04127,171.04127,171.04127,0.0,0.0,0.0,...,0.0,0.0,171.04127,171.04127,171.04127,171.04127,0.0,0.0,0.0,0.0
120,5,64,218.893889,0.0,437.787778,0.0,437.787778,437.787778,0.0,437.787778,...,437.787778,0.0,0.0,437.787778,0.0,437.787778,437.787778,0.0,437.787778,0.0
135,5,86,213.491667,426.983333,0.0,426.983333,0.0,0.0,426.983333,0.0,...,0.0,426.983333,426.983333,0.0,426.983333,0.0,0.0,426.983333,0.0,426.983333


In [5]:
# Fay-BRR: the same "brr" method, run with a Fay coefficient of 0.3.
fay_method = ReplicateWeight(method="brr", fay_coef=0.3, stratification=False)

# Custom labels for the replicate, PSU and stratum columns of the result.
fay_labels = {
    "rep_prefix": "fay_weight_",
    "psu_varname": "cluster",
    "str_varname": "stratum",
}
fay_wgt = fay_method.replicate(
    full_sample["design_weight"],
    full_sample["cluster"],
    **fay_labels,
)

# Drop fully identical rows before display; the first ten remaining rows are shown.
fay_unique = fay_wgt.drop_duplicates()
fay_unique.head(10)

Unnamed: 0,stratum,cluster,_samp_weight,fay_weight_1,fay_weight_2,fay_weight_3,fay_weight_4,fay_weight_5,fay_weight_6,fay_weight_7,...,fay_weight_503,fay_weight_504,fay_weight_505,fay_weight_506,fay_weight_507,fay_weight_508,fay_weight_509,fay_weight_510,fay_weight_511,fay_weight_512
0,1,7,46.166667,13.85,78.483333,13.85,78.483333,13.85,78.483333,13.85,...,13.85,78.483333,13.85,78.483333,13.85,78.483333,13.85,78.483333,13.85,78.483333
15,1,10,50.783333,86.331667,15.235,86.331667,15.235,86.331667,15.235,86.331667,...,86.331667,15.235,86.331667,15.235,86.331667,15.235,86.331667,15.235,86.331667,15.235
30,2,16,62.149123,18.644737,18.644737,105.653509,105.653509,18.644737,18.644737,105.653509,...,105.653509,105.653509,18.644737,18.644737,105.653509,105.653509,18.644737,18.644737,105.653509,105.653509
45,2,24,58.940741,100.199259,100.199259,17.682222,17.682222,100.199259,100.199259,17.682222,...,17.682222,17.682222,100.199259,100.199259,17.682222,17.682222,100.199259,100.199259,17.682222,17.682222
60,3,29,65.702778,19.710833,111.694722,111.694722,19.710833,19.710833,111.694722,111.694722,...,111.694722,19.710833,19.710833,111.694722,111.694722,19.710833,19.710833,111.694722,111.694722,19.710833
75,3,34,75.661566,128.624663,22.69847,22.69847,128.624663,128.624663,22.69847,22.69847,...,22.69847,128.624663,128.624663,22.69847,22.69847,128.624663,128.624663,22.69847,22.69847,128.624663
90,4,45,85.398025,25.619407,25.619407,25.619407,25.619407,145.176642,145.176642,145.176642,...,145.176642,145.176642,25.619407,25.619407,25.619407,25.619407,145.176642,145.176642,145.176642,145.176642
105,4,52,85.520635,145.385079,145.385079,145.385079,145.385079,25.65619,25.65619,25.65619,...,25.65619,25.65619,145.385079,145.385079,145.385079,145.385079,25.65619,25.65619,25.65619,25.65619
120,5,64,218.893889,65.668167,372.119611,65.668167,372.119611,372.119611,65.668167,372.119611,...,372.119611,65.668167,65.668167,372.119611,65.668167,372.119611,372.119611,65.668167,372.119611,65.668167
135,5,86,213.491667,362.935833,64.0475,362.935833,64.0475,64.0475,362.935833,64.0475,...,64.0475,362.935833,362.935833,64.0475,362.935833,64.0475,64.0475,362.935833,64.0475,362.935833


## Bootstrap <a name="section2"></a>



In [7]:
# Seed NumPy's global RNG before drawing the bootstrap replicates so that the
# table below can be reproduced on a fresh Restart & Run All.
# NOTE(review): this relies on samplics drawing its resamples from NumPy's
# global random state -- confirm, or switch to the constructor's own seed
# argument if ReplicateWeight exposes one.
np.random.seed(12345)

# 50 bootstrap replicate weights built from the design weights, with clusters as PSUs.
boot_method = ReplicateWeight(method="bootstrap", stratification=False, number_reps=50)

boot_wgt = boot_method.replicate(full_sample["design_weight"], full_sample["cluster"])

# Drop fully identical rows before display.
boot_wgt.drop_duplicates().head(15)

Unnamed: 0,_psu,_samp_weight,_boot_wgt_1,_boot_wgt_2,_boot_wgt_3,_boot_wgt_4,_boot_wgt_5,_boot_wgt_6,_boot_wgt_7,_boot_wgt_8,...,_boot_wgt_41,_boot_wgt_42,_boot_wgt_43,_boot_wgt_44,_boot_wgt_45,_boot_wgt_46,_boot_wgt_47,_boot_wgt_48,_boot_wgt_49,_boot_wgt_50
0,7,46.166667,205.185185,0.0,51.296296,51.296296,0.0,0.0,102.592593,102.592593,...,51.296296,51.296296,51.296296,0.0,51.296296,153.888889,0.0,0.0,51.296296,51.296296
15,10,50.783333,0.0,112.851852,112.851852,56.425926,56.425926,56.425926,56.425926,0.0,...,56.425926,169.277778,0.0,56.425926,0.0,56.425926,0.0,0.0,0.0,56.425926
30,16,62.149123,0.0,0.0,0.0,0.0,69.054581,138.109162,69.054581,138.109162,...,0.0,69.054581,138.109162,69.054581,69.054581,0.0,138.109162,69.054581,69.054581,138.109162
45,24,58.940741,0.0,130.979424,0.0,196.469136,65.489712,65.489712,65.489712,130.979424,...,0.0,0.0,130.979424,0.0,0.0,130.979424,130.979424,65.489712,0.0,0.0
60,29,65.702778,73.003086,73.003086,73.003086,73.003086,146.006173,146.006173,0.0,73.003086,...,146.006173,0.0,146.006173,73.003086,73.003086,0.0,73.003086,73.003086,73.003086,0.0
75,34,75.661566,0.0,84.068407,0.0,84.068407,0.0,84.068407,84.068407,84.068407,...,0.0,84.068407,0.0,84.068407,0.0,252.205222,84.068407,252.205222,168.136814,0.0
90,45,85.398025,94.886694,189.773388,0.0,94.886694,94.886694,94.886694,0.0,0.0,...,94.886694,0.0,94.886694,189.773388,0.0,0.0,189.773388,0.0,94.886694,0.0
105,52,85.520635,95.022928,95.022928,190.045855,0.0,95.022928,0.0,95.022928,0.0,...,95.022928,95.022928,0.0,0.0,285.068783,0.0,95.022928,0.0,285.068783,285.068783
120,64,218.893889,0.0,0.0,243.215432,0.0,243.215432,243.215432,0.0,0.0,...,243.215432,243.215432,243.215432,243.215432,486.430864,0.0,0.0,486.430864,0.0,0.0
135,86,213.491667,474.425926,0.0,474.425926,237.212963,237.212963,0.0,474.425926,237.212963,...,474.425926,237.212963,0.0,474.425926,237.212963,0.0,0.0,237.212963,0.0,474.425926


## Jackknife <a name="section3"></a>



In [8]:
# Unstratified jackknife replicate weights, with clusters as PSUs.
jkn_method = ReplicateWeight(stratification=False, method="jackknife")

weights, psus = full_sample["design_weight"], full_sample["cluster"]
jkn_wgt = jkn_method.replicate(weights, psus)

# Household-level rows (no de-duplication), so rows of the same PSU repeat.
jkn_wgt.head(15)

Unnamed: 0,_psu,_samp_weight,_jk_wgt_1,_jk_wgt_2,_jk_wgt_3,_jk_wgt_4,_jk_wgt_5,_jk_wgt_6,_jk_wgt_7,_jk_wgt_8,_jk_wgt_9,_jk_wgt_10
0,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
1,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
2,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
3,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
4,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
5,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
6,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
7,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
8,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296
9,7,46.166667,0.0,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296,51.296296


In [9]:
# Stratified jackknife: "region" is passed as the stratum, "cluster" as the PSU.
jkn_method = ReplicateWeight(stratification=True, method="jackknife")

weights = full_sample["design_weight"]
psus = full_sample["cluster"]
strata = full_sample["region"]
jkn_wgt = jkn_method.replicate(weights, psus, strata)

# Household-level rows (no de-duplication), so rows of the same PSU repeat.
jkn_wgt.head(15)

Unnamed: 0,_stratum,_psu,_samp_weight,_jk_wgt_1,_jk_wgt_2,_jk_wgt_3,_jk_wgt_4,_jk_wgt_5,_jk_wgt_6,_jk_wgt_7,_jk_wgt_8,_jk_wgt_9,_jk_wgt_10
0,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
1,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
2,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
3,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
4,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
5,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
6,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
7,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
8,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667
9,North,7,46.166667,46.166667,46.166667,46.166667,0.0,92.333333,46.166667,46.166667,46.166667,46.166667,46.166667


In [12]:
# Stratified jackknife again, this time with rep_coefs=True: the replicate
# columns in the output hold multipliers (0, 1, 1.5, 2) rather than weights.
jkn_method = ReplicateWeight(stratification=True, method="jackknife")

weights = full_sample["design_weight"]
psus = full_sample["cluster"]
strata = full_sample["region"]
jkn_wgt = jkn_method.replicate(weights, psus, strata, rep_coefs=True)

# Drop fully identical rows, then order by stratum for display.
jkn_unique = jkn_wgt.drop_duplicates()
jkn_unique.sort_values(by="_stratum").head(15)

Unnamed: 0,_stratum,_psu,_samp_weight,_jk_wgt_1,_jk_wgt_2,_jk_wgt_3,_jk_wgt_4,_jk_wgt_5,_jk_wgt_6,_jk_wgt_7,_jk_wgt_8,_jk_wgt_9,_jk_wgt_10
75,East,34,75.661566,1.5,1.5,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
90,East,45,85.398025,1.5,0.0,1.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
105,East,52,85.520635,0.0,1.5,1.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,North,7,46.166667,1.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,1.0
15,North,10,50.783333,1.0,1.0,1.0,2.0,0.0,1.0,1.0,1.0,1.0,1.0
30,South,16,62.149123,1.0,1.0,1.0,1.0,1.0,1.5,1.5,0.0,1.0,1.0
45,South,24,58.940741,1.0,1.0,1.0,1.0,1.0,1.5,0.0,1.5,1.0,1.0
60,South,29,65.702778,1.0,1.0,1.0,1.0,1.0,0.0,1.5,1.5,1.0,1.0
120,West,64,218.893889,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,0.0
135,West,86,213.491667,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,2.0


In [11]:
# Unstratified jackknife with rep_coefs=True: the replicate columns in the
# output hold multipliers (0 or 1.111111) rather than weights.
jkn_method = ReplicateWeight(stratification=False, method="jackknife")

weights, psus = full_sample["design_weight"], full_sample["cluster"]
jkn_wgt = jkn_method.replicate(weights, psus, rep_coefs=True)

# Drop fully identical rows before display.
jkn_unique = jkn_wgt.drop_duplicates()
jkn_unique.head(15)

Unnamed: 0,_psu,_samp_weight,_jk_wgt_1,_jk_wgt_2,_jk_wgt_3,_jk_wgt_4,_jk_wgt_5,_jk_wgt_6,_jk_wgt_7,_jk_wgt_8,_jk_wgt_9,_jk_wgt_10
0,7,46.166667,0.0,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111
15,10,50.783333,1.111111,0.0,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111
30,16,62.149123,1.111111,1.111111,0.0,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111
45,24,58.940741,1.111111,1.111111,1.111111,0.0,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111
60,29,65.702778,1.111111,1.111111,1.111111,1.111111,0.0,1.111111,1.111111,1.111111,1.111111,1.111111
75,34,75.661566,1.111111,1.111111,1.111111,1.111111,1.111111,0.0,1.111111,1.111111,1.111111,1.111111
90,45,85.398025,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,0.0,1.111111,1.111111,1.111111
105,52,85.520635,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,0.0,1.111111,1.111111
120,64,218.893889,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,0.0,1.111111
135,86,213.491667,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,1.111111,0.0


In [23]:
# Sanity check of the unstratified jackknife coefficient shown in the previous
# table: retained PSUs are scaled by n / (n - 1), where n is the number of
# sampled clusters (10 here, giving 10/9).
n_psus = full_sample["cluster"].nunique()
n_psus / (n_psus - 1)

1.1111111111111112