# Canned Example: sign(x)
This notebook defines our canned example which defines several labeling functions that model the sign function. (See Overleaf document for full definition of this example.)

In [37]:
import numpy as np
from snorkel.labeling import labeling_function, LFApplier, LFAnalysis
from snorkel.labeling.model.label_model import LabelModel

In [38]:
# Parameters for the analysis
N_LFS = 50
N_DPS = 10000

SEED_RNG = True
SEED = 12345

In [39]:
# Defining labels for sake of clarity
ABSTAIN = -1
NEGATIVE = 0
POSITIVE = 1

In [40]:
# Seed numpy.random for reproducible results
if SEED_RNG:
    np.random.seed(SEED)

## Define the Goal Model

In [41]:
def f_star(x):
    return 1 if x == 0 else np.sign(x)

## Define the Dataset

In [42]:
X_data = np.random.uniform(-1, 1, N_DPS)

f_star_vectorized = np.vectorize(f_star)
Y_data = f_star_vectorized(X_data)

## Define Labeling Functions

In [43]:
def f(p_f, p_A, x):    
    # Draw from Bernoulli distribution
    z_f = np.random.binomial(1, p_f, 1)[0]
    z_A = np.random.binomial(1, p_A, 1)[0]
    
    x = int( (f_star(x) * z_f - f_star(x) * (1 - z_f)) * (1 - z_A) )
    if x == 1:
        return POSITIVE
    elif x == -1:
        return NEGATIVE
    else:
        return ABSTAIN

In [44]:
# Generate several labeling functions for different values of p_f and p_A
lfs = []
for i in range(N_LFS):
    p_f = np.random.rand(1)[0]
    p_A = np.random.rand(1)[0]
    
    @labeling_function(name="LF{}({:.4f},{:.4f})".format(i, p_f, p_A))
    def lf(x):
        return f(p_f, p_A, x)
    
    lfs.append(lf)

## Apply Labeling Functions to the Data

In [45]:
applier = LFApplier(lfs)
L_data = applier.apply(X_data, progress_bar=True)

10000it [00:10, 984.63it/s]


In [46]:
LFAnalysis(L_data, lfs=lfs).lf_summary(Y_data)

Unnamed: 0,j,Polarity,Coverage,Overlaps,Conflicts,Correct,Incorrect,Emp. Acc.
"LF0(0.6227,0.0852)",0,"[0, 1]",0.3991,0.3991,0.393,1642,394,0.411426
"LF1(0.1630,0.5568)",1,"[0, 1]",0.406,0.406,0.4003,1654,399,0.407389
"LF2(0.8033,0.8255)",2,"[0, 1]",0.397,0.397,0.3899,1602,417,0.403526
"LF3(0.1417,0.3059)",3,"[0, 1]",0.3951,0.3951,0.3899,1579,390,0.399646
"LF4(0.7347,0.8167)",4,"[0, 1]",0.397,0.397,0.3904,1653,374,0.416373
"LF5(0.1439,0.2943)",5,"[0, 1]",0.4002,0.4002,0.3939,1624,379,0.405797
"LF6(0.1452,0.4308)",6,"[0, 1]",0.4042,0.4042,0.3987,1615,407,0.399555
"LF7(0.2259,0.4020)",7,"[0, 1]",0.3971,0.3971,0.3915,1605,406,0.40418
"LF8(0.2170,0.6369)",8,"[0, 1]",0.3996,0.3996,0.3936,1581,392,0.395646
"LF9(0.5933,0.5767)",9,"[0, 1]",0.3997,0.3997,0.3937,1634,403,0.408807


## Experiments

In [47]:
from snorkel.labeling.model.label_model import LabelModel
from snorkel.analysis import Scorer

label_model = LabelModel(cardinality=2)
label_model.fit(L_data)

Y_pred = label_model.predict(L_data)

scorer = Scorer(metrics=["accuracy","f1"], custom_metric_funcs={"abstain rate": 
lambda golds, preds, probs: np.sum(preds == ABSTAIN) / len(preds)})
scorer.score(Y_data, preds=Y_pred)

{'accuracy': 0.9980087614496217, 'f1': 0.9990033884791709, 'abstain rate': 0.0}

In [61]:
x = [0.9858, 0.9910, 0.9912, 0.9910, 0.9908, 0.9928, 0.9918, 0.9930, 0.9894, 0.9910]
print(np.mean(x), np.std(x))

0.99078 0.0019255129186790745
