In [151]:
import numpy as np
from sklearn.neural_network import MLPClassifier
import cleanlab
import matplotlib.pyplot as plt
import pandas as pd

In [152]:
def ccn_model(eta_star, beta, alpha):
    """Map a clean positive-class probability to its noisy counterpart.

    The result is affine in ``eta_star``: it equals ``alpha`` when
    ``eta_star`` is 0 and ``1 - beta`` when ``eta_star`` is 1, so ``alpha``
    acts as the rate at which a clean 0 is observed as 1 and ``beta`` as the
    rate at which a clean 1 is observed as 0.

    Parameters
    ----------
    eta_star : float or array-like
        Clean probability of the positive class.
    beta : float
        Flip rate applied to clean positives.
    alpha : float
        Flip rate applied to clean negatives.

    Returns
    -------
    Same shape as ``eta_star``: probability of observing a positive label.
    """
    retained = 1 - beta - alpha
    return alpha + retained * eta_star

def eta(x):
    """Return a sinusoidal value in [0, 1] for ``x``.

    Computes ``0.5 + 0.5 * sin(2.9 * x + 0.1)``; works element-wise on
    numpy arrays as well as on scalars.
    """
    phase = 2.9 * x + .1
    return .5 * np.sin(phase) + .5


In [153]:
# Seed numpy's global RNG so the sampled data (and therefore every number
# printed further down) is the same on each Restart & Run All.
np.random.seed(0)

# Draw 1-D features and sample binary labels whose success probability is the
# noise-transformed eta(x).
xs = np.random.uniform(-1, 1, size=10000)
py = ccn_model(eta(xs), beta=.1, alpha=.15)
y = np.random.binomial(1, py)

# 70/30 train/test split; features are reshaped to the (n_samples, 1) layout
# expected by the classifier.
split_ix = int(.7*xs.size)
tr_x = xs[:split_ix].reshape(-1, 1)
tr_y = y[:split_ix]
te_x = xs[split_ix:].reshape(-1, 1)
te_y = y[split_ix:]

# Train model and predict probability on test set
# random_state pins the weight initialisation and batch shuffling of the MLP.
clf = MLPClassifier(alpha=0, hidden_layer_sizes=(50, 3), random_state=0)
clf.fit(tr_x, tr_y)

py_val = clf.predict_proba(te_x)

# Probabilities the test labels were actually sampled from, laid out like
# predict_proba: column 0 = P[Y=0|x], column 1 = P[Y=1|x].
py_te = py[split_ix:]
py_true = np.array([
    1-py_te,
    py_te
]).T
    

In [150]:
# Average predicted probability of class 1 over the test samples labelled 1.
py_val[te_y == 1, 1].mean()

0.6635731243747329

In [147]:

# Per-class thresholds: for each class k, the mean predicted probability of k
# over the test samples whose label is k.
t = np.array([py_val[te_y == k, k].mean() for k in range(2)])
t


array([0.63450567, 0.66357312])

In [141]:
# Estimate the label-noise rates from the held-out predictions:
#   1. count, per observed label, the samples confidently assigned to each class,
#   2. rescale each row to the number of samples carrying that observed label,
#   3. normalise to a joint distribution and read off the conditional rates.

# A sample is confidently class j when its predicted probability reaches t[j].
is_confident = py_val >= t
has_confident = is_confident.any(axis=1)

# When more than one class clears its threshold, keep the most probable of
# them (classes below their threshold are masked out with -inf) instead of
# aborting the whole cell. With at most one confident class per sample this
# selects exactly that class.
ys = np.where(is_confident, py_val, -np.inf).argmax(axis=1)

# Raw confident counts: rows = observed label, columns = confident class.
# Samples with no confident class are left out.
C_tmp = np.zeros((2, 2))
np.add.at(C_tmp, (te_y[has_confident], ys[has_confident]), 1)

# Calibrate: turn each row into proportions, then scale it to the number of
# test samples with that observed label. A row without any confident sample
# has total 0 and produces NaN here.
C = C_tmp / C_tmp.sum(axis=1, keepdims=True) * np.bincount(te_y, minlength=2)[:, None]

# Joint distribution over (observed label, confident class).
Q = C / C.sum()

# Condition on the column (confident class) to get the flip rates.
beta_hat = Q[0, 1] / Q[:, 1].sum()
alpha_hat = Q[1, 0] / Q[:, 0].sum()

print(f'P[Y=0|Y*=1]={beta_hat:.2}')
print(f'P[Y=1|Y*=0]={alpha_hat:.2}')

P[Y=0|Y*=1]=0.26
P[Y=1|Y*=0]=0.22


In [52]:
# Official implementation
# A fixed seed keeps the estimate (and the numbers printed below) reproducible
# across runs.
# NOTE(review): no classifier is passed, so cleanlab presumably fits its own
# default model on (te_x, te_y) rather than reusing the MLP trained above;
# confirm that comparing against the manual estimate this way is intended.
noisemat, _ = cleanlab.count.estimate_noise_matrices(te_x, te_y, seed=0)
print(f'P[Y=0|Y*=1]={noisemat[0][1]:.2}')
print(f'P[Y=1|Y*=0]={noisemat[1][0]:.2}')


P[Y=0|Y*=1]=0.42
P[Y=1|Y*=0]=0.22
