In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic binary-classification problem: 100k rows, 20 features.
X, y = make_classification(
    n_samples=100_000,
    n_features=20,
    n_informative=2,
    n_redundant=2,
    random_state=42,
)

# Only the first `train_samples` rows are used for fitting; shuffle=False makes
# the split a plain head/tail cut, and every remaining row goes to the test set.
train_samples = 100  # Samples used for training the models
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=False, test_size=len(X) - train_samples
)

In [None]:
import matplotlib.pyplot as plt
# Scatter feature column 4 against column 3, one colour per class label.
class0_rows, class1_rows = X[y == 0], X[y == 1]
plt.scatter(class0_rows[:, 4], class0_rows[:, 3], alpha=0.1)
plt.scatter(class1_rows[:, 4], class1_rows[:, 3], alpha=0.1)


In [None]:
from sklearn.calibration import CalibrationDisplay
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Create classifiers
lr = LogisticRegression()
gnb = GaussianNB()
# Seed the forest: without a fixed random_state its fit differs on every run,
# and so would every histogram and calibration curve computed from `rfc` below.
rfc = RandomForestClassifier(random_state=42)

# (estimator, display name) pairs; the names are reused as plot labels later.
clf_list = [
    (lr, "Logistic"),
    (gnb, "Naive Bayes"),
    (rfc, "Random forest"),
]

# Fit every classifier on the same small training split.
for clf, name in clf_list:
    clf.fit(X_train, y_train)

In [None]:
import numpy as np
# Counts of the forest's class-1 scores over 20 equal-width bins, restricted to
# test rows whose true label is 1.
rfc_scores_true1 = rfc.predict_proba(X_test)[:, 1][y_test == 1]
np.histogram(rfc_scores_true1, bins=np.linspace(0, 1, 21))

In [None]:
import numpy as np
# Same binning as for the positives, now for test rows whose true label is 0.
score_bin_edges = np.linspace(0, 1, 21)
rfc_scores_true0 = rfc.predict_proba(X_test)[:, 1][y_test == 0]
np.histogram(rfc_scores_true0, bins=score_bin_edges)

In [None]:
from sklearn.calibration import calibration_curve
# Reliability curve of the random forest on the test split (20 bins requested).
rfc_test_scores = rfc.predict_proba(X_test)[:, 1]
prob_pos, probs = calibration_curve(y_test, rfc_test_scores, n_bins=20)

# Draw the curve, then mark the point at index 10 with a red star.
starred = 10
plt.plot(probs, prob_pos)
plt.plot(probs[starred], prob_pos[starred], 'r*')
plt.xlabel('Mittlere geschätzte Wahrscheinlichkeit')
plt.ylabel('Mittlere accuracy')
plt.grid()


In [None]:
# Overlay the forest's class-1 score distribution for each true label,
# using the same 20 bins on [0, 1] for both classes.
hist_edges = np.linspace(0, 1, 21)
rfc_class1_scores = rfc.predict_proba(X_test)[:, 1]
for true_label in (0, 1):
    plt.hist(
        rfc_class1_scores[y_test == true_label],
        hist_edges,
        alpha=0.5,
        label=f'y=={true_label}',
    )
plt.legend()


In [None]:
from sklearn.calibration import CalibrationDisplay
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

# Figure layout: a 4x2 grid whose top two rows hold the shared calibration
# plot and whose lower rows hold one score histogram per classifier.
fig = plt.figure(figsize=(10, 10))
gs = GridSpec(4, 2)
colors = plt.get_cmap("Dark2")
markers = ["^", "v", "s", "o"]

ax_calibration_curve = fig.add_subplot(gs[:2, :2])
calibration_displays = {}
for i, (clf, name) in enumerate(clf_list):
    # Every classifier is drawn onto the same axes with its own colour/marker.
    display = CalibrationDisplay.from_estimator(
        clf,
        X_test,
        y_test,
        n_bins=20,
        name=name,
        ax=ax_calibration_curve,
        color=colors(i),
        marker=markers[i],
    )
    calibration_displays[name] = display

ax_calibration_curve.set_title("Calibration plots")
ax_calibration_curve.grid()

# Add histogram
grid_positions = [(2, 0), (2, 1), (3, 0), (3, 1)]
for i, ((_, name), (row, col)) in enumerate(zip(clf_list, grid_positions)):
    ax = fig.add_subplot(gs[row, col])
    # Reuse the scores stored on the display object of the same classifier.
    ax.hist(
        calibration_displays[name].y_prob,
        range=(0, 1),
        bins=20,
        label=name,
        color=colors(i),
    )
    ax.set(title=name, xlabel="Mean predicted probability", ylabel="Count")

plt.tight_layout()
plt.show()

In [None]:
import torch
nn = torch.nn

class MLP(nn.Module):
    """Fully connected network: 20 inputs -> 32 -> 32 -> 32 -> 2 outputs.

    Every linear layer, including the last one, is followed by a sigmoid, so
    both outputs lie in (0, 1). Dropout (default p=0.5) is applied after the
    second and third hidden activations.

    Dropout is registered as a submodule so that ``model.train()`` and
    ``model.eval()`` switch it on and off. A freshly built model is in training
    mode; call ``model.eval()`` for deterministic predictions.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(20, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 32)
        self.fc4 = nn.Linear(32, 2)
        self.sigmoid = nn.Sigmoid()
        # One shared, parameter-free module. Creating nn.Dropout() inside
        # forward() would always yield a module in training mode and thus
        # ignore model.eval().
        self.dropout = nn.Dropout()

    def forward(self, x):
        """Map a batch of feature rows (last dim 20) to two sigmoid outputs."""
        out = self.sigmoid(self.fc1(x))
        out = self.sigmoid(self.fc2(out))
        out = self.dropout(out)
        out = self.sigmoid(self.fc3(out))
        out = self.dropout(out)
        out = self.sigmoid(self.fc4(out))
        return out


In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 8000

# Every step uses the whole training split as one batch, so build the input
# tensor and the two-column (1 - y, y) probability target once, not per step.
X_train_t = torch.Tensor(X_train).to(DEVICE)
y_train_t = torch.Tensor(y_train)
target = torch.stack([1 - y_train_t, y_train_t], dim=1).squeeze().to(DEVICE)

lossvals=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(X_train_t)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())  # plain Python float for plotting
    loss.backward()
    optimizer.step()


In [None]:
# Training loss per optimisation step (x axis = iteration index).
plt.plot(range(len(lossvals)), lossvals)

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score

# Score the test split on the same device the model lives on (the original
# hard-coded 'cpu' here, which breaks as soon as DEVICE is changed).
# NOTE(review): the model is never switched to eval mode, so the dropout
# layers in MLP.forward are active for these predictions.
with torch.no_grad():
    y_test_pred = model(torch.Tensor(X_test).to(DEVICE))

# scikit-learn expects NumPy arrays: convert the class-1 output and the
# arg-max class decision once and reuse them for every metric.
y_test_score = y_test_pred[:, 1].cpu().numpy()
y_test_label = y_test_pred.argmax(1).cpu().numpy()

fpr, tpr, thresholds = roc_curve(y_test, y_test_score)

plt.plot(fpr,tpr)
plt.xlabel('False-Positive Rate (1 - Specificity)')
plt.ylabel('True-Positive Rate (Sensitivity)')
plt.show()

print('ROC AUC score: {:.3f}'.format(roc_auc_score(y_test, y_test_score)))

print("Accuracy: {:.4f}, F1 Score: {:.4f}".format(accuracy_score(y_test, y_test_label), f1_score(y_test, y_test_label)))



In [None]:
# Reliability diagram for the MLP, using its second output column as the score.
mlp_class1_scores = y_test_pred.cpu().numpy()[:, 1]
display = CalibrationDisplay.from_predictions(
    y_test, mlp_class1_scores, n_bins=20, name='MLP'
)

In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 2000

# Loop-invariant tensors: full-batch training input, its two-column
# (1 - y, y) target, and the test input used for the periodic snapshots.
X_train_t = torch.Tensor(X_train).to(DEVICE)
y_train_t = torch.Tensor(y_train)
target = torch.stack([1 - y_train_t, y_train_t], dim=1).squeeze().to(DEVICE)
X_test_t = torch.Tensor(X_test).to(DEVICE)

lossvals=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(X_train_t)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%100==0:
        # Snapshot of the calibration on the test split every 100 steps.
        # NOTE(review): the model stays in training mode, so dropout is active here.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # plt.savefig(f'calibration_{it}_it.pdf')

In [None]:
# model.parameters().abs()

In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 2000
alpha=0.01  # weight of the L1 penalty term

# Loop-invariant tensors: full-batch training input, its two-column
# (1 - y, y) target, and the test input used for the periodic snapshots.
X_train_t = torch.Tensor(X_train).to(DEVICE)
y_train_t = torch.Tensor(y_train)
target = torch.stack([1 - y_train_t, y_train_t], dim=1).squeeze().to(DEVICE)
X_test_t = torch.Tensor(X_test).to(DEVICE)

lossvals=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(X_train_t)
    # Mean absolute value per parameter tensor, averaged over all tensors.
    # torch.stack keeps the autograd graph; wrapping the values in
    # torch.Tensor([...]) copies them into a new tensor without history, so
    # the penalty would change the reported loss but never the gradients.
    l1_penalty = torch.stack([p.abs().mean() for p in model.parameters()]).mean()
    loss = loss_fn(predictions, target) + alpha * l1_penalty
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%100==0:
        # Snapshot of the calibration on the test split every 100 steps.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # plt.savefig(f'calibration_l1_{it}_it.pdf')

In [None]:
import torch.nn.functional as F
class LogitNormLoss(nn.Module):
    """Cross-entropy computed on L2-normalised, temperature-scaled logits.

    Args:
        device: stored on the instance as ``self.device``; not read by ``forward``.
        t: temperature; the unit-norm logits are divided by this value.
    """

    def __init__(self, device, t=1.0):
        super().__init__()
        self.t = t
        self.device = device

    def forward(self, x, target):
        """Return ``F.cross_entropy`` of ``x / (||x||_2 + 1e-7) / t`` against ``target``."""
        # The small constant keeps the division finite for an all-zero row.
        row_norms = x.norm(p=2, dim=-1, keepdim=True) + 1e-7
        scaled_logits = (x / row_norms) / self.t
        return F.cross_entropy(scaled_logits, target)

In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
loss_fn = LogitNormLoss(device=DEVICE)
n_iter = 2000
alpha=0.01  # not used in this cell

# Loop-invariant tensors: full-batch training input, its two-column
# (1 - y, y) target, and the test input used for the periodic snapshots.
X_train_t = torch.Tensor(X_train).to(DEVICE)
y_train_t = torch.Tensor(y_train)
target = torch.stack([1 - y_train_t, y_train_t], dim=1).squeeze().to(DEVICE)
X_test_t = torch.Tensor(X_test).to(DEVICE)

lossvals=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(X_train_t)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%100==0:
        # Snapshot of the calibration on the test split every 100 steps.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # plt.savefig(f'calibration_softlogits_{it}_it.pdf')

In [None]:
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross-entropy against label-smoothed targets.

    The target distribution is mixed with the uniform distribution over the
    ``n`` classes: ``(1 - epsilon) * target + epsilon / n``, where ``n`` is the
    size of the last dimension of ``preds``.

    Args:
        epsilon: smoothing weight given to the uniform distribution.
    """

    def __init__(self, epsilon: float = 0.1):
        super().__init__()
        self.epsilon = epsilon

    def forward(self, preds, target):
        """Return the smoothed cross-entropy of ``preds`` against ``target``.

        ``target`` may hold per-class probabilities with the same shape as
        ``preds``, or integer class indices with one dimension less.
        """
        n = preds.size()[-1]
        # Integer class indices cannot be smoothed arithmetically; expand them
        # to one-hot rows first. Probability targets take the original path.
        if not target.is_floating_point() and target.dim() == preds.dim() - 1:
            target = F.one_hot(target.long(), num_classes=n).to(preds.dtype)
        target_new = (1-self.epsilon)*target + self.epsilon/n
        return F.cross_entropy(preds, target_new)

l= LabelSmoothingCrossEntropy()

In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
#loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=0.2)
loss_fn = LabelSmoothingCrossEntropy(epsilon=0.01)
n_iter = 2000
alpha=0.01
lossvals=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(torch.Tensor(X_train).to(DEVICE))
    target=torch.stack([1-torch.Tensor(y_train), torch.Tensor(y_train)],axis=1).squeeze().to(DEVICE)
    loss = loss_fn(predictions, target) 
    lossvals.append(loss.detach().cpu().numpy().tolist()) 
    loss.backward()
    optimizer.step()
    if it%100==0:
        with torch.no_grad():
            y_test_pred = model(torch.Tensor(X_test).to(DEVICE))
        
        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # plt.savefig(f'calibration_labelsmoothing_{it}_it.pdf')

In [None]:
from torchvision.ops.focal_loss import sigmoid_focal_loss

In [None]:
from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 2000
alpha=0.01  # not used in this cell

# Loop-invariant tensors for both splits: inputs plus two-column (1 - y, y)
# probability targets. Built once instead of on every iteration.
X_train_t = torch.Tensor(X_train).to(DEVICE)
y_train_t = torch.Tensor(y_train)
target = torch.stack([1 - y_train_t, y_train_t], dim=1).squeeze().to(DEVICE)
X_test_t = torch.Tensor(X_test).to(DEVICE)
y_test_t = torch.Tensor(y_test)
test_target = torch.stack([1 - y_test_t, y_test_t], dim=1).squeeze().to(DEVICE)

lossvals=[]
lossvals_val=[]
for it in tqdm(range(n_iter)):
    optimizer.zero_grad()
    predictions = model(X_train_t)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%50==0:
        # Record the loss on the test split every 50 steps.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

            val_loss = loss_fn(y_test_pred, test_target)
            lossvals_val.append(val_loss.item())
#        plt.title(f'N={it} iterations')
#        plt.savefig(f'calibration_labelsmoothing_{it}_it.pdf')

In [None]:
plt.plot(lossvals,label='train')
# Validation values were recorded every 50 iterations starting at 0. Deriving
# the x positions from the number of recorded values keeps both arrays the
# same length even if n_iter is changed in the training cell.
val_iterations = np.arange(len(lossvals_val)) * 50
plt.plot(val_iterations, lossvals_val, label='val')
plt.legend()


## MixUp

In [None]:
from sklearn.calibration import CalibrationDisplay
from torchvision.transforms import v2
mixup=v2.MixUp(alpha=2.0, num_classes=2)


from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 2000
alpha=0.01  # not used in this cell

# Loop-invariant tensors. MixUp is fed the features reshaped to
# (N, 1, 1, 20) and integer class labels; only the mixing itself is redone
# on every iteration.
X_train_img = torch.Tensor(X_train).view(-1,1,1,20)
y_train_idx = torch.Tensor(y_train).long()
X_test_t = torch.Tensor(X_test).to(DEVICE)
y_test_t = torch.Tensor(y_test)
test_target = torch.stack([1 - y_test_t, y_test_t], dim=1).squeeze().to(DEVICE)

lossvals=[]
lossvals_val=[]
for it in tqdm(range(n_iter)):
    # Fresh mix of samples and labels for this step, flattened back to (N, 20).
    X_train_mixup, y_train_mixup = mixup(X_train_img, y_train_idx)
    X_train_mixup = X_train_mixup.view(-1,20)
    optimizer.zero_grad()
    predictions = model(X_train_mixup.to(DEVICE))
    target=y_train_mixup.squeeze().to(DEVICE)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%50==0:
        # Loss on the unmixed test split plus a calibration snapshot.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

            val_loss = loss_fn(y_test_pred, test_target)
            lossvals_val.append(val_loss.item())
        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # plt.savefig(f'calibration_mixup_alpha_2_{it}_it.pdf')

In [None]:
from sklearn.calibration import CalibrationDisplay
from torchvision.transforms import v2
mixup=v2.MixUp(alpha=1.0, num_classes=2)


from tqdm import tqdm 
DEVICE='cpu' # set to mps for Apple M1/M2/M3, set to cuda if available, else set to cpu
# Creating a model
model = MLP().to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = torch.nn.CrossEntropyLoss()
n_iter = 2000
alpha=0.01  # not used in this cell

# Loop-invariant tensors. MixUp is fed the features reshaped to
# (N, 1, 1, 20) and integer class labels; only the mixing itself is redone
# on every iteration.
X_train_img = torch.Tensor(X_train).view(-1,1,1,20)
y_train_idx = torch.Tensor(y_train).long()
X_test_t = torch.Tensor(X_test).to(DEVICE)
y_test_t = torch.Tensor(y_test)
test_target = torch.stack([1 - y_test_t, y_test_t], dim=1).squeeze().to(DEVICE)

lossvals=[]
lossvals_val=[]
for it in tqdm(range(n_iter)):
    # Fresh mix of samples and labels for this step, flattened back to (N, 20).
    X_train_mixup, y_train_mixup = mixup(X_train_img, y_train_idx)
    X_train_mixup = X_train_mixup.view(-1,20)
    optimizer.zero_grad()
    predictions = model(X_train_mixup.to(DEVICE))
    target=y_train_mixup.squeeze().to(DEVICE)
    loss = loss_fn(predictions, target)
    lossvals.append(loss.item())
    loss.backward()
    optimizer.step()
    if it%100==0:
        # Loss on the unmixed test split plus a calibration snapshot.
        with torch.no_grad():
            y_test_pred = model(X_test_t)

            val_loss = loss_fn(y_test_pred, test_target)
            lossvals_val.append(val_loss.item())
        display = CalibrationDisplay.from_predictions(
                y_test,
                y_test_pred.cpu().numpy()[:,1],
                n_bins=20,
                name='MLP',
            )
        plt.title(f'N={it} iterations')
        # NOTE(review): unlike the other training cells, this one writes a PDF
        # per snapshot into the working directory.
        plt.savefig(f'calibration_mixup_alpha_1_{it}_it.pdf')

In [None]:
mixup=v2.MixUp(alpha=1.0, num_classes=2)
# The original cell built `mixup` but never applied it, so the histogram showed
# the raw 0/1 labels repeated 1000 times. Apply the transform on every pass,
# as the alpha=2.0 cell does, and collect the first column of the mixed labels.
# NOTE(review): confirm that a raw-label baseline was not what was intended.
X_train_img = torch.Tensor(X_train).view(-1,1,1,20)
y_train_idx = torch.Tensor(y_train).long()
y_train_vals = []
for k in range(1000):
    X_train_mixup, y_train_mixup = mixup(X_train_img, y_train_idx)
    y_train_vals += y_train_mixup[:,0].cpu().tolist()
#plt.hist(y_train_mixup[:,0],20,alpha=0.5, label='class 0')
#plt.hist(y_train_mixup[:,1],20,alpha=0.5, label='class 1')
plt.hist(y_train_vals,20)


In [None]:
mixup=v2.MixUp(alpha=2.0, num_classes=2)
# Convert the training split once; only the mixing is repeated 1000 times.
X_train_img = torch.Tensor(X_train).view(-1,1,1,20)
y_train_idx = torch.Tensor(y_train).long()
y_train_vals = []
for k in range(1000):
    X_train_mixup, y_train_mixup = mixup(X_train_img, y_train_idx)
    # Collect the first column of the mixed two-column labels.
    y_train_vals += y_train_mixup[:,0].cpu().tolist()
#plt.hist(y_train_mixup[:,0],20,alpha=0.5, label='class 0')
#plt.hist(y_train_mixup[:,1],20,alpha=0.5, label='class 1')
plt.hist(y_train_vals,20)


In [None]:
# One MixUp pass over the training split, using whichever `mixup` transform
# was defined last; features are flattened back to (N, 20).
mix_inputs = torch.Tensor(X_train).view(-1, 1, 1, 20)
mix_labels = torch.Tensor(y_train).long()
X_train_mixup, y_train_mixup = mixup(mix_inputs, mix_labels)
X_train_mixup = X_train_mixup.view(-1, 20)

# Histogram of each column of the mixed labels, 20 bins each.
class0_weights, class1_weights = y_train_mixup.unbind(dim=1)
plt.hist(class0_weights, 20, alpha=0.5, label='class 0')
plt.hist(class1_weights, 20, alpha=0.5, label='class 1')
