In [1]:
import os
import matplotlib.pyplot as plt
import torch 
# import torch.nn as nn
# import IPython
from sklearn.metrics import recall_score,f1_score,precision_score, confusion_matrix
import numpy as np
from scipy.stats import gaussian_kde
# from utils import *
import torch.nn as nn
import torch.nn.functional as F
import re
import json
import collections
from itertools import islice, chain
from collections import OrderedDict, defaultdict
from typing import Optional, Callable, Tuple, Dict, Union
from torch.hub import load_state_dict_from_url

from mlxtend.plotting import plot_decision_regions


In [2]:


def load_checkpoint(model, checkpoint_path):
    """Load backbone weights from a checkpoint into ``model``.

    The checkpoint is read with ``torch.load`` when ``checkpoint_path`` is an
    existing local file, otherwise it is handed to ``load_state_dict_from_url``.
    The weights are taken from the checkpoint's ``'model'`` entry.

    Keys are cleaned before loading:
      * a leading ``module`` component is removed (first dot-separated part);
      * keys starting with ``fc``, ``classifier``, ``mlp`` or ``head`` are dropped;
      * ``pos_embed`` is passed through ``resize_pos_embed_vit`` together with the
        model's current ``pos_embed`` (helper is defined outside this block).

    Loading is non-strict; the result reported by ``load_state_dict`` is printed.

    Args:
        model: module exposing ``load_state_dict`` (and ``pos_embed`` when the
            checkpoint contains that key).
        checkpoint_path: local file path or URL of the checkpoint.

    Returns:
        The same ``model`` object, updated in place.
    """
    is_local_file = bool(checkpoint_path) and os.path.isfile(checkpoint_path)
    read_checkpoint = torch.load if is_local_file else load_state_dict_from_url
    checkpoint = read_checkpoint(checkpoint_path, map_location='cpu')

    # TODO: better ways
    head_prefixes = ('fc', 'classifier', 'mlp', 'head')

    filtered_state = {}
    for name, tensor in checkpoint['model'].items():
        if name.startswith('module'):
            # Keep everything after the first dot (empty string when there is no dot).
            name = name.partition('.')[2]

        if name.startswith(head_prefixes):
            continue

        # check vit and interpolate
        if name == 'pos_embed':
            tensor = resize_pos_embed_vit(tensor, model.pos_embed.data)

        filtered_state[name] = tensor

    load_report = model.load_state_dict(filtered_state, strict=False)
    print(load_report)
    return model

class ModelWrapper(object):
    """Adapter that gives a torch model a ``predict(X)`` method returning class ids.

    The wrapped model must return a mapping with a ``'logits'`` entry.
    Presumably used with ``mlxtend.plotting.plot_decision_regions`` — confirm.
    """

    def __init__(self, model):
        self.model = model

    def predict(self, X):
        """Return the arg-max class index (numpy array) for every row of ``X``."""
        with torch.no_grad():
            inputs = torch.tensor(X)
            self.model.eval()
            outputs = self.model(inputs)
            # Softmax over the class dimension, then move to host memory.
            probabilities = F.softmax(outputs['logits'], dim=1)
            probabilities = probabilities.to('cpu').detach().numpy()
        return probabilities.argmax(axis=1)

        
class ModelWrapper_feature(object):
    """Prediction adapter around a torch model whose output has a ``'logits'`` entry.

    NOTE(review): the implementation matches ``ModelWrapper`` exactly; whether a
    feature-space variant was intended cannot be determined from this file.
    """

    def __init__(self, model):
        self.model = model

    def predict(self, X):
        """Return predicted class indices for ``X`` as a numpy array."""
        with torch.no_grad():
            batch = torch.tensor(X)
            self.model.eval()
            logits = self.model(batch)['logits']
            class_probs = torch.softmax(logits, dim=1)
            class_probs_np = class_probs.to('cpu').detach().numpy()
            predicted = np.argmax(class_probs_np, axis=1)
        return predicted

        

### Feature

In [None]:
# Directory of the run being analysed. `save_dir` and `save_name` are not assigned
# in the cells visible here; they must already exist in the kernel before this runs.
path_dir=os.path.join(save_dir ,save_name)

# Per-iteration weight/LID/loss log written during training.
# NOTE(review): torch.load unpickles the file — only load logs from a trusted source.
weight_log = torch.load(os.path.join(path_dir,'data_weight_log.pt'))


from tensorboard.backend.event_processing import event_accumulator
import pandas as pd
import os

# TensorBoard scalar tags of interest, grouped by family.
# (other candidates: 'feat_std', 'cov_loss')
scalars = [
    # training
    'train/sup_loss',
    'train/sup_loss_true',
    'train/unsup_loss',
    'train/total_loss',
    'train/mask_ratio',
    'lr',
    'train/prefecth_time',
    'train/run_time',
    # evaluation
    'eval/loss',
    'eval/top-1-acc',
    'eval/balanced_acc',
    'eval/precision',
    'eval/recall',
    'eval/F1',
    # LID / unlabeled loss
    'lid_logits',
    'lid_feat',
    'unl_ce_loss',
    # confusion counts per weighting signal
    'tn_logits',
    'fp_logits',
    'fn_logits',
    'tp_logits',
    'tn_feat',
    'fp_feat',
    'fn_feat',
    'tp_feat',
    'tn_ce',
    'fp_ce',
    'fn_ce',
    'tp_ce',
    # F1 per weighting signal
    'f1_logits',
    'f1_feat',
    'f1_ce',
]

def parse_tensorboard(path_event, scalars):
    """Return a dictionary of pandas DataFrames, one per scalar tag in the event file.

    Every scalar tag found in the event file is returned, not only the requested
    ones: other cells of this notebook read tags that are absent from the
    requested list, so the result is deliberately a superset.

    Previously the ``scalars`` argument was overwritten with the file's own tag
    list before being checked, which made the check vacuous. The requested names
    are now compared against the available tags and a warning lists any that are
    missing (a warning rather than an exception, so existing callers keep working).

    Args:
        path_event: path to the TensorBoard event file (or run directory).
        scalars: iterable of tag names the caller expects to find; may be empty
            or ``None``.

    Returns:
        dict mapping tag name -> ``pd.DataFrame`` built from ``ea.Scalars(tag)``.
    """
    import warnings

    ea = event_accumulator.EventAccumulator(
        path_event,
        # 0 disables down-sampling so every logged scalar point is kept.
        size_guidance={event_accumulator.SCALARS: 0},
    )
    ea.Reload()

    available = ea.Tags()["scalars"]
    missing = [s for s in (scalars or []) if s not in available]
    if missing:
        warnings.warn(
            "some scalars were not found in the event accumulator: {}".format(missing)
        )
    return {k: pd.DataFrame(ea.Scalars(k)) for k in available}

# os.listdir returns entries in arbitrary order, so sort before taking the last
# one; the choice of event file is then deterministic (lexicographically last name).
event_name = sorted(os.listdir(os.path.join(path_dir,'tensorboard')))[-1]

path_event = os.path.join(path_dir,'tensorboard', event_name)

# log_: tag name -> DataFrame of logged scalar points.
log_ = parse_tensorboard(path_event, scalars)
print(weight_log.keys(), '\n',log_.keys())

from utils import *
def clean_ratio_batch(log_,var_name):
    """Return (tn + fp) / (tn + fp + tp + fn) for the ``lid_80/*_<var_name>`` tags.

    Each tag's ``'value'`` column is combined element-wise.
    Presumably this is the fraction of clean samples per batch (noisy being the
    positive class) — confirm against the training code that logs these tags.
    """
    def values(tag_template):
        return log_[tag_template.format(var_name)]['value']

    negatives = values('lid_80/tn_{}') + values('lid_80/fp_{}')
    everything = negatives + values('lid_80/tp_{}') + values('lid_80/fn_{}')
    return negatives / everything
def metric_smoothing(list_metrics):
    """Exponential moving average (decay 0.9) with start-up bias correction.

    Args:
        list_metrics: sequence indexable by position ``0 .. len-1``.

    Returns:
        list of the same length; entry ``i`` is the running average divided by
        ``1 - 0.9 ** (i + 1)``.
    """
    decay = 0.9
    running = 0
    smoothed = []
    for position in range(len(list_metrics)):
        running = decay * running + (1 - decay) * list_metrics[position]
        # Dividing by the accumulated weight removes the bias toward the zero start.
        correction = 1 - decay ** (position + 1)
        smoothed.append(running / correction)
    return smoothed

# Module-level default kept for other cells; smoothing_metrics uses its own local value.
smoothing_alpha = 0.9
def smoothing_metrics(precision_logits, recall_logits,f1_logits):
    """Smooth precision, recall and F1 curves with a bias-corrected EMA (decay 0.9).

    Fix: the smoothed F1 list used to be filled from the *recall* running average,
    so the returned F1 curve was a copy of the recall curve. Each series is now
    smoothed from its own running average.

    Args:
        precision_logits: per-iteration precision values.
        recall_logits: per-iteration recall values.
        f1_logits: per-iteration F1 values.
            All three are indexed by position; the number of steps processed is
            ``len(precision_logits)``.

    Returns:
        Tuple ``(precision_smooth, recall_smooth, f1_smooth)`` of lists.
    """
    smoothing_alpha = 0.9
    n_steps = len(precision_logits)

    def _smooth(values):
        # Running average started at 0, rescaled by the accumulated weight.
        smoothed = []
        running = 0
        for i in range(n_steps):
            running = smoothing_alpha * running + (1 - smoothing_alpha) * values[i]
            smoothed.append(running / (1 - smoothing_alpha ** (i + 1)))
        return smoothed

    return _smooth(precision_logits), _smooth(recall_logits), _smooth(f1_logits)


def get_g_clean_noisy(weight_log,key):
    """Split the per-sample values logged under ``key`` into noisy and clean groups.

    For every logged batch ``i``, sample ``j`` goes to the clean group when its
    index (``weight_log['sample_idx'][i][j].item()``) is in
    ``weight_log['clean_indices']``, otherwise to the noisy group.

    The clean indices are converted to a set once, and each sample is classified
    once, instead of scanning ``clean_indices`` twice per sample.

    Args:
        weight_log: mapping with ``'sample_idx'``, ``'clean_indices'`` and ``key``.
        key: name of the per-batch sequence of per-sample values to split.

    Returns:
        ``(noisy, clean)`` — two lists with one inner list of Python scalars per batch.
    """
    clean_indices = weight_log['clean_indices']
    try:
        # Tensors / arrays expose tolist(); plain lists and sets are used as they are.
        as_python = clean_indices.tolist() if hasattr(clean_indices, 'tolist') else clean_indices
        clean_lookup = set(as_python)
    except TypeError:
        # Not iterable or not hashable: fall back to the container's own `in`.
        clean_lookup = clean_indices

    noise_g_logits = []
    clean_g_logits = []
    for i, batch_idx in enumerate(weight_log['sample_idx']):
        batch_values = weight_log[key][i]
        noisy_batch = []
        clean_batch = []
        for j, idx_ in enumerate(batch_idx):
            bucket = clean_batch if idx_.item() in clean_lookup else noisy_batch
            bucket.append(batch_values[j].item())
        noise_g_logits.append(noisy_batch)
        clean_g_logits.append(clean_batch)
    return noise_g_logits, clean_g_logits


def cal_metric(weight_log,key):
    """Score how well the sign of the weights under ``key`` identifies noisy samples.

    Per logged iteration, a sample is predicted noisy (1) when its weight is
    negative and clean (0) otherwise; the reference label is 0 when the sample
    index is in ``weight_log['clean_indices']`` and 1 otherwise.

    Returns:
        ``(precision, recall, f1, precision_s, recall_s, f1_s)`` — the raw
        per-iteration scores followed by the output of ``smoothing_metrics``.
    """
    precision, recall, f1 = [], [], []

    # noisy label=1, clean label=0
    for step in range(len(weight_log['iteration'])):
        batch_weights = weight_log[key][step].numpy()
        pred = [int(w < 0) for w in batch_weights]
        truth = [
            int(idx_.item() not in weight_log['clean_indices'])
            for idx_ in weight_log['sample_idx'][step]
        ]

        step_recall = recall_score(truth, pred, average='binary')
        step_precision = precision_score(truth, pred, average='binary')
        step_f1 = f1_score(truth, pred, average='binary')

        precision.append(step_precision)
        recall.append(step_recall)
        f1.append(step_f1)

    smoothed = smoothing_metrics(precision, recall, f1)
    return (precision, recall, f1) + tuple(smoothed)

In [None]:

def _plot_logged_scalar(ax, tag, label=None, **plot_kwargs):
    """Plot one TensorBoard scalar from `log_` against its own logged steps."""
    series = log_[tag]
    ax.plot(series['step'], series['value'],
            label=tag if label is None else label, **plot_kwargs)


def _decorate_axes(ax, title, ylabel, xlabel="Iteration"):
    """Apply axis labels, legend and title to one panel."""
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.set_title(title)


fig, axes = plt.subplots(6,2, figsize=(18,30))
ax1, ax2,ax3,ax4,ax5,ax6,ax7,ax8, ax9, ax10,ax11,ax12 = axes.ravel()

# Kept for any later cell that reads it. Each series below is plotted against its
# own 'step' column, so tags logged at different frequencies (e.g. eval vs. train)
# no longer raise a length mismatch.
steps=log_['train/sup_loss']['step']

# Supervised loss: weighted / unweighted, against given labels and ground truth.
_plot_logged_scalar(ax1, 'train/sup_loss_weighted')  # which is the training loss
_plot_logged_scalar(ax1, 'train/sup_loss')
_plot_logged_scalar(ax1, 'train/sup_loss_true', alpha=0.5)
_plot_logged_scalar(ax1, 'train/sup_loss_true_weighted', alpha=0.5, c='orange')
_decorate_axes(ax1, 'Training Loss', "Loss")

# Weighted loss only: given labels vs. ground-truth labels.
_plot_logged_scalar(ax2, 'train/sup_loss_weighted')  # which is the training loss
_plot_logged_scalar(ax2, 'train/sup_loss_true_weighted', alpha=0.5, c='orange')
_decorate_axes(ax2, 'Training Loss calculated with ground truth label', "Loss")

_plot_logged_scalar(ax3, 'eval/loss', label='test/loss')
_decorate_axes(ax3, 'Test Set - Loss', "Loss")

_plot_logged_scalar(ax4, 'eval/top-1-acc', label='test/top-1-acc')
_decorate_axes(ax4, 'Test Set - Acc', "Acc")

_plot_logged_scalar(ax5, 'eval/balanced_acc', label='test/balanced_acc')
_decorate_axes(ax5, 'Test Set balanced_acc', "Acc")

_plot_logged_scalar(ax6, 'train_val_acc', label='val/top-1-acc')
_decorate_axes(ax6, 'Validation Set - Acc', "Validation Acc")

# The remaining panels come from weight_log and are plotted against list position.
ax7.plot(weight_log['unl_ce_loss'],label='val/loss')
_decorate_axes(ax7, "Validation Set - Loss", "Loss")

ax8.plot([i.mean() for i in weight_log['labi_lid_feat_l2']],label='LID(feat) l2')
_decorate_axes(ax8, 'Training Set - LID', "LID")

ax9.plot(weight_log['lid_feat_l2'] ,label='Validation LID(feat) l2')
_decorate_axes(ax9, 'LID(feat) of Validation set', "LID")

_plot_logged_scalar(ax11, 'train/acc_lb', label='train/acc')
_plot_logged_scalar(ax11, 'train/acc_lb_true', label='train/acc_true')
_decorate_axes(ax11, 'Training Set - Acc', "Acc")

# ax10 and ax12 carry no data; hide them instead of showing empty frames.
for unused_ax in (ax10, ax12):
    unused_ax.axis('off')

plt.show()

### raw_weights

In [None]:

# Choose the weight_log key stem for the weighting signal selected by `w_type`
# (`w_type` is expected to be set in the kernel before this cell runs).
if w_type == 'logits':
    prefix = 'lid_logits_l2'
elif w_type in ('feat', 'n'):
    prefix = 'lid_feat_l2'
else:
    prefix = 'ce_loss'

In [None]:
# precision_logits, recall_logits, f1_logits, precision_logits_s, recall_logits_s, f1_logits_s = cal_metric(weight_log,'w_lid_{}_l2'.format(prefix))
# plt.plot(weight_log['iteration'], f1_logits, label='f1 feat l2')
# plt.plot(weight_log['iteration'], f1_logits_s, label='f1 logits feat l2')

# plt.ylabel("F1")
# plt.ylim(0,1)
# plt.legend()
# plt.title('F1 for Mislabeled Samples Identification based on Feature l2')


In [None]:
def _label_weight_axes(ax, title, ylabel):
    """Attach legend, title and axis labels to one weight-curve panel."""
    ax.legend()
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('iteration')


def _plot_weight_hist_grid(clean_batches, noisy_batches, iterations, title_format, n_bins=10):
    """Draw an 8x5 grid of clean-vs-noisy weight histograms, one panel per iteration.

    Iterations beyond the number of logged batches are skipped instead of raising
    IndexError; at most 40 iterations (one per panel) are shown.
    """
    fig, axes = plt.subplots(8, 5, tight_layout=True,sharey=True,figsize=(15.5,21))
    n_logged = min(len(clean_batches), len(noisy_batches))
    shown = [i for i in iterations if i < n_logged]
    for col, i in zip(axes.ravel(), shown):
        col.hist(clean_batches[i], bins=n_bins, alpha =0.5,label ='clean_weight')
        col.hist(noisy_batches[i], bins=n_bins,alpha =0.5,label ='noisy_weight',color='orange')
        col.set_xlabel("weight")
        col.set_ylabel("Frequency")
        col.set_title(title_format % (i), y=1.05)
        col.legend(loc='best')
    plt.show()
    plt.close()


for prefix in ['lid_feat_l2']:
    print('------------------------------------------------',prefix)
    w_noise_lid_logits, w_clean_lid_logits = get_g_clean_noisy(weight_log,'w_{}'.format(prefix))
    w_all = weight_log['w_{}'.format(prefix)]

    # Normalised weights: negatives clipped to 0, then each batch scaled to sum to 1.
    # NOTE(review): a batch whose weights are all <= 0 sums to 0 and yields NaN here.
    weight_log['w_lid_l2_nor'] =[]
    for it in range(len(w_all)):
        total_meta = torch.maximum(torch.zeros(len(w_all[it])),w_all[it])
        total_meta /= torch.sum(total_meta)
        weight_log['w_lid_l2_nor'].append(total_meta)
    w_noise_lid_logits_n, w_clean_lid_logits_n=get_g_clean_noisy(weight_log,'w_lid_l2_nor')

    # ---- raw weights: per-batch mean / min / max -------------------------------
    noise_w_avg = [np.array(i).mean() for i in w_noise_lid_logits]
    noise_w_min = [np.array(i).min() for i in w_noise_lid_logits]
    noise_w_max = [np.array(i).max() for i in w_noise_lid_logits]

    clean_w_avg = [np.array(i).mean() for i in w_clean_lid_logits]
    clean_w_max = [np.array(i).max() for i in w_clean_lid_logits]
    clean_w_min = [np.array(i).min() for i in w_clean_lid_logits]

    s=len(clean_w_avg)
    fig, axes = plt.subplots(1,2, figsize=(15,5),sharey=True)
    ax1, ax2 = axes.ravel()

    ax1.plot(clean_w_avg[:s],label='clean_w_avg')
    ax1.plot(clean_w_max[:s],label='clean_w_max')
    ax1.plot(clean_w_min[:s],label='clean_w_min')
    _label_weight_axes(ax1, 'Training Raw Weights -Clean examples', 'Raw Weights')

    ax2.plot(noise_w_avg[:s],label='noisy_w_avg')
    ax2.plot(noise_w_max[:s],label='noisy_w_max')
    ax2.plot(noise_w_min[:s],label='noisy_w_min')
    # Fix: these labels were previously applied to ax1 a second time.
    _label_weight_axes(ax2, 'Training Raw Weights -Noisy examples', 'Raw Weights')

    plt.show()

    # ---- normalised weights: per-batch statistics and smoothed curves ----------
    noise_w_avg = [np.array(i).mean() for i in w_noise_lid_logits_n]
    noise_w_min = [np.array(i).min() for i in w_noise_lid_logits_n]
    noise_w_max = [np.array(i).max() for i in w_noise_lid_logits_n]

    clean_w_avg = [np.array(i).mean() for i in w_clean_lid_logits_n]
    clean_w_max = [np.array(i).max() for i in w_clean_lid_logits_n]
    clean_w_min = [np.array(i).min() for i in w_clean_lid_logits_n]

    clean_w_avg_s = metric_smoothing(clean_w_avg)
    clean_w_max_s =metric_smoothing(clean_w_max)

    noise_w_avg_s = metric_smoothing(noise_w_avg)
    noise_w_max_s =metric_smoothing(noise_w_max)

    fig, axes = plt.subplots(2,2, figsize=(15,10),sharey=True)
    ax1, ax2,ax3,ax4 = axes.ravel()

    ax1.plot(clean_w_avg[:s],label='clean_w_avg')
    ax1.plot(clean_w_max[:s],label='clean_w_max')
    _label_weight_axes(ax1, 'Training Normalized Weights -Clean examples', 'Normalized Weights')

    ax2.plot(noise_w_avg[:s],label='noisy_w_avg')
    ax2.plot(noise_w_max[:s],label='noisy_w_max')
    # Fix: these labels were previously applied to ax1 a second time.
    _label_weight_axes(ax2, 'Training Normalized Weights -Noisy examples', 'Normalized Weights')

    ax3.plot(clean_w_avg_s[:s],label='clean_w_avg_s')
    ax3.plot(clean_w_max_s[:s],label='clean_w_max_s')
    ax3.plot(noise_w_avg_s[:s],label='noisy_w_avg_s')
    _label_weight_axes(ax3, 'Training Normalized Weights (Smooth) -Clean examples', 'Normalized Weights')

    ax4.plot(noise_w_avg_s[:s],label='noisy_w_avg_s')
    ax4.plot(noise_w_max_s[:s],label='noisy_w_max_s')
    _label_weight_axes(ax4, 'Training Normalized Weights (Smooth) -Noisy examples', 'Normalized Weights')
    plt.show()
    plt.close()

    # ---- histograms of raw and normalised weights at selected iterations -------
    print('------------------------------------------------',prefix)
    # w_noise_lid_logits / w_clean_lid_logits from the top of the loop are reused;
    # the inputs have not changed, so splitting them again is unnecessary.
    it_perepo= num_labels/batch_size
    s_= int(it_perepo/2)

    n_bins=10
    iteration_l =list(range(0,200,2))
    _plot_weight_hist_grid(w_clean_lid_logits, w_noise_lid_logits, iteration_l,
                           'Iteration %i: weight ', n_bins=n_bins)

    _plot_weight_hist_grid(w_clean_lid_logits_n, w_noise_lid_logits_n, iteration_l,
                           'Iteration %i: Normalized weight ', n_bins=n_bins)



## Weight Distribution

### Logits

### Further investigate the relationship between LID(xi) and noisy labels


dict_keys(['true_label', 'train_labels', 'clean_indices', 'iteration', 'sample_idx', 'w_ce_loss', 'w_lid_logits_l2', 'w_lid_feat_l2', 'w_lid_logits_cos', 'w_lid_feat_cos', 'lid_feat_l2', 'lid_feat_cos', 'lid_logits_l2', 'lid_logits_cos', 'unl_ce_loss', 'labi_loss', 'labi_lid_logits_l2', 'labi_lid_feat_l2', 'labi_lid_logits_cos', 'labi_lid_feat_cos'])

In [None]:
# Per-batch training losses ('labi_loss'), split into noisy and clean samples.
noise_loss_feat,clean_loss_feat = get_g_clean_noisy(weight_log,'labi_loss')
# Per-batch values of 'labi_lid_logits_l2', split the same way.
# NOTE(review): the variable names end in `_feat` although the key read here is the
# logits one; a later cell reassigns the same names from 'labi_lid_feat_l2'.
noise_lidfl2_feat,clean_lidfl2_feat = get_g_clean_noisy(weight_log,'labi_lid_logits_l2')

In [None]:
# Per-batch loss statistics, each divided by 128.
# NOTE(review): 128 is presumably the batch size — confirm; it is hard-coded here.
clean_loss_avg = [np.asarray(batch).mean() / 128 for batch in clean_loss_feat]
noise_loss_avg = [np.asarray(batch).mean() / 128 for batch in noise_loss_feat]
clean_loss_max = [np.asarray(batch).max() / 128 for batch in clean_loss_feat]
clean_loss_min = [np.asarray(batch).min() / 128 for batch in clean_loss_feat]

# Side-by-side panels sharing both axes: clean on the left, mislabeled on the right.
fig, axes = plt.subplots(1,2, figsize=(11,4), tight_layout=True, sharey=True,sharex=True)
ax1, ax2 = axes.ravel()

loss_panels = (
    (ax1, clean_loss_avg, 'green', 'clean_loss_avg', 'Raw training Loss - clean'),
    (ax2, noise_loss_avg, 'blue', 'mislabeled_loss_avg', 'Raw training Loss - mislabeled'),
)
for panel, curve, colour, curve_label, panel_title in loss_panels:
    panel.plot(curve, c=colour, label=curve_label)
    panel.set_ylabel("Loss")
    panel.set_xlabel("Iteration")
    panel.legend()
    panel.set_title(panel_title)
plt.show()

# Both curves overlaid on a single pyplot figure.
plt.plot(clean_loss_avg, label='clean_loss_avg', c='green')
plt.plot(noise_loss_avg, c='blue', label='mislabeled_loss_avg')
plt.xlabel('iteration')
plt.ylabel('Loss')
plt.title('Training Loss')
plt.legend(loc='best')
plt.show()

## Weighted Loss


In [None]:
prefix = 'lid_feat_l2'
w_all_ = weight_log['w_{}'.format(prefix)]
weight_log['weighted_loss'] = []

# Min-max normalise each batch of weights, rescale them to sum to 1, and multiply
# by the per-sample loss of the same batch.
for it in range(len(w_all_)):
    batch_w = w_all_[it]
    shifted = batch_w - batch_w.min()
    span = batch_w.max() - batch_w.min()
    if span == 0:
        # All weights in the batch are equal: min-max would be 0/0 (NaN), so fall
        # back to uniform weights.
        total_meta_ = torch.ones_like(shifted)
    else:
        total_meta_ = shifted / span
    total_meta_ /= torch.sum(total_meta_)
    weight_log['weighted_loss'].append(total_meta_*weight_log['labi_loss'][it])


noise_weighted_loss_feat,clean_weighted_loss_feat = get_g_clean_noisy(weight_log,'weighted_loss')

clean_wloss_avg = [np.array(i).mean() for i in clean_weighted_loss_feat]
noise_wloss_avg = [np.array(i).mean() for i in noise_weighted_loss_feat]


# Side-by-side panels sharing both axes: clean vs. mislabeled weighted loss.
fig, axes = plt.subplots(1,2, figsize=(11,4), tight_layout=True, sharey=True,sharex=True)
ax1, ax2 = axes.ravel()

ax1.plot(clean_wloss_avg, c='green',label='clean_weighted_loss_avg')
ax1.set_ylabel("Loss")
ax1.set_xlabel("Iteration")
ax1.legend()
ax1.set_title('Weighted training Loss - clean')

ax2.plot(noise_wloss_avg, c='blue',label='mislabeled_weighted_loss_avg')
ax2.set_ylabel("Loss")
ax2.set_xlabel("Iteration")
ax2.legend()
# Title typo fixed ("taining" -> "training").
ax2.set_title('Weighted training Loss - mislabeled')
plt.show()


# Both curves overlaid on a single pyplot figure.
plt.plot(clean_wloss_avg,label='clean_weighted_loss_avg',c='green')
plt.plot(noise_wloss_avg, c='blue',label='mislabeled_weighted_loss_avg')
plt.title('Weighted Training Loss')
plt.legend(loc='best')
plt.ylabel('Loss')
plt.xlabel('iteration')
plt.show()


In [None]:
# Per-batch LID statistics, computed from whatever noise_lidfl2_feat /
# clean_lidfl2_feat currently hold in the kernel.
clean_lid_avg = [np.asarray(batch).mean() for batch in clean_lidfl2_feat]
noise_lid_avg = [np.asarray(batch).mean() for batch in noise_lidfl2_feat]
clean_lid_max = [np.asarray(batch).max() for batch in clean_lidfl2_feat]
clean_lid_min = [np.asarray(batch).min() for batch in clean_lidfl2_feat]

# Clean examples: mean / max / min per batch.
for clean_curve, clean_curve_label in (
    (clean_lid_avg, 'clean_lid_avg'),
    (clean_lid_max, 'clean_lid_max'),
    (clean_lid_min, 'clean_lid_min'),
):
    plt.plot(clean_curve, label=clean_curve_label)

plt.title('Training LID(logits) -Clean examples')
plt.xlabel('iteration')
plt.ylabel('LID')
plt.legend()
plt.show()

# Noisy examples: mean per batch (no legend or x-label is drawn for this figure).
plt.plot(noise_lid_avg, c='blue', label='noise_lid_avg')
plt.ylabel('LID')
plt.title('Training LID(logits) - Noisy examples')
plt.show()


In [None]:
# Re-split using the feature-space LID key; this overwrites the same two names
# assigned by the earlier 'labi_lid_logits_l2' split.
lid_feat_split = get_g_clean_noisy(weight_log, 'labi_lid_feat_l2')
noise_lidfl2_feat, clean_lidfl2_feat = lid_feat_split

clean_lid_avg_ = [np.mean(batch) for batch in clean_lidfl2_feat]
noise_lid_avg_ = [np.mean(batch) for batch in noise_lidfl2_feat]
clean_lid_max_ = [np.max(batch) for batch in clean_lidfl2_feat]
clean_lid_min_ = [np.min(batch) for batch in clean_lidfl2_feat]

# Figure 1: clean examples (mean / max / min per batch).
plt.plot(clean_lid_avg_, label='clean_lid_avg')
plt.plot(clean_lid_max_, label='clean_lid_max')
plt.plot(clean_lid_min_, label='clean_lid_min')
plt.xlabel('iteration')
plt.ylabel('LID')
plt.title('Training LID(feat) -Clean examples')
plt.legend()
plt.show()

# Figure 2: noisy examples (mean per batch; no legend or x-label is drawn).
plt.plot(noise_lid_avg_, c='blue', label='noise_lid_avg')
plt.ylabel('LID')
plt.title('Training LID(feat) - Noisy examples')
plt.show()


In [None]:
# weighted loss
# Iterations per epoch, from kernel-level `num_labels` and `batch_size`.
it_perepo = num_labels / batch_size
s_ = int(it_perepo / 2)

# One histogram every half epoch (at least every iteration); 8x5 panels are available.
iteration_l = list(range(0, 100 * int(it_perepo), max(s_, 1)))
fig, axes = plt.subplots(8, 5, tight_layout=True, sharex=False, sharey=True, figsize=(15.5, 21))
n_bins = 10

for col in axes.ravel():
    if not iteration_l:
        break
    i = iteration_l.pop(0)
    col.hist(clean_weighted_loss_feat[i], bins=n_bins, alpha=0.5, label='clean')
    col.hist(noise_weighted_loss_feat[i], bins=n_bins, alpha=0.5, label='noisy', color='orange')
    col.set_xlabel("Loss")
    col.set_ylabel("Frequency")
    col.set_title('Iteration %i: Weighted Loss ' % (i), y=1.05)
    col.legend(loc='best')

plt.show()
plt.close()


In [None]:
# Sampling stride: half an epoch's worth of iterations (never less than one).
it_perepo = num_labels / batch_size
s_ = int(it_perepo / 2)
iteration_l = list(range(0, 100 * int(it_perepo), max(s_, 1)))

fig, axes = plt.subplots(8, 5, tight_layout=True, sharex=True, sharey=True, figsize=(15.5, 21))
n_bins = 10

# Fill the panels row by row until the iterations or the panels run out.
remaining_panels = iter(axes.flat)
while iteration_l:
    col = next(remaining_panels, None)
    if col is None:
        break
    i = iteration_l.pop(0)
    col.hist(clean_loss_feat[i], bins=n_bins, alpha=0.5, label='clean_loss')
    col.hist(noise_loss_feat[i], bins=n_bins, alpha=0.5, label='noisy_loss', color='orange')
    col.set_xlabel("Loss")
    col.set_ylabel("Frequency")
    col.set_title('Iteration %i: Loss ' % (i), y=1.05)
    col.legend(loc='best')

plt.show()
plt.close()


In [None]:
# Iterations per epoch, from kernel-level `num_labels` and `batch_size`.
it_perepo= num_labels/batch_size
s_= int(it_perepo/2)

# One histogram every half epoch (at least every iteration).
iteration_l =list(range(0,100*int(it_perepo),max(s_,1)))

fig, axes = plt.subplots(8, 5, tight_layout=True,sharex=True,sharey=True,figsize=(15.5,21))
# Fix: n_bins was used below without being defined in this cell, so the cell only
# worked if another histogram cell had been run first.
n_bins=10

for row in axes:
    for col in row:
        if iteration_l:
            i = iteration_l.pop(0)
            col.hist(clean_lidfl2_feat[i], bins=n_bins, alpha =0.5,label ='clean_lid')
            col.hist(noise_lidfl2_feat[i], bins=n_bins,alpha =0.5,label ='noisy_lid',color='orange')
            col.set_xlabel("LID")
            col.set_ylabel("Frequency")
            col.set_title('Iteration %i: LID '%(i),y=1.05 )

            col.legend(loc='best')
plt.show()
plt.close()


In [None]:


# `w_type`, `normal_`, `num_labels` and `batch_size` are expected to be set in the
# kernel before this cell runs.
if w_type in ('feat', 'n'):
    prefix = 'lid_feat_l2'
else:
    prefix = 'ce_loss'

norm_key = 'w_{}_{}'.format(prefix,normal_)
weight_log[norm_key] = []
w_all = weight_log['w_{}'.format(prefix)]

# Min-max normalise each batch of weights, then rescale the batch to sum to 1.
# Fix: the denominator was `max` instead of `max - min`, which is inconsistent with
# the weighted-loss cell and divides by zero whenever the batch maximum is 0.
for it in range(len(w_all)):
    batch_w = w_all[it]
    shifted = batch_w - batch_w.min()
    span = batch_w.max() - batch_w.min()
    if span == 0:
        # All weights equal: fall back to uniform weights instead of 0/0.
        total_meta = torch.ones_like(shifted)
    else:
        total_meta = shifted / span
    total_meta /= torch.sum(total_meta)
    weight_log[norm_key].append(total_meta)


w_noise_lid_logits_n, w_clean_lid_logits_n=get_g_clean_noisy(weight_log,norm_key)
# Spread of the normalised weights within each batch.
std_ = [i.std() for i in weight_log[norm_key]]


s=len(w_all)

noise_w_avg = [np.array(i).mean() for i in w_noise_lid_logits_n]
noise_w_min = [np.array(i).min() for i in w_noise_lid_logits_n]
noise_w_max = [np.array(i).max() for i in w_noise_lid_logits_n]

clean_w_avg = [np.array(i).mean() for i in w_clean_lid_logits_n]
clean_w_max = [np.array(i).max() for i in w_clean_lid_logits_n]
clean_w_min = [np.array(i).min() for i in w_clean_lid_logits_n]

clean_w_avg_s = metric_smoothing(clean_w_avg)
clean_w_max_s =metric_smoothing(clean_w_max)

noise_w_avg_s = metric_smoothing(noise_w_avg)
noise_w_max_s = metric_smoothing(noise_w_max)
std_s = metric_smoothing(std_)


fig, axes = plt.subplots(2,2, figsize=(15,10),sharey=True)
ax1, ax2,ax3,ax4 = axes.ravel()

ax1.plot(clean_w_avg[:s],label='clean_w_avg')
ax1.legend()
ax1.set_title('Training Normalized Weights -Clean examples')
ax1.set_ylabel('Normalized Weights')
ax1.set_xlabel('iteration')

ax2.plot(noise_w_avg[:s],label='noisy_w_avg')
ax2.legend()
ax2.set_title('Training Normalized Weights -Noisy examples')
# Fix: these labels were previously applied to ax1 a second time.
ax2.set_ylabel('Normalized Weights')
ax2.set_xlabel('iteration')

# Smoothed clean / noisy means together with the smoothed per-batch std.
ax3.plot(clean_w_avg_s[:s],label='clean_w_avg_s')
ax3.plot(noise_w_avg_s[:s],label='noisy_w_avg_s')
ax3.plot(std_s[:s],label='std')
ax3.legend()
ax3.set_title('Training Normalized Weights (Smooth) ')
ax3.set_ylabel('Normalized Weights')
ax3.set_xlabel('iteration')

# Unsmoothed clean vs. noisy means on one panel.
ax4.plot(clean_w_avg[:s],label='clean_w_avg')
ax4.plot(noise_w_avg[:s],label='noisy_w_avg')
ax4.legend()
ax4.set_title('Training Normalized Weights (Raw)')
ax4.set_ylabel('Normalized Weights')
ax4.set_xlabel('iteration')
plt.show()
plt.close()

print('------------------------------------------------',prefix)
it_perepo= num_labels/batch_size
s_= int(it_perepo/2)

# One histogram every half epoch (at least every iteration). Iterations beyond the
# logged range are dropped so indexing below cannot raise IndexError.
n_logged = min(len(w_clean_lid_logits_n), len(w_noise_lid_logits_n))
iteration_l = [i for i in range(0,100*int(it_perepo),max(s_,1)) if i < n_logged]

fig, axes = plt.subplots(8, 5, tight_layout=True,sharex=True,sharey=True,figsize=(15.5,21))
n_bins=10
for row in axes:
    for col in row:
        if iteration_l:
            i = iteration_l.pop(0)
            col.hist(w_clean_lid_logits_n[i], bins=n_bins, alpha =0.5,label ='clean_weight')
            col.hist(w_noise_lid_logits_n[i], bins=n_bins,alpha =0.5,label ='noisy_weight',color='orange')
            col.set_xlabel("weight")
            col.set_ylabel("Frequency")
            col.set_title('Iteration %i: Normalized weight '%(i),y=1.05 )

            col.legend(loc='best')
plt.show()

In [None]:
# NOTE(review): the weight_log key listing pasted earlier in this notebook does not
# include 'feat_std' or 'cov_loss'; this cell needs a run that logged them.
feat_std,cov_loss= weight_log['feat_std'] ,weight_log['cov_loss']
fig, axes = plt.subplots(1, 2, tight_layout=True,sharex=True,sharey=False)
# Fix: the new axes were never unpacked, so the plots below were drawn on stale
# ax1/ax2 left over from an earlier figure and this figure stayed empty.
ax1, ax2 = axes.ravel()

ax1.plot(feat_std, c='green',label='feat_std')
ax1.set_ylabel("feat_std")
ax1.set_xlabel("Iteration")
ax1.legend()
ax1.set_title('feat_std loss')

ax2.plot(cov_loss, c='blue',label='cov_loss')
ax2.set_ylabel("cov_loss")
ax2.set_xlabel("Iteration")
ax2.legend()
ax2.set_title('cov_loss')
plt.show()