In [90]:
from OSLPP import *
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import Counter
import os
import pandas as pd
from torch.optim.lr_scheduler import StepLR
import pickle
from sklearn.metrics import classification_report
from sklearn.model_selection import StratifiedKFold

In [22]:
# Load the pickled dataset configuration into `config`.
# The commented-out loop header in the experiment cell suggests it maps
# (source, target) -> (common, tgt_private) — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('small_datasets.pkl', 'rb') as f:
    config = pickle.load(f)

In [23]:
# Set CUDA_VISIBLE_DEVICES so that only the GPU with index 1 is exposed to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

In [24]:
def get_abbrev(source, target):
    """Return the upper-cased first element of `source` followed by that of `target`."""
    initials = [name[0].upper() for name in (source, target)]
    return initials[0] + initials[1]
def fmeasures(d):
    """Render a mapping of name -> fraction as space-separated `name=percent` items (two decimals)."""
    parts = []
    for k, v in d.items():
        parts.append(f'{k}={v*100:.2f}')
    return ' '.join(parts)

In [25]:
class FeaturesDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs pre-computed features with their labels."""

    def __init__(self, features, labels):
        # Both containers are stored as given and indexed in parallel.
        self.labels = labels
        self.features = features

    def __len__(self):
        """Number of samples, taken from the label container."""
        return len(self.labels)

    def __getitem__(self, i):
        """Return the (feature, label) pair at position `i`."""
        feature, label = self.features[i], self.labels[i]
        return feature, label

In [26]:
def _create_balanced_sampler(ds:FeaturesDataset):
    """Build a WeightedRandomSampler whose per-sample weight is the inverse class frequency.

    `ds.labels` must provide `.tolist()`. Every class ends up with the same
    total weight, and the sampler draws as many samples as there are labels.
    """
    labels = ds.labels.tolist()
    counts = Counter(labels)
    per_sample_weights = [1.0 / counts[label] for label in labels]
    return torch.utils.data.WeightedRandomSampler(per_sample_weights, len(labels))

def _create_trn_dataloader(ds, batch_size, balanced):
    if balanced: return torch.utils.data.DataLoader(ds, batch_size=batch_size, sampler=_create_balanced_sampler(ds), drop_last=True)
    else: torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=True, drop_last=True)

In [27]:
def train_NN(feats_S, lbls_S, num_epochs, params, balanced, lr, batch_size=32):
    """Train a two-layer MLP classifier (Linear -> ReLU -> Linear) on source features.

    Args:
        feats_S: source features, indexed per sample; each sample feeds a Linear
            layer with `params.pca_dim` inputs.
        lbls_S: tensor of source labels; must contain exactly the classes
            0 .. num_common + num_src_priv - 1.
        num_epochs: number of passes over the training loader.
        params: object exposing `pca_dim`, `proj_dim`, `num_common`, `num_src_priv`.
        balanced: forwarded to `_create_trn_dataloader`.
        lr: learning rate for Adam.
        batch_size: mini-batch size; defaults to 32, the previously hard-coded value.

    Returns:
        The trained model, switched to eval mode.

    Raises:
        AssertionError: if the labels do not cover exactly the expected classes.
    """
    num_src_classes = params.num_common + params.num_src_priv
    seen_classes = lbls_S.unique()
    # Check the count first: comparing tensors of different lengths would raise a
    # broadcasting RuntimeError instead of a readable assertion failure.
    assert seen_classes.numel() == num_src_classes and (seen_classes == torch.arange(num_src_classes)).all(), (
        f'expected labels 0..{num_src_classes - 1}, got {seen_classes.tolist()}')
    # Use the GPU when one is available (the original behaviour), otherwise fall back to CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = nn.Sequential(
        nn.Linear(params.pca_dim, params.proj_dim),
        nn.ReLU(),
        nn.Linear(params.proj_dim, num_src_classes),
    ).to(device).train()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss().to(device)
    ds = FeaturesDataset(feats_S, lbls_S)
    dl = _create_trn_dataloader(ds, batch_size, balanced)
    for _ in range(num_epochs):
        for f, l in dl:
            optimizer.zero_grad()
            loss_fn(model(f.to(device)), l.to(device)).backward()
            optimizer.step()
    return model.eval()

In [28]:
def entropy_loss(logits):
    """Mean entropy (natural log) of the softmax distributions taken over dim 1.

    Args:
        logits: tensor whose dim 1 indexes the classes.

    Returns:
        Scalar tensor: the per-row entropies averaged over the remaining dimensions.
    """
    # log_softmax keeps the log-probabilities finite when a probability underflows
    # to 0; softmax(...).log() would give 0 * -inf = NaN for such entries.
    log_probs = F.log_softmax(logits, dim=1)
    probs = log_probs.exp()
    entropy = -(probs * log_probs).sum(dim=1)
    return entropy.mean()

In [29]:
def predict_NN(model, feats, lbls):
    """Run `model` over `feats` in order and return the inputs with their class probabilities.

    Args:
        model: a torch module producing logits with classes along dim 1.
        feats: features to classify.
        lbls: labels paired with `feats`; only used to build the dataset.

    Returns:
        (feats, preds): the features re-assembled from the batches in their
        original order, and the softmax probabilities as a CPU tensor.
    """
    ds = FeaturesDataset(feats, lbls)
    dl = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=False)
    model = model.eval()
    # Send each batch to wherever the model lives instead of assuming CUDA;
    # a parameter-free model keeps the old default of the current CUDA device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')
    batch_feats = []
    batch_probs = []
    with torch.no_grad():
        for f, _ in dl:
            out = model(f.to(device))
            batch_probs.append(F.softmax(out, dim=1).detach().cpu())
            batch_feats.append(f.detach())
    return torch.cat(batch_feats, dim=0), torch.cat(batch_probs, dim=0)

In [85]:
def set_seed(seed:int):
    """Seed Python's `random`, NumPy and PyTorch (CPU and every CUDA device).

    Previously only the torch generators were seeded, so anything drawing from
    `random` or `numpy.random` stayed non-reproducible between runs.
    """
    # Local imports: neither module is imported explicitly at the top of the notebook.
    import random
    import numpy as np

    random.seed(seed)
    # NumPy only accepts seeds in [0, 2**32), so fold larger/negative values into range.
    np.random.seed(seed % 2**32)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

In [92]:
# Domains to evaluate; the experiment loop uses each one as both source and target.
sources = ['painting', 'real', 'sketch']

In [93]:
# Source-only baseline: for each domain, 5-fold stratified cross-validation on the
# source features (target == source), over a small grid of epochs and learning rates.
#
# Rows are collected in `records` and turned into `results` once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call.
# NOTE(review): integer fields (epochs, fold index) may now be stored as ints rather
# than the floats the pre-typed empty frame produced — check downstream consumers.
#
# NOTE(review): `seed`, `common` and `tgt_private` are not assigned in any visible
# cell (the commented-out loop header below used to provide the latter two), and
# `train_initial_NN` is not defined in this notebook (only `train_NN` is) — presumably
# they come from leftover kernel state or `from OSLPP import *`. Confirm before
# relying on Restart & Run All.
result_columns = ['source', 'lr', 'seed', 'epochs', 'report']
records = []
used_src = set()
# for (source, target), (common, tgt_private) in config.items():
splitter = StratifiedKFold(n_splits=5)
try:
    for source in sources:
        target = source
        if source in used_src:
            continue
        used_src.add(source)
        for epochs in [10, 50]:
            for lr in [1e-3, 1e-4]:
                set_seed(seed)
                print(source, '->', target, 'lr=', lr, 'seed=', seed)
                params = Params(pca_dim=512, proj_dim=128, T=10, n_r=1200, n_r_ratio=None,
                                dataset='DomainNet_DCC', source=source, target=target,
                                num_common=len(common), num_src_priv=0, num_tgt_priv=len(tgt_private))
                (feats_S, lbls_S), (feats_T, lbls_T) = create_datasets_sub(params.dataset,
                                                                           params.source,
                                                                           params.target,
                                                                           common,
                                                                           tgt_private)
                num_src_classes = params.num_common + params.num_src_priv

                # l2 normalization and pca
                feats_S, feats_T = do_l2_normalization(feats_S, feats_T)
                feats_S, feats_T = do_pca(feats_S, feats_T, params.pca_dim)
                feats_S, feats_T = do_l2_normalization(feats_S, feats_T)

                # initial
                feats_S, feats_T = torch.tensor(feats_S), torch.tensor(feats_T)
                lbls_S, lbls_T = torch.tensor(lbls_S), torch.tensor(lbls_T)

                for i, (train_idx, test_idx) in enumerate(splitter.split(feats_S, lbls_S)):
                    feats_S_train, lbls_S_train = feats_S[train_idx], lbls_S[train_idx]
                    feats_S_test, lbls_S_test = feats_S[test_idx], lbls_S[test_idx]

                    model = train_initial_NN(feats_S_train, lbls_S_train, epochs, params, balanced=True, lr=lr)
                    feats_S_2, preds_S_2 = predict_NN(model, feats_S_test, lbls_S_test)
                    # predict_NN must hand the features back in their original order.
                    assert (feats_S_2 == feats_S_test).all()

                    confs, preds_labels = preds_S_2.max(dim=1)

                    np_preds = preds_labels.numpy()
                    np_labels = lbls_S_test.numpy()

                    report = classification_report(np_labels, np_preds, output_dict=True)

                    # The 'seed' column stores the fold index, not the RNG seed.
                    records.append({'source': source, 'lr': lr, 'seed': i, 'epochs': epochs, 'report': report})
finally:
    # Build the frame even if the loop is interrupted, so finished folds stay inspectable.
    results = pd.DataFrame(records, columns=result_columns)

painting -> painting lr= 0.001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
painting -> painting lr= 0.0001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
painting -> painting lr= 0.001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
painting -> painting lr= 0.0001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
real -> real lr= 0.001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
real -> real lr= 0.0001 seed= 2
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
real -> real lr= 

In [94]:
# Persist the per-fold results. Create the output directory first so the write
# does not fail on a fresh checkout where 'results/' is missing.
os.makedirs('results', exist_ok=True)
results.to_csv('results/dcc__source_only.csv', header=True, index=False)

In [95]:
# df = pd.read_csv('office_home_oslpp_nn_raw.csv')
# df['h_score'] = df['desc'].apply(lambda x: float(x.split()[-1].split('=')[-1]))
# import matplotlib.pyplot as plt
# for n, gr in df.groupby(['source', 'target']):
#     stat = gr.groupby(['lr', 'n_r'])['h_score'].mean()
#     exps = np.array(list(stat.index))
#     fig, ax = plt.subplots()
#     ax.scatter(exps[:, 0], exps[:, 1], c=stat.values)
#     for i, txt in enumerate(stat.values):
#         ax.annotate(round(txt, 2), (exps[i][0], exps[i][1]))
#     plt.title(n)
#     plt.xlabel('lr')
#     plt.ylabel('n_r')
#     plt.xticks(exps[:, 0])
#     plt.yticks(exps[:, 1])
#     fig.colorbar(ax.get_children()[0])
#     plt.show()

In [37]:
# Inspect the classification report stored in the first row of `results`.
results['report'].iloc[0]

dict_values([{'precision': 0.9110320284697508, 'recall': 0.9884169884169884, 'f1-score': 0.9481481481481481, 'support': 259}, {'precision': 0.9859943977591037, 'recall': 0.9696969696969697, 'f1-score': 0.9777777777777777, 'support': 363}, {'precision': 0.9302325581395349, 'recall': 0.9917355371900827, 'f1-score': 0.96, 'support': 242}, {'precision': 0.9701492537313433, 'recall': 0.9701492537313433, 'f1-score': 0.9701492537313433, 'support': 201}, {'precision': 0.9087591240875912, 'recall': 0.9291044776119403, 'f1-score': 0.9188191881918818, 'support': 268}, {'precision': 0.9138461538461539, 'recall': 0.8761061946902655, 'f1-score': 0.8945783132530122, 'support': 339}, {'precision': 0.9353233830845771, 'recall': 0.8487584650112867, 'f1-score': 0.8899408284023668, 'support': 443}, {'precision': 0.8980716253443526, 'recall': 0.9314285714285714, 'f1-score': 0.9144460028050491, 'support': 350}, {'precision': 0.8838526912181303, 'recall': 0.9285714285714286, 'f1-score': 0.9056603773584906, '

In [96]:
# Extract the macro-averaged F1 score from each stored classification report.
results['f1'] = results['report'].apply(
    lambda report: report['macro avg']['f1-score']
)

In [98]:
# Mean macro-F1 per (source, lr, epochs), sorted by source and f1 in descending order.
(
    results
    .groupby(['source', 'lr', 'epochs'])['f1']
    .mean()
    .to_frame(name='f1')
    .reset_index()
    .sort_values(by=['source', 'f1'], ascending=False)
)

Unnamed: 0,source,lr,epochs,f1
10,sketch,0.001,10.0,0.713864
9,sketch,0.0001,50.0,0.699836
11,sketch,0.001,50.0,0.697924
8,sketch,0.0001,10.0,0.57662
6,real,0.001,10.0,0.944632
5,real,0.0001,50.0,0.94248
7,real,0.001,50.0,0.941889
4,real,0.0001,10.0,0.897367
2,painting,0.001,10.0,0.823796
1,painting,0.0001,50.0,0.818777


In [25]:
# Pivot h_score into one column per (source, target) pair, indexed by (lr, epochs, n_r).
# NOTE(review): `wd` is only loaded in a later cell, so this cell fails on a fresh
# top-to-bottom run — consider moving it below the load.
wd.pivot(index=['lr', 'epochs', 'n_r'], columns=['source', 'target'], values='h_score')

Unnamed: 0_level_0,Unnamed: 1_level_0,source,painting,painting,real,real,sketch,sketch
Unnamed: 0_level_1,Unnamed: 1_level_1,target,sketch,real,painting,sketch,real,painting
lr,epochs,n_r,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
0.0001,10.0,0.15,51.36,81.74,66.86,56.74,70.5,54.68
0.0001,10.0,0.25,51.62,80.41,66.87,56.3,68.4,55.52
0.0001,10.0,0.35,52.04,80.18,66.26,56.23,68.32,57.55
0.0001,30.0,0.15,51.21,80.64,66.8,57.22,79.25,60.45
0.0001,30.0,0.25,51.77,80.16,66.92,57.06,77.62,62.18
0.0001,30.0,0.35,52.06,78.88,65.03,56.75,76.86,63.06
0.0001,50.0,0.15,51.37,79.92,65.41,57.13,78.66,66.8
0.0001,50.0,0.25,51.53,79.61,66.11,57.26,75.62,66.21
0.0001,50.0,0.35,51.37,78.93,65.3,56.95,74.76,65.89
0.0005,10.0,0.15,51.95,79.93,65.65,57.59,78.73,66.9


In [None]:
# Preview the first 30 rows of `wd`, indexed by (lr, epoch, n_r).
# NOTE(review): the pivot cell uses the column name 'epochs' while this one uses
# 'epoch'; confirm which column the loaded CSV actually has.
wd.set_index(['lr', 'epoch', 'n_r']).head(30)

In [44]:
# Load the raw per-run results CSV into `wd`.
wd = pd.read_csv('results/dcc__entropy__StepLR__50_epochs__nn_raw.csv')

In [45]:
# h_score is the number after the last '=' in the final whitespace-separated token of `desc`.
wd['h_score'] = wd['desc'].apply(
    lambda desc: float(desc.split()[-1].split('=')[-1])
)

In [46]:
# Display the loaded results, including the parsed h_score column.
wd

Unnamed: 0,source,target,desc,lr,seed,n_r,h_score
0,painting_train,real_train,cs/acc_i=59.98 cs/acc_c=60.09 os/recall_knw=71...,0.0005,0.0,0.15,54.4
1,painting_train,sketch_train,cs/acc_i=26.40 cs/acc_c=25.41 os/recall_knw=53...,0.0005,0.0,0.15,30.42
2,real_train,painting_train,cs/acc_i=39.74 cs/acc_c=39.39 os/recall_knw=65...,0.0005,0.0,0.15,40.83
3,real_train,sketch_train,cs/acc_i=24.48 cs/acc_c=23.68 os/recall_knw=51...,0.0005,0.0,0.15,29.29
4,sketch_train,painting_train,cs/acc_i=37.51 cs/acc_c=34.45 os/recall_knw=70...,0.0005,0.0,0.15,35.06
5,sketch_train,real_train,cs/acc_i=56.05 cs/acc_c=57.00 os/recall_knw=71...,0.0005,0.0,0.15,48.56
