In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs, make_circles

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchdiffeq
import copy

from torch.autograd import Variable

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np
from tqdm import tqdm
from sklearn.metrics import log_loss
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

def bootstrap_performance(X, y, clf, n=10, test_size=.2, eps=.01):
    """Estimate held-out cross-entropy and accuracy over repeated random splits.

    For each ``i`` in ``range(n)`` the data is split with
    ``train_test_split(..., random_state=i)``, ``clf`` is refit on the
    training part and scored on the test part.

    Args:
        X: Feature matrix accepted by ``train_test_split`` and ``clf.fit``.
        y: Labels aligned with the rows of ``X``.
        clf: Estimator exposing ``fit``, ``predict_proba``, ``score``,
            ``get_params`` and ``set_params``. It is refit in place on every
            round, so after the call it holds the model of the last split.
        n: Number of splits to evaluate.
        test_size: Forwarded to ``train_test_split``.
        eps: Predicted probabilities are clipped to ``[eps, 1 - eps]`` before
            the log-loss is computed.

    Returns:
        Tuple ``(all_cross_entropy, all_accuracy)``; each is a list with one
        entry per split.
    """
    all_cross_entropy, all_accuracy = [], []
    for i in tqdm(range(n)):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=i)

        # Only seed estimators that actually have a `random_state` parameter;
        # `set_params` raises on unknown parameter names.
        if 'random_state' in clf.get_params():
            clf.set_params(random_state=i)
        clf.fit(X_train, y_train)

        # `log_loss(..., eps=...)` was deprecated in scikit-learn 1.3 and later
        # removed, so the clipping (and the row renormalisation that used to
        # follow it) is done explicitly here.
        proba = np.clip(clf.predict_proba(X_test), eps, 1 - eps)
        proba = proba / proba.sum(axis=1, keepdims=True)

        all_cross_entropy.append(log_loss(y_test, proba))
        all_accuracy.append(clf.score(X_test, y_test))
    return all_cross_entropy, all_accuracy


#clf = GradientBoostingClassifier(learning_rate=0.015, n_estimators=300, max_depth=6, min_samples_split=30, min_samples_leaf=16)
#X = np.load('./train_X_mobilenet_v2.npy')
#y = np.load('./train_y_mobilenet_v2.npy')

#cen, acc = bootstrap_performance(X, y, clf, n=20)
#print(f'mean cross entropy: {np.mean(cen)}')
#print(f'mean accuracy: {np.mean(acc)}')

In [3]:
def set_seed(seed=0):
    """Seed NumPy's global RNG and PyTorch's CPU and CUDA generators with `seed`."""
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

### Create data

In [4]:
# Use Sklearn to generate a random toy dataset
#X, y = make_circles(n_samples=300, factor=0.6, noise=0.15, random_state=1)

In [6]:
# Load the feature matrix `X` and the label vector `y` from their .npy files
# (judging by the file names, statistics extracted for MobileNet-V2 models).
X, y = (
    np.load(npy_path)
    for npy_path in (
        '/scratch/jialin/image-classification-sep2022/projects/weight_analysis/extracted_source/weight_stats/train_X_mobilenet_v2.npy',
        '/scratch/jialin/image-classification-sep2022/projects/weight_analysis/extracted_source/weight_stats/train_y_mobilenet_v2.npy',
    )
)

In [7]:
# Quick size check of the loaded feature matrix (rows = samples, columns = features).
X.shape

(96, 948)

In [8]:
def mixup_data(x, y, alpha=1.0, use_cuda=False):
    """Blend each sample of a batch with a randomly chosen partner (mixup).

    Args:
        x: Batch of inputs; the first dimension is the batch dimension.
        y: Targets aligned with `x` along the first dimension.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from. For `alpha <= 0` the weight is fixed to 1,
            i.e. the inputs are returned unmixed.
        use_cuda: If True, the permutation index is moved to the GPU.

    Returns:
        Tuple `(mixed_x, y_a, y_b, lam)`: the blended inputs, the original
        targets, the targets of the partner samples and the mixing weight.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    # One random partner per sample, drawn as a permutation of the batch.
    permutation = torch.randperm(x.size(0))
    if use_cuda:
        permutation = permutation.cuda()

    partner_x, partner_y = x[permutation, :], y[permutation]
    mixed_x = lam * x + (1 - lam) * partner_x
    return mixed_x, y, partner_y, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Combine the loss against both mixup targets, weighted by `lam` and `1 - lam`."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

def get_lr(step, total_steps, lr_max, lr_min):
    """Cosine-annealed value between `lr_max` (at step 0) and `lr_min` (at `total_steps`)."""
    phase = step / total_steps * np.pi
    lr_span = lr_max - lr_min
    return lr_min + lr_span * 0.5 * (1 + np.cos(phase))

### Step 1: Create a torch data loader

In [9]:
# Fix the seed of every random number generator used in this notebook:
# PyTorch (CPU), NumPy, and PyTorch CUDA (current device, then all devices).
for seeder in (torch.manual_seed, np.random.seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
    seeder(1)

In [15]:
# Hyperparameters read by the training cell.
LEARNING_RATE = 5e-4  # initial Adam learning rate; the cosine schedule scales it down during training
WEIGHT_DECAY = 1e-2 # Adam weight_decay (alternative value noted by the author: 5e-3)
NUMBER_EPOCHS = 800  # upper bound of the epoch loop; also sizes the cosine learning-rate schedule
alpha = 1.0  # Beta(alpha, alpha) parameter passed to mixup_data

In [11]:
# Collects the held-out log-loss values appended by later cells.
# Re-run this cell before re-running them, otherwise results from earlier runs stay in the list.
test_errors = []

In [12]:
import torch.optim as optim

In [17]:
# Train the MLP classifier ten times from scratch with mixup and a cosine
# learning-rate schedule, and record the clipped log-loss on the held-out split
# of every run in `test_errors`.

BATCH_SIZE = 5  # mini-batch size of the training loader


class Net(nn.Module):
    """Three-layer MLP (948 -> 128 -> 32 -> 1) that returns one logit per sample."""

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(948, 128)  # hidden layer
        self.fc2 = nn.Linear(128, 32)   # hidden layer
        self.fc4 = nn.Linear(32, 1)     # output layer

        self.drop = torch.nn.Dropout(p=0.15, inplace=False)

    def forward(self, x):
        x = self.drop(x)  # dropout is applied to the input features only
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc4(x)
        return x


for i in tqdm(range(10)):
    # NOTE(review): the split seed is fixed to 1, so every run sees the same
    # train/test split and only the weight initialisation and mixup draws
    # differ. If a different split per run is wanted, use `random_state=i`.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=1)

    dataset = torch.utils.data.TensorDataset(
        torch.tensor(X_train, dtype=torch.float),
        torch.tensor(y_train, dtype=torch.float).reshape(-1, 1))
    train_loader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE)

    dataset = torch.utils.data.TensorDataset(
        torch.tensor(X_test, dtype=torch.float),
        torch.tensor(y_test, dtype=torch.float).reshape(-1, 1))
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=500)

    net = Net()

    criterion = nn.BCEWithLogitsLoss()  # Sigmoid layer + Binary Cross Entropy

    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    # Cosine decay of the learning rate from LEARNING_RATE towards 1e-8,
    # stepped once per mini-batch.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: get_lr(  # pylint: disable=g-long-lambda
            step, NUMBER_EPOCHS * len(train_loader),
            1,  # lr_lambda computes multiplicative factor
            1e-8 / LEARNING_RATE))

    loss_hist = []
    # NOTE(review): this runs NUMBER_EPOCHS - 1 epochs while the schedule above
    # is sized for NUMBER_EPOCHS, so the last schedule steps are never reached.
    # Left unchanged to keep results comparable with earlier runs.
    for epoch in range(1, NUMBER_EPOCHS):
        net.train()
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, alpha)

            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()

        # Held-out loss for monitoring only: no gradients are needed, and
        # `loss` ends up holding the value of the last test batch.
        net.eval()
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(test_loader):
                outputs = net(inputs)
                loss = criterion(outputs, targets)

        loss_hist.append(loss.item())

        if epoch == 50: print(f'Epoch: {epoch}, Loss: {loss.item()}')
        if (epoch+1) % 200 == 0: print(f'Epoch: {epoch}, Loss: {loss.item()}')

    #plt.plot(loss_hist)
    #plt.ylabel('loss', fontsize=20)
    #plt.xlabel('epoch', fontsize=20)

    # Final predictions on the held-out split.
    net.eval()
    preds = []
    test_targets = []
    sig = nn.Sigmoid()

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(test_loader):
            pred = net(inputs)
            pred = sig(pred)
            preds.append(pred.numpy())
            test_targets.append(targets.numpy())

    # Concatenate rather than stack so a smaller last batch cannot break this.
    preds = np.concatenate(preds).reshape(-1, 1)
    test_targets = np.concatenate(test_targets).flatten()

    # Score on probabilities clipped to [0.1, 0.9]. This is what
    # `log_loss(..., eps=.1)` did before scikit-learn deprecated `eps`.
    # `preds` itself is left unclipped.
    test_errors.append(log_loss(test_targets, np.clip(preds, .1, .9)))
    print(test_errors[-1])

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 50, Loss: 0.954047679901123
Epoch: 199, Loss: 0.8746837377548218
Epoch: 399, Loss: 0.4702509939670563
Epoch: 599, Loss: 0.44249042868614197


 10%|█         | 1/10 [00:46<06:54, 46.04s/it]

Epoch: 799, Loss: 0.4262056350708008
0.3888330549001694
Epoch: 50, Loss: 2.377702236175537
Epoch: 199, Loss: 1.196948766708374
Epoch: 399, Loss: 2.1547210216522217
Epoch: 599, Loss: 0.5175000429153442


 20%|██        | 2/10 [01:36<06:29, 48.73s/it]

Epoch: 799, Loss: 0.48510199785232544
0.49462076723575593
Epoch: 50, Loss: 0.7253899574279785
Epoch: 199, Loss: 0.5926820635795593
Epoch: 399, Loss: 0.6599323749542236
Epoch: 599, Loss: 0.6928311586380005


 30%|███       | 3/10 [02:24<05:38, 48.30s/it]

Epoch: 799, Loss: 0.6407779455184937
0.6503372725099326
Epoch: 50, Loss: 2.640005111694336
Epoch: 199, Loss: 1.2846449613571167
Epoch: 399, Loss: 0.7999675869941711
Epoch: 599, Loss: 0.8757997751235962


 40%|████      | 4/10 [03:13<04:50, 48.40s/it]

Epoch: 799, Loss: 0.7972866296768188
0.5702416054904461
Epoch: 50, Loss: 1.3878971338272095
Epoch: 199, Loss: 1.2104965448379517
Epoch: 399, Loss: 0.5020182132720947
Epoch: 599, Loss: 0.4654081463813782


 50%|█████     | 5/10 [03:58<03:57, 47.52s/it]

Epoch: 799, Loss: 0.49648746848106384
0.5025696609169245
Epoch: 50, Loss: 0.758650004863739
Epoch: 199, Loss: 0.68907630443573
Epoch: 399, Loss: 0.599901020526886
Epoch: 599, Loss: 0.6559334993362427


 60%|██████    | 6/10 [04:42<03:05, 46.31s/it]

Epoch: 799, Loss: 0.6424709558486938
0.6227991584688425
Epoch: 50, Loss: 0.7170708775520325
Epoch: 199, Loss: 0.841819167137146
Epoch: 399, Loss: 0.8811086416244507
Epoch: 599, Loss: 0.5732848048210144


 70%|███████   | 7/10 [05:25<02:15, 45.16s/it]

Epoch: 799, Loss: 0.7113984227180481
0.6708385396748782
Epoch: 50, Loss: 4.652772903442383
Epoch: 199, Loss: 0.5443645715713501
Epoch: 399, Loss: 0.5127273797988892
Epoch: 599, Loss: 0.4478883147239685


 80%|████████  | 8/10 [06:09<01:29, 44.87s/it]

Epoch: 799, Loss: 0.46080970764160156
0.47851885482668877
Epoch: 50, Loss: 1.4569380283355713
Epoch: 199, Loss: 0.929425060749054
Epoch: 399, Loss: 0.8255554437637329
Epoch: 599, Loss: 0.7831293344497681


 90%|█████████ | 9/10 [06:57<00:45, 45.74s/it]

Epoch: 799, Loss: 0.8108711242675781
0.7995184458792209
Epoch: 50, Loss: 5.258440971374512
Epoch: 199, Loss: 0.6939379572868347
Epoch: 399, Loss: 0.6022968292236328
Epoch: 599, Loss: 0.6284703016281128


100%|██████████| 10/10 [07:45<00:00, 46.51s/it]

Epoch: 799, Loss: 0.58275306224823
0.5921660248190165





In [21]:
# Save the parameters of `net` to disk. `net` is reassigned on every pass of the
# training loop, so this is the model from its last pass.
torch.save(net.state_dict(), '/scratch/jialin/image-classification-sep2022/projects/weight_analysis/extracted_source/mbnet_classifier.pt')

In [18]:
# Append the log-loss of the most recent predictions, computed without explicit clipping.
# NOTE(review): the training loop appends losses on predictions clipped to [0.1, 0.9],
# so this adds a differently computed metric to the same list — confirm this is intended.
test_errors.append(log_loss(test_targets, preds))

In [19]:
# Average over every value collected in `test_errors` so far.
print(np.mean(test_errors))

0.46254294823641734


In [22]:
class Net(nn.Module):
    """Three-layer MLP (948 -> 128 -> 32 -> 1) that returns one logit per sample.

    Mirrors the architecture used in the training cell so that a checkpoint
    saved from it can be loaded here. Dropout has no parameters and so is not
    checked by `load_state_dict`; its probability is set to the training
    value (0.15) explicitly to keep the two definitions in agreement.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(948, 128)  # hidden layer
        self.fc2 = nn.Linear(128, 32)   # hidden layer
        self.fc4 = nn.Linear(32, 1)     # output layer

        self.drop = torch.nn.Dropout(p=0.15, inplace=False)

    def forward(self, x):
        x = self.drop(x)  # dropout is applied to the input features only
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc4(x)
        return x


# Fresh, randomly initialised instance that will receive the saved weights.
test_net = Net()

In [23]:
# Load the saved weights into `test_net`; the cell output reports whether all state-dict keys matched.
test_net.load_state_dict(torch.load('/scratch/jialin/image-classification-sep2022/projects/weight_analysis/extracted_source/mbnet_classifier.pt'))

<All keys matched successfully>

In [24]:
# Evaluate the reloaded checkpoint (`test_net`) on the batches of `test_loader`
# and record its log-loss.
test_net.eval()
preds = []
test_targets = []
sig = nn.Sigmoid()

for batch_idx, (inputs, targets) in enumerate(test_loader):
    # Use the reloaded model. This previously called `net`, so the checkpoint
    # that had just been loaded was never actually evaluated.
    pred = test_net(inputs)
    pred = sig(pred)
    preds.append(pred.detach().numpy())
    test_targets.append(targets.detach().numpy())

# Concatenate rather than stack so a smaller last batch cannot break this.
preds = np.concatenate(preds).reshape(-1, 1)
test_targets = np.concatenate(test_targets).flatten()

test_errors.append(log_loss(test_targets, preds))
print(test_errors[-1])

0.4583745114534395


In [31]:
# Shape of the last batch yielded by `test_loader` (left over from the evaluation loop).
inputs.shape

torch.Size([20, 948])

In [26]:
# Flattened ground-truth labels gathered from `test_loader`.
test_targets

array([0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0.,
       0., 1., 1.], dtype=float32)

In [34]:
# Sigmoid outputs for the last evaluated batch (left over from the evaluation loop).
pred

tensor([[0.4906],
        [0.7241],
        [0.4544],
        [0.8758],
        [0.1423],
        [0.8272],
        [0.2249],
        [0.4057],
        [0.5117],
        [0.0467],
        [0.4976],
        [0.5859],
        [0.1714],
        [0.1997],
        [0.0274],
        [0.8140],
        [0.6003],
        [0.6562],
        [0.9998],
        [0.8612]], grad_fn=<SigmoidBackward0>)