### Weight Distribution Comparison Metrics

In [93]:
import tensorflow as tf
import torch
import torchvision
import pytorch_lightning as pl
from torch import nn
import statistics
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from math import log2
import pickle
from itertools import product

In [22]:
# Hyperparameters passed to both get_tf_weights and get_pt_weights.
config = {
    'learning_rate': .001,
    'dropout': 0.2,
    'batch_size': 64,
    'epochs': 25,
}

In [23]:
def get_tf_weights(config):
    """Train a dense MNIST classifier in Keras and return its scaled weights.

    Args:
        config: mapping providing 'dropout', 'learning_rate', 'epochs' and
            'batch_size'.

    Returns:
        A column array (one row per weight value) holding every entry of
        ``model.weights``, min-max scaled and then shifted by +1.
    """
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.mnist.load_data()
    # Scale the raw pixel values by 1/255.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    layers = [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(config['dropout']),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
    model = tf.keras.models.Sequential(layers)

    adam = tf.keras.optimizers.Adam(learning_rate=config['learning_rate'])
    model.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.fit(
        train_images,
        train_labels,
        epochs=config['epochs'],
        batch_size=config['batch_size'],
    )
    # Evaluation on the held-out split; its return value is not used here.
    model.evaluate(test_images, test_labels)

    # Flatten every variable of the model into one long list of scalars.
    flat_weights = []
    for variable in model.weights:
        flat_weights.extend(variable.numpy().flatten())

    # Min-max scale the single column of weights, then shift it by +1.
    weight_column = np.array(flat_weights).reshape(-1, 1)
    return MinMaxScaler().fit_transform(weight_column) + 1

In [24]:
class NumberNet(pl.LightningModule):
    """Fully connected MNIST classifier (784 -> 128 -> 10) as a LightningModule.

    ``config`` is a mapping; this class reads its 'dropout', 'batch_size'
    and 'learning_rate' entries.
    """

    def __init__(self, config):
        super().__init__()
        self.model = nn.Sequential(
            nn.Flatten(),
            nn.Linear(784, 128),
            nn.ReLU(),
            nn.Dropout(config['dropout']),
            nn.Linear(128, 10))  # no softmax because it's included in cross entropy loss
        self.criterion = nn.CrossEntropyLoss()
        self.config = config
        # Mean test loss; filled in by test_epoch_end.
        self.test_loss = None

    def _mnist_loader(self, train):
        """Build a DataLoader over the MNIST split selected by ``train``."""
        dataset = torchvision.datasets.MNIST(
            "~/resiliency/",
            train=train,
            transform=torchvision.transforms.ToTensor(),
            target_transform=None,
            download=True,
        )
        return torch.utils.data.DataLoader(
            dataset, batch_size=int(self.config['batch_size']))

    def train_dataloader(self):
        """Return the loader over the MNIST training split."""
        return self._mnist_loader(train=True)

    def test_dataloader(self):
        """Return the loader over the MNIST test split.

        This must be ``train=False``: loading the training split here would
        make the reported test loss a training-set loss.
        """
        return self._mnist_loader(train=False)

    def configure_optimizers(self):
        """Return an Adam optimizer using ``config['learning_rate']``."""
        return torch.optim.Adam(self.parameters(), lr=self.config['learning_rate'])

    def forward(self, x):
        """Return the raw class logits for a batch ``x``."""
        return self.model(x)

    def training_step(self, train_batch, batch_idx):
        """Compute the cross-entropy loss for one training batch."""
        x, y = train_batch
        logits = self.forward(x)
        loss = self.criterion(logits, y)
        return {'loss': loss}

    def test_step(self, test_batch, batch_idx):
        """Compute the cross-entropy loss for one test batch."""
        x, y = test_batch
        logits = self.forward(x)
        loss = self.criterion(logits, y)
        logs = {'test_loss': loss}
        return {'test_loss': loss, 'logs': logs}

    def test_epoch_end(self, outputs):
        """Average the per-batch test losses and store the mean on the module.

        ``outputs`` is iterated as a sequence of dicts that each carry a
        'test_loss' entry (the shape ``test_step`` returns).
        """
        batch_losses = [float(step['test_loss']) for step in outputs]
        avg_loss = statistics.mean(batch_losses)
        tensorboard_logs = {'test_loss': avg_loss}
        self.test_loss = avg_loss
        return {'avg_test_loss': avg_loss, 'log': tensorboard_logs}

In [25]:
def get_pt_weights(config):
    """Train a NumberNet with Lightning and return its scaled parameters.

    Args:
        config: mapping providing 'epochs' plus the entries NumberNet reads.

    Returns:
        A column array (one row per parameter value) holding every model
        parameter, min-max scaled and then shifted by +1.
    """
    model = NumberNet(config)
    trainer = pl.Trainer(max_epochs=config['epochs'])
    trainer.fit(model)
    trainer.test(model)

    just_pt_weights = []
    for w in model.parameters():
        # .cpu() is a no-op for CPU tensors; without it .numpy() raises when
        # the parameters live on an accelerator.
        just_pt_weights.extend(w.detach().cpu().numpy().flatten())

    # Min-max scale the single column of parameters, then shift it by +1.
    pt_weights_scaled = MinMaxScaler().fit_transform(
        np.array(just_pt_weights).reshape(-1, 1)) + 1
    return pt_weights_scaled

In [None]:
# Train the Keras model 10 times with the same config, keeping each run's scaled weights.
all_tf_weights = [get_tf_weights(config) for _ in range(10)]

In [27]:
# Element-wise mean of the scaled weights across the repeated TensorFlow runs.
mean_tf_weights = np.asarray(all_tf_weights).mean(axis=0)

In [None]:
# Train the PyTorch model 10 times with the same config, keeping each run's scaled weights.
all_pt_weights = [get_pt_weights(config) for _ in range(10)]

In [None]:
# Display the collected PyTorch weight arrays for inspection.
all_pt_weights

In [30]:
# Element-wise mean of the scaled weights across the repeated PyTorch runs.
mean_pt_weights = np.asarray(all_pt_weights).mean(axis=0)

In [31]:
# calculate the kl divergence
def kl_divergence(p, q):
    """Return sum_i p[i] * log2(p[i] / q[i]) over paired entries of p and q.

    The inputs are used exactly as given; they are not normalised to sum
    to 1, so the result is a true KL divergence (in bits) only when both
    already are probability distributions.

    Args:
        p: sequence of non-negative values.
        q: sequence of the same length as ``p``; entries paired with a
            non-zero ``p`` entry must be non-zero.

    Returns:
        The accumulated sum (0 when every term is skipped or the inputs
        are empty).

    Raises:
        ValueError: if ``p`` and ``q`` differ in length.
    """
    if len(p) != len(q):
        raise ValueError(
            f"p and q must have the same length, got {len(p)} and {len(q)}")
    # Terms with p_i == 0 contribute nothing (0 * log 0 is taken as 0);
    # evaluating them would raise a math domain error in log2.
    return sum(p_i * log2(p_i / q_i) for p_i, q_i in zip(p, q) if p_i != 0)

In [32]:
# Hand-written KL sum applied to the run-averaged, scaled weights of both frameworks.
kl_divergence(mean_tf_weights, mean_pt_weights)

array([3885.5615], dtype=float32)

In [33]:
import scipy

In [68]:
# SciPy's element-wise kl_div on the same averaged weights, summed over all entries.
# NOTE(review): this value differs from kl_divergence above; presumably kl_div uses a
# different per-element formula and log base -- confirm against the SciPy docs.
scipy.special.kl_div(mean_tf_weights, mean_pt_weights).sum()

61.856396

In [58]:
# Relative entropy of the flattened averaged weights (TensorFlow as pk, PyTorch as qk).
# NOTE(review): scipy.stats.entropy is documented to normalise pk and qk to sum to 1,
# which would explain the much smaller value than the unnormalised sums above -- confirm.
scipy.stats.entropy(mean_tf_weights.flatten(), qk=mean_pt_weights.flatten(), axis=0)

0.00024667307

In [60]:
# Jensen-Shannon distance between the flattened, run-averaged weight vectors.
scipy.spatial.distance.jensenshannon(mean_tf_weights.flatten(), mean_pt_weights.flatten())

0.007853274003175311

In [145]:
# Jensen-Shannon distance after averaging along axis=1 instead of axis=0.
# NOTE(review): assuming each run is an (n, 1) column as get_*_weights return, axis=1
# collapses the weights and leaves one mean per run -- confirm this is intended.
scipy.spatial.distance.jensenshannon(np.mean(all_tf_weights, axis=1).flatten(), np.mean(all_pt_weights, axis=1).flatten())

0.006773344657721863

In [92]:
### load the top 5 and bottom 5 weight lists from the hyperspace search
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE: pickle can execute arbitrary code while loading; only open trusted files.
    with open(path, "rb") as fh:
        return pickle.load(fh)


top_5_tf = _load_pickle("../top_5_config_tf_model_weights.pkl")
top_5_pt = _load_pickle("../top_5_config_pt_model_weights.pkl")
bottom_5_tf = _load_pickle("../bottom_5_config_tf_model_weights.pkl")
bottom_5_pt = _load_pickle("../bottom_5_config_pt_model_weights.pkl")

In [114]:
# First entry of top_5_tf, min-max scaled as a single column and shifted by +1.
first_top_tf = list(top_5_tf.values())[0]
test_tf = MinMaxScaler().fit_transform(np.array(first_top_tf).reshape(-1, 1)) + 1

In [118]:
# First entry of top_5_pt, min-max scaled as a single column and shifted by +1.
first_top_pt = list(top_5_pt.values())[0]
test_pt = MinMaxScaler().fit_transform(np.array(first_top_pt).reshape(-1, 1)) + 1

In [119]:
# Hand-written KL sum between the two scaled weight columns built above.
kl_divergence(test_tf, test_pt)

array([1687.7921], dtype=float32)

In [122]:
def my_normalize(a):
    """Min-max scale the values of ``a`` and shift them into the range 1 to 2.

    ``a`` is treated as one flat column of values; the result is a column
    array with one row per value.
    """
    as_column = np.array(a).reshape(-1, 1)
    unit_scaled = MinMaxScaler().fit_transform(as_column)
    return unit_scaled + 1

In [137]:
# compare each top 5 pt to each top 5 tf
# The comprehension keeps the name `tf` free: binding a weight vector to it
# at module level would replace the `tensorflow` alias used by get_tf_weights.
top5_euclidean = [
    scipy.spatial.distance.euclidean(top_5_tf[tf_key], top_5_pt[pt_key])
    for tf_key, pt_key in product(top_5_tf, top_5_pt)
]

In [138]:
# Euclidean distance for every (top-5 tf, bottom-5 pt) pairing.
# The comprehension keeps the name `tf` free: binding a weight vector to it
# at module level would replace the `tensorflow` alias used by get_tf_weights.
mix1_euclidean = [
    scipy.spatial.distance.euclidean(top_5_tf[tf_key], bottom_5_pt[pt_key])
    for tf_key, pt_key in product(top_5_tf, bottom_5_pt)
]

In [139]:
# Euclidean distance for every (bottom-5 tf, top-5 pt) pairing.
# The comprehension keeps the name `tf` free: binding a weight vector to it
# at module level would replace the `tensorflow` alias used by get_tf_weights.
mix2_euclidean = [
    scipy.spatial.distance.euclidean(bottom_5_tf[tf_key], top_5_pt[pt_key])
    for tf_key, pt_key in product(bottom_5_tf, top_5_pt)
]

In [140]:
# Euclidean distance for every (bottom-5 tf, bottom-5 pt) pairing.
# The comprehension keeps the name `tf` free: binding a weight vector to it
# at module level would replace the `tensorflow` alias used by get_tf_weights.
bottom5_euclidean = [
    scipy.spatial.distance.euclidean(bottom_5_tf[tf_key], bottom_5_pt[pt_key])
    for tf_key, pt_key in product(bottom_5_tf, bottom_5_pt)
]

In [155]:
# Display all pairwise top-5 tf / top-5 pt distances.
top5_euclidean

[248.22796630859375,
 231.326904296875,
 220.34353637695312,
 272.3984680175781,
 157.28028869628906,
 259.25592041015625,
 242.80320739746094,
 232.80638122558594,
 281.92523193359375,
 174.42401123046875,
 204.72842407226562,
 183.0611114501953,
 168.662109375,
 233.46591186523438,
 66.2406234741211,
 283.1245422363281,
 268.6000671386719,
 259.2295837402344,
 304.4197998046875,
 208.2905731201172,
 261.2315979003906,
 245.09693908691406,
 234.21742248535156,
 284.2892150878906,
 177.13597106933594]

In [162]:
# Distance between the two frameworks for one key present in both top-5 dicts.
shared_top_key = '0.008332646839818986lr_0.3985947224532251drop_64epochs_950batch'
scipy.spatial.distance.euclidean(top_5_tf[shared_top_key], top_5_pt[shared_top_key])

220.34353637695312

In [168]:
# Distance between the two frameworks for one key present in both bottom-5 dicts.
shared_bottom_key = '0.09949307671494452lr_0.8847296070468049drop_68epochs_67batch'
scipy.spatial.distance.euclidean(bottom_5_tf[shared_bottom_key], bottom_5_pt[shared_bottom_key])

233.34609985351562

In [169]:
# Cross comparison: one top_5_tf entry against one bottom_5_pt entry.
scipy.spatial.distance.euclidean(top_5_tf['0.008332646839818986lr_0.3985947224532251drop_64epochs_950batch'], bottom_5_pt['0.09949307671494452lr_0.8847296070468049drop_68epochs_67batch'])

250.7360382080078

In [171]:
# Cross comparison: one bottom_5_tf entry against one top_5_pt entry.
scipy.spatial.distance.euclidean(bottom_5_tf['0.09949307671494452lr_0.8847296070468049drop_68epochs_67batch'],  top_5_pt['0.008332646839818986lr_0.3985947224532251drop_64epochs_950batch'])

219.71839904785156

In [142]:
# Print the mean pairwise distance of each group, in the order
# top/top, top/bottom, bottom/top, bottom/bottom.
distance_groups = [top5_euclidean, mix1_euclidean, mix2_euclidean, bottom5_euclidean]
for distances in distance_groups:
    print(np.array(distances).mean())

228.1034323120117
257.1180706787109
255.5523388671875
265.99940185546876


In [153]:
# Distance between the first entry of top_5_tf and the first entry of top_5_pt.
scipy.spatial.distance.euclidean(list(top_5_tf.values())[0], list(top_5_pt.values())[0])

248.22796630859375

In [148]:
# Distance between the first entry of top_5_tf and the first entry of bottom_5_pt.
scipy.spatial.distance.euclidean(list(top_5_tf.values())[0], list(bottom_5_pt.values())[0])

250.7360382080078

In [149]:
# Distance between the first entry of bottom_5_tf and the first entry of top_5_pt.
scipy.spatial.distance.euclidean(list(bottom_5_tf.values())[0], list(top_5_pt.values())[0])

257.3309020996094

In [150]:
# Distance between the first entry of bottom_5_tf and the first entry of bottom_5_pt.
scipy.spatial.distance.euclidean(list(bottom_5_tf.values())[0], list(bottom_5_pt.values())[0])

245.04383850097656

In [173]:
### What about comparing within the frameworks, not cross?
# TensorFlow only: first top_5_tf entry against first bottom_5_tf entry.
print(scipy.spatial.distance.euclidean(list(top_5_tf.values())[0], list(bottom_5_tf.values())[0]))

221.55516052246094


In [174]:
# PyTorch only: first top_5_pt entry against first bottom_5_pt entry.
print(scipy.spatial.distance.euclidean(list(top_5_pt.values())[0], list(bottom_5_pt.values())[0]))

274.5293273925781


In [104]:
# compare each bottom 5 pt to each bottom 5 tf
# NOTE(review): the weights are passed in unscaled and every recorded result is inf;
# presumably these measures need non-negative inputs -- confirm before relying on them.
bottom_5_both_jensenshannon = []
bottom_5_both_entropy = []
for tf_key, pt_key in product(bottom_5_tf, bottom_5_pt):
    tf_weights, pt_weights = bottom_5_tf[tf_key], bottom_5_pt[pt_key]
    bottom_5_both_jensenshannon.append(
        scipy.spatial.distance.jensenshannon(tf_weights, pt_weights))
    bottom_5_both_entropy.append(scipy.stats.entropy(tf_weights, pt_weights))
    print(scipy.special.kl_div(tf_weights, pt_weights).sum())

inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf


In [105]:
# compare each top 5 tf to each bottom 5 pt
# NOTE(review): the results are stored under the bottom_5_both_* names, replacing
# whatever those lists held before -- confirm whether separate names were intended.
# NOTE(review): the weights are passed in unscaled and every recorded result is inf;
# presumably these measures need non-negative inputs -- confirm before relying on them.
bottom_5_both_jensenshannon = []
bottom_5_both_entropy = []
for tf_key, pt_key in product(top_5_tf, bottom_5_pt):
    tf_weights, pt_weights = top_5_tf[tf_key], bottom_5_pt[pt_key]
    bottom_5_both_jensenshannon.append(
        scipy.spatial.distance.jensenshannon(tf_weights, pt_weights))
    bottom_5_both_entropy.append(scipy.stats.entropy(tf_weights, pt_weights))
    print(scipy.special.kl_div(tf_weights, pt_weights).sum())

inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf
inf


In [106]:
# Display the most recently computed Jensen-Shannon list.
bottom_5_both_jensenshannon

[inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf,
 inf]

In [84]:
# compare each bottom 5 pt to each bottom 5 tf

In [175]:
# Weight vectors of both frameworks for one key present in both top-5 dicts.
top_config_key = '0.008332646839818986lr_0.3985947224532251drop_64epochs_950batch'
top_tf_weights = top_5_tf[top_config_key]
top_pt_weights = top_5_pt[top_config_key]

In [176]:
# Weight vectors of both frameworks for one key present in both bottom-5 dicts.
bottom_config_key = '0.09949307671494452lr_0.8847296070468049drop_68epochs_67batch'
bottom_tf_weights = bottom_5_tf[bottom_config_key]
bottom_pt_weights = bottom_5_pt[bottom_config_key]