In [1]:
import tensorflow as tf 
import os
from tensorflow import keras
import numpy as np
# importing the operator module to do some comparisons
import operator
import datetime
import random


In [17]:
# doing the loading of the tensorboard
#%load_ext tensorboard
%reload_ext tensorboard

In [2]:
def create_model(useTensorboard=False):
    """
    The function to create a model
    """
    inputs = keras.layers.Input(shape=(784,))
    t = keras.layers.Dense(512, activation="relu", )(inputs)
    t = keras.layers.Dropout(.2)(t)
    out = keras.layers.Dense(10, )(t)

    model = keras.Model(inputs=inputs, outputs=out)

   
    # doing the compilation of the model
    model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
    return model


In [20]:
# loading the model
model = create_model()

In [21]:
# getting the path
path = os.path.join(os.path.curdir, "..", "checkPoints/cp.ckpt")
path

'.\\..\\checkPoints/cp.ckpt'

In [22]:
model.load_weights(path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x255ce3cab20>

In [23]:
# showing the summary of the model
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [18]:
# printing the weights of the layers
counter = 0
for layer in model.layers:
    print(f"The shape of what is returned with the weigths is ---- {len(layer.weights)}")
    print(f"The type is --- {type(layer.weights)}")
    if len(layer.weights) > 0:
        print(f"The type for each item in the list is: {type(layer.weights[0])}")
    print(f"These are the weights for the {layer.name} layer --- {layer.weights}" )
    counter += 1
    

The shape of what is returned with the weigths is ---- 0
The type is --- <class 'list'>
These are the weights for the input_1 layer --- []
The shape of what is returned with the weigths is ---- 2
The type is --- <class 'list'>
The type for each item in the list is: <class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
These are the weights for the dense layer --- [<tf.Variable 'dense/kernel:0' shape=(784, 512) dtype=float32, numpy=
array([[-0.05626954, -0.05369684, -0.06167276, ..., -0.001957  ,
         0.00331007,  0.05483547],
       [-0.049044  , -0.04101725,  0.04828054, ...,  0.0366065 ,
         0.00066533,  0.05718373],
       [ 0.00024945,  0.05019872, -0.0286467 , ...,  0.05689124,
        -0.01853205,  0.05268286],
       ...,
       [-0.02224023, -0.06074744,  0.06550466, ..., -0.06264395,
        -0.02794588, -0.01869341],
       [ 0.03836417, -0.06636625, -0.0272741 , ...,  0.06481206,
         0.06491137,  0.03532643],
       [-0.02355461, -0.01153382,  

In [3]:
# This is the funtion that will create the bounds from where the  first indices
# within a data window can be.  Then with a random a value will be chosen 
# from the possible data indices
def begin_all_in_window(dataLength, all_in_per_data_window:int, data_window_size:int, start_index_of_data_window:int):
    # The window end is not included in the window
    windowEnd = start_index_of_data_window + data_window_size
    # This is to make sure that it doesn't overstep the bounds per window
    if windowEnd > dataLength:
        # need to alter the amount of all_in_that can be used
        windowEnd = dataLength 
        if all_in_per_data_window > (windowEnd - start_index_of_data_window ):
            # need to change the size of the all_in_per_data_window
            all_in_per_data_window = windowEnd - start_index_of_data_window 

    end_bound = windowEnd - all_in_per_data_window
    choice = random.randint(start_index_of_data_window, end_bound)
    return choice


# This is the function that will get the beginning index of the next data window
# if there is no more data windows will return false
def get_next_data_window_index(dataSize:int, current_begin_window_index:int, data_window_size:int):
    new_index = current_begin_window_index + data_window_size
    if new_index >= dataSize or new_index + data_window_size >= dataSize:
        return False
    return new_index
    


# This is the function that will return the indices of the data
# that is in all of the chunks of data.
def get_in_all_chunks_indices(data, all_in_size:int, num_chunks_estimate:int, chunk_size:int):
    indices_list = []
    # checking to see if the data is a tuple
    if isinstance(data, tuple):
        # Will only look at one but the indices can be used
        # for both data and the data_lables
        data = data[0]
    # Will go through the data by quarters
    # from each quarter will grab 2 1/8th of the all_in size
    data_window_size = int(len(data) /8)
    # getting size of 1/8th of the all_in_size
    all_in_per_data_window = int(all_in_size/8)
    start_index_of_data_window = 0
    # doing the loop that will get the indices
    while True:
        begin_all_in = begin_all_in_window(len(data), all_in_per_data_window, data_window_size, 
                                            start_index_of_data_window)
        end = begin_all_in + all_in_per_data_window
        # indices_list will contain a tuple of the begin and the end and the range between the two
        # indices_list.append((begin_all_in, end, list(range(begin_all_in, end + 1))))
        indices_to_add = list(range(begin_all_in, end + 1))
        indices_list += indices_to_add

        # moving to the next data window
        start_index_of_data_window = get_next_data_window_index(len(data), 
                                                start_index_of_data_window, data_window_size)
        if not start_index_of_data_window:
            # breaking out if it is false
            break
    #TODO will make a set and a list that is returned
    return indices_list


# getting the data_chunk size
def get_data_chunk_size(data_size:int, chunk_size:float, in_all:float):
    chunked_size = int(data_size * chunk_size)
    in_all =int(chunked_size * in_all)
    original_data_per_chunk = chunked_size - in_all
    # finding the number of chunck estimated to make
    num_chunks_estimate = int((data_size - in_all)/ original_data_per_chunk)
    return original_data_per_chunk, chunked_size, in_all, num_chunks_estimate

# making the indices list
def makeIndexList(chunkStart:int, windowSize:int,                                         all_in_indices_list:list, original_data_per_chunk:int):
    indexList = []
    created_window_size = 0
    if 
    # adding the first index
    indices_list.append(chunkStart)
    created_window_size += 1
    i_val = 0

    # check if the 
    for i_val, theTuple in enumerate(all_in_indices_list):
        # going through the list of the indexes
        begin, end, index_vals = theTuple
        if chunkStart >= begin and chunkStart <= end and created_window_size < windowSize:
            # if in here fill with the in_all indexes


# This is the function that will make the data_chunks
def make_data_chunks(data_length:int, all_in_indices_list:list, orginal_data_size:int,
                        chunked_window_size:int):
    original_portion_window_size = 0
    window_index = 0
    chunk_size = 0
    list_of_chunk_indexes = []
    index_pos_choice = 0
    # index list
    indexList = []
    chunk_indexes = []
    # if isinstance(data, np.ndarray):
    while     
    
    for i in range(len(indices_list)):
        begin, end , index_val = indices_list[i]
        # if the index is less than begin
        # then will add upto the begin
        # and then contiue after the end
        if index_pos_choice == begin:
            # build some of the chunk
            chunk_indexes += index_val
            index_pos_choice = end + 1

        # now building the index list from the 
        # data not in the in_all indexes
        if not orginalFilled:
            for j in range(index_pos_choice, data_length):
                if original_portion_window_size >= orginal_data_size:
                    originalFilled = True
                    window_index = index_pos_choice + 1  # TODO making sure this works
                    break
                if index_pos_choice != begin:
                    # checking to see if we need to check the next batch of in_all indices
                    if i < len(indices_list) -1:
                        if index_pos_choice == indices_list[i+1][0]:
                            orginalFilled = False
                            break # breaking out of this loop
                    # adding one index at a time
                    chunk_indexes += [j]
                    index_pos_choice += 1
                    original_portion_window_size += 1
    # adding this chunk index to the list of chunk indexes
    list_of_chunk_indexes.append(chunk_indexes)

                    

    indexList, window_index = makeIndexList(chunkStart= window_index, windowSize=chunked_window_size,                                         all_in_indices_list=all_in_indices_list,                       original_data_per_chunk=orginal_data_size)
    # need to build one of the chunks

            

# This is the function that will be used to get the data but have it so that there is 
# some of the data that is mixed in all of the data
def chunk_shuffle(data, data_chunk_size=None, in_all=None ):
    """
    This is the function that will get the data as chunks and having some 
    of the data found in each of the chunks.

    data:   The data is the data passed into the function. Not a tuple

    data_chunk_size:    This is the size of the data chunk that the function will try to return
                        It is not guaranteed to get the exact amount of chunk size depending on the size 
                        of the data that is passed in the function. Data_chunk_size is a percentage or 
                        float that will be passed in.  For example if .8 would mean that each chunk_size                            will be 80% of the total data.

    in_all:             This is the parameter that if passed in will have some of the data that is found in                         all of data chunks.  A float is expected as the variable. This float is as                                  percentage .8 means that each of the chunks will have 80% of the data found in each                         of the data_chunks.
                        If not passed in then there will be no amount overlapping between data_chunks

    Returns:            Will return a list of data_chunks
    """
    if data_chunk_size == None:
        raise Exception("You need to pass in a float value for the data_chunk size")

    data_length = None

    if isinstance(data, tuple):
        # will need to pass in to the get_data_chunk_size not a tuple
        data_length = data[0]
     
    
    # getting the sizes used in the making of the chunks
    original_data_per_window_size, chunked_window_size, in_all_size, num_chunks_estimate = get_data_chunk_size(data_length,                                                                                   data_chunk_size,  in_all)
    # getting the random data that is spread through all the data chunks
    # will return a list of tuples, where each tuple has the start and the end
    # indices for some of the data that is in all the chunks
    # This function will check if the data is a tuple, if it is then all the data uses
    # the same indices
    in_all_indices_list = get_in_all_chunks_indices(data, in_all_size, num_chunks_estimate)

    # making the data chunks
    # need to make the original_data_size
    # chunkStart:int, windowSize:int, all_in_indices_list:list,                             original_data_per_chunk:int
    make_data_chunks(data_lenth, in_all_indices_list, orginal_data_size, chunked_window_size)


# getting the data for another block  that is different from the one that the modle above is trained with.
def load_images():
    training, testing = keras.datasets.mnist.load_data()
    # Each of these are tuples that contain ndarrays

    return training, testing




def reshape_data(data, start_index:int, end_index:int ):
    images = None
    labels = None
    if isinstance(data, tuple):
        images, labels = data
    
    # made the slice to pull from the opposite end
    images = images[start_index: end_index].reshape(-1, (28 * 28))
    labels = labels[start_index: end_index]
    
    images = images/255.0
    
    return images, labels

In [10]:
# now creating a model
# looking at the shape of the train 
train_images.shape, train_labels.shape

((1000, 784), (1000,))

In [11]:
model2 = create_model()

In [12]:
model2.fit(x=train_images, y=train_labels, batch_size=128, epochs=10, 
            validation_data=(test_images, test_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x255cdd512e0>

In [15]:
# getting the weights for this model
the_weights = model2.weights
type(the_weights), type(the_weights[0])

(list, tensorflow.python.ops.resource_variable_ops.ResourceVariable)

In [16]:
the_weights[0]


<tf.Variable 'dense/kernel:0' shape=(784, 512) dtype=float32, numpy=
array([[ 2.0674616e-02,  3.2345042e-02, -7.7739358e-05, ...,
        -6.2627956e-02, -3.8801931e-02, -1.3471089e-02],
       [ 2.7597107e-02,  5.6585222e-02,  1.0034814e-03, ...,
         3.5107583e-03, -3.3186216e-02, -1.6239803e-02],
       [ 2.3986742e-02,  2.8475918e-02,  1.3272785e-02, ...,
        -1.3933085e-02,  1.9999221e-03,  1.2072645e-02],
       ...,
       [ 8.9056566e-03,  3.4107566e-02, -5.5254426e-02, ...,
        -5.1518083e-03,  1.5794642e-02,  3.3432893e-02],
       [-2.1421652e-02,  2.4194144e-02,  2.5040790e-02, ...,
        -2.1030013e-02,  6.0876384e-02, -6.0057208e-02],
       [-3.4105852e-03,  2.0341501e-03,  6.6730469e-02, ...,
        -4.5968406e-02,  2.5523305e-02,  5.9543997e-02]], dtype=float32)>

In [19]:
# getting the weights of the model
w = the_weights[0].numpy()
type(w), w.shape

(numpy.ndarray, (784, 512))

In [25]:
# getting the weights of the first model above
first_w = model.weights[0].numpy()
type(first_w), w.shape

(numpy.ndarray, (784, 512))

In [35]:
# getting the average of the two weights
avg_wt = (w + first_w)/2
type(avg_wt)

numpy.ndarray

In [34]:
# getting the item of the tensorflow variables
the_var_weights = model2.get_weights()
print(type(the_var_weights),  type(the_var_weights[0]), len(the_var_weights))
print(f"-first is {the_var_weights[0].shape}, second {the_var_weights[1].shape}, third {the_var_weights[2].shape}, 4th is {the_var_weights[3].shape}")


<class 'list'> <class 'numpy.ndarray'> 4
-first is (784, 512), second (512,), third (512, 10), 4th is (10,)


In [4]:
# This is the function that will give the loss or the accuracy in a list
def get_loss_or_acc(historyList:list, loss=None, acc=None):
    """
    param:  Loss should be  the type of loss (string) that is found in the dictionary of history
            Acc if not None should be a string of the name that person wants to get from the history
            can only have either the loss or the acc passed in.

    Returns:    Will return a list of the loss or the acc in the order that the histories are passed in.
                Returns the last value in the history list.
    """
    if loss == None and acc == None:
        raise Exception ("Need to have at either loss or acc not be None")
    value_list = []
    item  = loss
    if acc:
        item = acc
    
    for h in historyList:
        value_list.append(h.history[item][-1])
    return value_list


# This function will find the average using the loss or the acc 
# If using the loss the best one is the one with the least loss 
# If using the acc then the best one is the one with the highest acc
def get_avg_with_metric(listArr:list, amount:float, loss=None, acc=None ):
    best_val = None
    best_arr = 0
    metric = None

    # this is the number to divide by to get the average
    divide_for_avg = 0
    # array that will hold all the values and will hold the end result of the avg 
    # array
    avg_arr = np.zeros(shape=listArr[0].shape)

    # the amount will be if you want it to be by the tenth, hundreth or the thousandth
    # for example .1 is tenth, .01 hundreth, .001 thousandth
    multiplier =int(1 / amount)
    # this is used to do the number of loops for adding each array expect the best array
    loop_num = 0
    # TODO need to fix this here using the values
    if loss != None:
        # loss must be a list
        # need to find the lowest loss
        comp = operator.lt
        metric = loss
        # setting to a high nunber for the loss to
        # be able to find something that is lower than this
        best_val = 1000 
    else:
        comp = operator.gt
        metric = acc
        best_val = 0
    # doing the looping find the array that is the best
    for i, val in enumerate(metric):
        if comp(val, best_val):
            best_val = val
            best_arr = i
    # adding the correct amount to each of the array
    for i, arr in enumerate(listArr):
        if i == best_arr:
            # doing the best one into the avg_arr
            divide_for_avg += multiplier
            for _ in range(multiplier):
                avg_arr += arr
        else:
            if loss != None:
                # doing a loss
                loop_num = round(((best_val/loss[i]) * multiplier))
            else:
                loop_num = round(((acc[i]/best_val) * multiplier))
            divide_for_avg += loop_num
            # doing the looping and adding the array value to a
            for _ in range(loop_num):
                avg_arr += arr
    # will now divide by the number to get the average
    avg_arr = avg_arr/divide_for_avg
    return avg_arr
    

# This makes a list of the numpy array at the correct levl
def  makeList(allWeights, level:int):
    theList = []
    for i in range(len(allWeights)):
        theList.append(allWeights[i][level])
    return theList


def create_weight_avg(allWeights:list, loss=None, acc=None, amount=None):
    """
    Function to create a average of the weights.

    If we want to make the averages based on the loss we put a list of the losses 
    which will correspond to the weights.  If we want it based on the accuracy, 
    then we will put in a list of the accuracies for each of the weights.

    Amount is used when doing loss or accuracy.  It is the amount of accuracy or loss precision.
    can be .1, .01, .001 ect.

    Returns:  will return the new list of the weights which can then be used to set the weights 
        of the model.
    """
    # list of the numpy
    numpyList = []
    
    # doing a loop
    for i in range(len(allWeights[0])):
        # making the val a numpy array
        val = np.zeros(allWeights[0][i].shape)
        # outher loop doing the number of the numpy arrays in each list  in the list
        for j in range(len(allWeights)):
            if loss != None or acc != None:
                npList = makeList(allWeights, level=i)  
                # calling the function to get the avg val
                val = get_avg_with_metric(loss=loss, acc=acc, listArr=npList, amount= amount)
            else:        
                val += allWeights[j][i]
        if loss != None or acc != None:        
            # finding the average of the weights of one layer
            val = val/len(allWeights)
        # putting the val numpy array into the list
        numpyList.append(val)
    return numpyList


In [8]:
# getting the data and then will reshape the data
train, test = load_images()
# reshaping the data
end = len(train)
train_images1, train_labels1 = reshape_data(train,start_index=-1000, end_index=len(train[0]))
test_images1, test_labels1 = reshape_data(test, start_index=-1000, end_index=len(test[0]))

train_images2 , train_labels2 = reshape_data(train, start_index=0, end_index=1000)
test_images2, test_labels2 = reshape_data(test, start_index=0, end_index=1000)

train_images3 , train_labels3 = reshape_data(train, start_index=3000, end_index=4000)
test_images3, test_labels3 = reshape_data(test, start_index=3000, end_index=4000)


In [9]:
# checking the shapes of all the items
train_images1.shape, train_images2.shape, train_images3.shape, test_images1.shape, test_images3.shape

((1000, 784), (1000, 784), (1000, 784), (1000, 784), (1000, 784))

In [10]:
# looking at the test data
test_images1.shape, test_images2.shape, test_images3.shape

((1000, 784), (1000, 784), (1000, 784))

In [11]:
# now building all the models
model1 = create_model()
model2 = create_model()
model3 = create_model()

In [12]:
# now doing the trainig of each of the models
h1 = model1.fit(x=train_images1, y=train_labels1, batch_size=128, epochs=10,
            validation_data=(test_images1, test_labels1))
            

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
h2 = model2.fit(x=train_images2, y=train_labels2, batch_size=128, epochs=10,
            validation_data=(test_images2, test_labels2))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
h3 = model3.fit(x=train_images3, y=train_labels3, batch_size=128, epochs=10, 
                        validation_data=(test_images3, test_labels3))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
histList = get_loss_or_acc([h1, h2, h3], loss="val_loss")
histList2 = get_loss_or_acc([h1,h2,h3], acc="val_sparse_categorical_accuracy")

In [29]:
histList

[0.41800546646118164, 0.4295928478240967, 0.4458629786968231]

In [30]:
histList2

[0.8820000290870667, 0.8619999885559082, 0.8610000014305115]

In [31]:
# Trying to get the new weights for the models
w1 = model1.get_weights()
w2 = model2.get_weights()
w3 = model3.get_weights()
type(w1) 

list

In [40]:
# getting the new weights
new_weights_from_loss = create_weight_avg(allWeights=[w1, w2, w3], amount=.01, loss=histList)
new_weights_from_acc = create_weight_avg(allWeights=[w1, w2, w3], acc=histList2, amount=.01)

In [41]:
type(new_weights_from_loss) ,len(new_weights_from_loss), new_weights_from_loss[0].shape

(list, 4, (784, 512))

In [42]:
# comparing to the first weights
type(w1), len(w1), w1[0].shape

(list, 4, (784, 512))

In [45]:
# creating a totally new model
loss_model = create_model()
loss_model.set_weights(new_weights_from_loss)


In [46]:
acc_model = create_model()
acc_model.set_weights(new_weights_from_acc)

In [47]:
# running through all the data
test_images, test_labels = reshape_data(test, start_index=0, end_index=len(test[0]))

In [49]:
model1.evaluate(x=test_images, y=test_labels)



[0.5259408950805664, 0.843500018119812]

In [50]:
model2.evaluate(x=test_images, y=test_labels)



[0.38773971796035767, 0.8809000253677368]

In [51]:
model3.evaluate(x=test_images, y=test_labels)



[0.38086366653442383, 0.8816999793052673]

In [48]:
# doing the evaluation of the models 
loss_model.evaluate(x=test_images, y=test_labels)



[2.0591301918029785, 0.8554999828338623]

In [52]:
# doing the evaluation of the model where we looked at the best accuracy
acc_model.evaluate(x=test_images, y=test_labels)



[2.058875322341919, 0.85589998960495]

In [6]:
# checking to see what the accuracy will be with the model trained on all the 
# data.
all_data_model = create_model()
# getting the data for the model
train, test = load_images()
# getting the data in the correct format
train_images, train_labels = reshape_data(train, start_index=0, end_index=len(train[0]))
test_images , test_labels = reshape_data(test, start_index=0, end_index=len(test[0]))

In [25]:
# creating a callback for the tensorboard
the_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# creating the tensorboard callback
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=the_dir, histogram_freq=1)

In [26]:
# now fitting the model and then will evaluate with the test data
all_data_model.fit(x=train_images, y=train_labels, batch_size=128, epochs=10, validation_data=(test_images, test_labels), 
                    callbacks=[tensorboard_callback])
all_data_model.evaluate(x=test_images, y=test_labels)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[0.09707936644554138, 0.9829999804496765]

In [33]:
w = all_data_model.get_weights()
print(len(w))
print(len(w[0]), len(w[1]), len(w[2]), len(w[3]))
w[0].shape, w[1].shape, w[2].shape,   w[3].shape

4
784 512 512 10


((784, 512), (512,), (512, 10), (10,))

In [34]:
all_data_model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


In [None]:
# I am going to try the three models again, but this time I will 
# mix in the data so that some of the data is seen by more than one model
