In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


*## Weight sharing (Pytorch)*

In [23]:
import torch
import torch.quantization
import torch.nn as nn

from sklearn.cluster import KMeans

In [24]:
# set seed for reproducibility
torch.manual_seed(0)

class SampleLinearModel(nn.Module):
  def __init__(self):
    super(SampleLinearModel, self).__init__()
    self.linear = nn.Linear(10, 10)

  def cluster_weights(self, num_cluster):
    # cluster weights of the layer
    km =KMeans(
        n_clusters=num_cluster, init='random',
        n_init=10, max_iter=300, tol=1e-04, random_state=0
    )

    # construct a mapping from a cluster index to a centroid value and store
    # at self.weight_mapping
    weights = model.linear.weight.reshape([-1, 1]).detach().numpy()
    self.weights_cluster = km.fit_predict(weights)

    # find cluster index for each weight value and store at self.weights_cluster
    self.weights_mapping = km.cluster_centers_

    # drop the original weights to reduce the model size
    self.linear.weight = None

  def forward(self, x):
    if self.training:
      x = self.linear(x)
    else: # in eval mode
      # update weights of the self.layer by reassigning each value based on
      # self.weights_cluster and self.weights_mapping
      self.linear.weight = torch.nn.Parameter(torch.Tensor(self.weights_mapping[self.weights_cluster]).reshape(10, 10))
      x = self.linear(x)
    return x

In [25]:
class CustomDataset(torch.utils.data.Dataset):
  def __init__(self):
    self.num_samples = 100
    self.data = torch.rand([self.num_samples, 10])
    self.label = torch.rand([self.num_samples, 1])

  def __len__(self):
    return self.num_samples

  def __getitem__(self, idx):
    return self.data[idx], self.label[idx]

train_dataset = CustomDataset()
training_data_loader = torch.utils.data.DataLoader(train_dataset)

In [29]:
model = SampleLinearModel()

# train the model
model.train()
mse_loss = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

for data, label in training_data_loader:
  optimizer.zero_grad()
  pred = model(data)
  loss = mse_loss(pred, label)
  loss.backward()
  optimizer.step()

  return F.mse_loss(input, target, reduction=self.reduction)


In [30]:
original_output = model(train_dataset[0][0])
print(original_output)

tensor([ 3.4836e-01,  2.3107e-01,  1.1571e-01,  3.8330e-01, -7.5996e-05,
         5.0055e-01,  4.1884e-01,  3.7552e-01,  8.0600e-02,  2.8557e-01],
       grad_fn=<ViewBackward0>)


In [31]:
# check the original model size

import os

# save the model and check the model size
def print_size_of_model(model, label=""):
  torch.save(model.state_dict(), 'temp.p')
  size = os.path.getsize("temp.p")
  print("model: ", label, '\t', 'Size (Kb): ', size/1e3)
  os.remove('temp.p')
  return size

In [33]:
clustered_model = SampleLinearModel()
original_model_size = print_size_of_model(model, "original_model")

model:  original_model 	 Size (Kb):  1.926


## apply weight clustering

In [34]:
model.eval()
model.cluster_weights(5)

In [35]:
print('linear.weights_mapping:\n', model.weights_mapping)
print('linear.weights_cluster:\n', model.weights_cluster)
print('linear.weigth:\n', model.linear.weight)

linear.weights_mapping:
 [[-0.05362369]
 [ 0.28887314]
 [-0.21338633]
 [ 0.19156776]
 [ 0.08644137]]
linear.weights_cluster:
 [3 2 2 2 3 3 2 4 1 4 0 4 0 3 2 3 3 2 1 1 0 0 3 2 3 4 3 2 2 3 3 2 2 2 2 0 1
 0 3 1 3 2 3 3 1 2 1 1 2 4 1 2 4 4 4 1 1 1 0 1 2 2 0 0 1 3 3 3 2 0 4 4 3 0
 0 1 0 2 3 3 0 0 2 2 3 0 1 2 0 4 3 1 3 3 0 2 2 4 1 0]
linear.weigth:
 None


In [36]:
clustered_output = model(train_dataset[0][0])
print(clustered_output)

tensor([ 0.3074,  0.1611,  0.1342,  0.4141,  0.0722,  0.5464,  0.3381,  0.3777,
        -0.0066,  0.3722], grad_fn=<ViewBackward0>)


In [37]:
## compare difference of the moudel size

clustered_model_size = print_size_of_model(model, "clustered model")
print("{0:.2f} times smaller".format(original_model_size/clustered_model_size))

model:  clustered model 	 Size (Kb):  1.926
1.00 times smaller


# Weight Sharing Tensoflow

In [38]:
import tensorflow_model_optimization as tfmot
import tensorflow as tf
print(tf.__version__)
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np

2.15.0


In [39]:
# data
data = pd.read_csv("sample_google_scholar.csv")
data = data.dropna()
def convert_first_ten_characters_into_tensor(data):
    first_ten_characters = data[:10]
    converted = [ord(char)/256 for char in first_ten_characters]
    while len(converted) < 10:
        converted.append(0.0)
    return np.array(converted)
converted_affiliation = data['affiliation'].map(convert_first_ten_characters_into_tensor)
affiliation = np.vstack(converted_affiliation.values)
converted_email = data['email'].str.contains('.edu')
labels = converted_email.values
# model
input_shape = 10

In [40]:
tf_model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Dense(128, activation="relu", name="layer1"),
        layers.Dense(64, activation="relu", name="layer2"),
        layers.Dense(1, activation="sigmoid", name="layer3"),
    ])
loss = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()

In [41]:
tf_model.compile(loss=loss, optimizer=optimizer)
# model fit
tf_model.fit(affiliation, labels, batch_size=16, epochs=5, validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78177a12a5c0>

In [47]:
CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
clustering_params = {
    'number_of_clusters': 10,
    'cluster_centroids_init': CentroidInitialization.LINEAR
}

In [48]:
clustered_model = tfmot.clustering.keras.cluster_weights(tf_model, **clustering_params)

In [49]:
clustered_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cluster_layer1 (ClusterWei  (None, 128)               2698      
 ghts)                                                           
                                                                 
 cluster_layer2 (ClusterWei  (None, 64)                16458     
 ghts)                                                           
                                                                 
 cluster_layer3 (ClusterWei  (None, 1)                 139       
 ghts)                                                           
                                                                 
Total params: 19295 (112.62 KB)
Trainable params: 9759 (38.12 KB)
Non-trainable params: 9536 (74.50 KB)
_________________________________________________________________


In [50]:
final_model = tfmot.clustering.keras.strip_clustering(clustered_model)

In [51]:
final_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer1 (Dense)              (None, 128)               1408      
                                                                 
 layer2 (Dense)              (None, 64)                8256      
                                                                 
 layer3 (Dense)              (None, 1)                 65        
                                                                 
Total params: 9729 (38.00 KB)
Trainable params: 9729 (38.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
