In [1]:
import socket
import torch
import torch.nn as nn
import pickle

# Client setup
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_ip = '172.16.19.59'  # Replace with the server's (Laptop B's) IP address
server_port = 10300
client_socket.connect((server_ip, server_port))
print(f"Connected to server at {server_ip}:{server_port}")

# Load TinyBERT part 1
from transformers import AutoModel
tinybert = AutoModel.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')

class TinyBERTPart1(nn.Module):
    def __init__(self, original_model):
        super(TinyBERTPart1, self).__init__()
        self.embeddings = original_model.embeddings
        self.encoder_layers = nn.ModuleList(original_model.encoder.layer[:2])

    def forward(self, x):
        x = self.embeddings(x)
        for layer in self.encoder_layers:
            x = layer(x)[0]  # First element of the output tuple
        return x

model_part1 = TinyBERTPart1(tinybert).to('cpu')  # Keep it on CPU for simplicity in sending over network

# Example input
input_data = torch.randint(0, 30522, (1, 16))  # Simulated input for TinyBERT

# Perform forward pass through part 1
intermediate_output = model_part1(input_data)

# Serialize the intermediate tensor to send over network
data = pickle.dumps(intermediate_output)

# Send the intermediate tensor to server
client_socket.sendall(data)

# Close the socket connection
client_socket.close()


Connected to server at 172.16.19.59:10300


In [None]:
import socket

# Create a socket object
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# Define the server's IP address and port number
server_ip = '172.16.19.59'  # Replace with the server's IP address
server_port = 10300              # Port must match the one used by the server

# Connect to the server
client_socket.connect((server_ip, server_port))
print(f"Connected to server at {server_ip}:{server_port}")

# Communication loop
while True:
    # Send a message to the server
    message = input("Client (you): ")
    client_socket.send(message.encode())

    # Receive a reply from the server (max 1024 bytes)
    data = client_socket.recv(1024).decode()
    if not data:
        break
    print(f"Server: {data}")

# Close the connection
client_socket.close()

Connected to server at 172.16.19.59:10300


Client (you):  hi


Server: Hello Client


Client (you):  careless you are


Server: Thank you


In [6]:
import socket
import torch
import pickle
from transformers import AutoTokenizer, AutoModel

# Setup
host = '172.16.19.59'  # Server address
port = 10300
tokenizer = AutoTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
model = AutoModel.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')

# Prepare input data
text = "This is an example sentence for sentiment analysis."
inputs = tokenizer(text, return_tensors='pt')

# Forward pass through the model
with torch.no_grad():
    output = model(**inputs)  # Forward pass
    intermediate_output = output.last_hidden_state  # Access the last hidden state

# Serialize and send the intermediate tensor to the server
data = pickle.dumps(intermediate_output)
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client_socket.connect((host, port))
client_socket.sendall(data)
client_socket.close()
print("Intermediate output sent to server.")


Intermediate output sent to server.


In [7]:
import socket
import torch
import pickle
from transformers import AutoTokenizer, AutoModel

# Setup
host = '172.16.19.59'  # Server address
port = 10300

print("Loading tokenizer and model...")
tokenizer = AutoTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
model = AutoModel.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
print("Tokenizer and model loaded successfully.")

# Prepare input data
text = "This is an example sentence for sentiment analysis."
print(f"Tokenizing input text: '{text}'")
inputs = tokenizer(text, return_tensors='pt')
print("Input tokenized successfully.")

# Forward pass through the model
print("Performing forward pass through the model...")
with torch.no_grad():
    output = model(**inputs)  # Forward pass
    intermediate_output = output.last_hidden_state  # Access the last hidden state
    print(f"Output shape: {intermediate_output.shape}")

# Serialize and send the intermediate tensor to the server
data = pickle.dumps(intermediate_output)
print("Intermediate output serialized.")

# Print summary of the tensor data
print("Summary of the intermediate output tensor:")
print(intermediate_output)

try:
    print("Connecting to the server...")
    client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client_socket.connect((host, port))
    print("Connected to the server.")

    print("Sending intermediate output to server...")
    client_socket.sendall(data)
    print("Data sent successfully.")

except ConnectionRefusedError:
    print(f"Connection refused. Ensure the server is running at {host}:{port}.")

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    client_socket.close()
    print("Connection closed.")


Loading tokenizer and model...
Tokenizer and model loaded successfully.
Tokenizing input text: 'This is an example sentence for sentiment analysis.'
Input tokenized successfully.
Performing forward pass through the model...
Output shape: torch.Size([1, 11, 312])
Intermediate output serialized.
Summary of the intermediate output tensor:
tensor([[[-0.1936,  0.2849,  0.2053,  ..., -0.1179,  0.1213,  0.1366],
         [-0.0972,  0.3074,  0.0534,  ..., -0.5702,  0.5079,  0.0612],
         [-0.1953,  0.4485, -0.0143,  ..., -0.8655,  0.1390,  0.5142],
         ...,
         [ 0.1187,  0.3189,  0.3829,  ...,  0.3394,  0.4038,  0.2010],
         [-0.0102, -0.1025,  0.0466,  ..., -0.2004, -0.8425,  0.2412],
         [-0.0360, -0.2039,  0.0244,  ..., -0.1843, -0.4815,  0.2304]]])
Connecting to the server...
Connected to the server.
Sending intermediate output to server...
Data sent successfully.
Connection closed.


In [13]:
import torch
import sys
from transformers import AutoTokenizer, AutoModel

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
model = AutoModel.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')

# Assume you want to split the model in half (for pipeline parallelism)
half = len(list(model.parameters())) // 2

# Calculate size of each part
part1_params = list(model.parameters())[:half]
part2_params = list(model.parameters())[half:]

def calculate_model_size(params):
    return sum(param.numel() for param in params) * 4 / (1024 * 1024)  # Convert to MB

part1_size_mb = calculate_model_size(part1_params)
part2_size_mb = calculate_model_size(part2_params)

print(f"Size of Part 1: {part1_size_mb:.6f} MB")
print(f"Size of Part 2: {part2_size_mb:.6f} MB")


Size of Part 1: 45.652771 MB
Size of Part 2: 9.089081 MB


In [14]:
# client is serializing the data and sending to server. it is deserializing it.

In [19]:
import socket
import torch
from transformers import AutoTokenizer, AutoModel
import pickle

# Load TinyBERT model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
tinybert = AutoModel.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')

# Example input texts (batch)
texts = [
    "I love this product!",
    "This is the worst experience I've ever had.",
    "The service was fantastic!",
    "Not worth the price.",
    "I would recommend this!"
]

# Tokenize the input texts
inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)

# Forward pass through the first two layers
model_part1 = tinybert.encoder.layer[:2]

with torch.no_grad():
    # Get the initial hidden states from the embedding layer
    hidden_states = tinybert.embeddings(inputs['input_ids'])
    
    # Pass through the first two layers
    for layer in model_part1:
        hidden_states = layer(hidden_states)[0]  # Get the output from each layer

# Serialize and send the intermediate tensor to the server
data = pickle.dumps(hidden_states)
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client_socket.connect(('172.16.19.59', 10300))  # Adjust host and port as needed
client_socket.sendall(data)
client_socket.close()
print("Intermediate output sent to server.")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Intermediate output sent to server.


In [20]:
model_part1

ModuleList(
  (0-1): 2 x BertLayer(
    (attention): BertAttention(
      (self): BertSdpaSelfAttention(
        (query): Linear(in_features=312, out_features=312, bias=True)
        (key): Linear(in_features=312, out_features=312, bias=True)
        (value): Linear(in_features=312, out_features=312, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (output): BertSelfOutput(
        (dense): Linear(in_features=312, out_features=312, bias=True)
        (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (intermediate): BertIntermediate(
      (dense): Linear(in_features=312, out_features=1200, bias=True)
      (intermediate_act_fn): GELUActivation()
    )
    (output): BertOutput(
      (dense): Linear(in_features=1200, out_features=312, bias=True)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
)