In [3]:
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(os.getcwd())))

In [4]:
import torch
import torch.nn.utils.prune as prune

from src.mslm.utils import ConfigLoader
from src.mslm.models import Imitator, PositionalEncoding


In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"

batch_size = 32
train_ratio = 0.2
epochs = 10
log_interval = 2
checkpoint_interval = 2

In [6]:
model_parameters = ConfigLoader("config/model/config.toml").load_config()
model_parameters.update({
    "device": device if model_parameters.get("device") == "auto" else model_parameters.get("device", device),
    "input_size": 543 * 2,
    "output_size": 3072,
    "T_size": 15 * 35,
    "compile": False
})

# --- config de entrenamiento ---
train_config = ConfigLoader("config/training/train_config.toml").load_config()
train_ratio = train_config.get("train_ratio", train_ratio)
train_config.update({
    "checkpoint": 1,
    "learning_rate": train_config.get("learning_rate", 0.00238),
    "epochs": epochs if epochs else train_config.get("epochs", 100),
    "batch_size": batch_size if batch_size else train_config.get("batch_size", 32),
    "checkpoint_interval": checkpoint_interval if checkpoint_interval else train_config.get("checkpoint_interval", 5),
    "log_interval": log_interval if log_interval else train_config.get("log_interval", 2),
    "train_ratio": train_ratio,
    "validation_ratio": round(1 - train_ratio, 2),
    "device": device if model_parameters.get("device") == "auto" else model_parameters.get("device", device),
})

In [7]:
os.path.join(os.path.dirname(os.getcwd()),"outputs", "checkpoints", "41", "1", "5")

'/home/giorgio6846/Code/Sign-AI/outputs/checkpoints/41/1/5'

In [8]:
import sys
import types
import torch

dummy_module = types.ModuleType("src.train.Imitator")
pe_dummy = types.ModuleType("src.train.PositionalEncoding")

class DummyImitator(torch.nn.Module):
    def __init__(self):
        super().__init__()
    
    def forward(self, x):
        # x -> [batch_size, T, input_size]
        B, T, D, C = x.shape
        x = x.view(B, T,  D * C)
        x = F.relu(self.linear(x))
        x = self.norm1(x)

        x = self.pe(x)
        x = self.transformer(x)

        x = x.transpose(1, 2)    # [B, hidden, 525]
        x = self.temporal_adjuster(x)  # [B, hidden, 128]
        x = x.transpose(1, 2)
        
        x = self.linear_out(x)
        
        # x = F.relu(self.linear_out(x))

        # x = x.transpose(1, 2)
        # x = F.relu(self.pooling(x))
        # x = x.transpose(1, 2)

        return x

dummy_module.Imitator = DummyImitator
pe_dummy.PositionalEncoding = PositionalEncoding

sys.modules["src.train.Imitator"] = dummy_module
sys.modules["src.train.PositionalEncoding"] = pe_dummy

checkpoint_path = "/home/giorgio6846/Code/Sign-AI/Sign-Multimodal-Language-Model/outputs/model/checkpoints/35/1/15/model.pt"
model = torch.load(checkpoint_path, map_location='cpu', weights_only=False)

print("✅ Modelo cargado exitosamente")


✅ Modelo cargado exitosamente


In [11]:
model

OptimizedModule(
  (_orig_mod): DummyImitator(
    (linear): Linear(in_features=1086, out_features=1024, bias=True)
    (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (pe): PositionalEncoding(
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): TransformerEncoder(
      (layers): ModuleList(
        (0-5): 6 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True)
          )
          (linear1): Linear(in_features=1024, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=1024, bias=True)
          (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        

In [9]:
parameters_to_prune = (
    (model.stgcn.blocks[0].spatial_conv, 'weight'),
    (model.stgcn.blocks[1].spatial_conv, 'weight'),
    (model.temporal_adjuster[0], 'weight'),
    (model.temporal_adjuster[0], 'bias'),
    (model.linear_out, 'weight'),
    (model.linear_out, 'bias'),
)

prune.global_unstructured(
    parameters_to_prune,
    pruning_method=prune.L1Unstructured,
    amount=0.2
)

AttributeError: 'DummyImitator' object has no attribute 'stgcn'

In [None]:
list(model.named_parameters())

[('stgcn.blocks.0.spatial_conv.weight',
  Parameter containing:
  tensor([[[[-1.1450e-03, -2.7274e-03,  5.5406e-03,  ..., -5.7048e-03,
              1.3995e-04,  3.1396e-03]],
  
           [[ 3.7453e-03,  5.8628e-03,  3.8650e-03,  ..., -1.8009e-03,
              1.2515e-03,  3.1143e-03]],
  
           [[-2.5976e-03, -9.7501e-04,  9.5123e-04,  ...,  3.2989e-03,
             -1.4781e-03, -2.8450e-03]],
  
           ...,
  
           [[ 2.4252e-04,  5.5939e-03,  5.4386e-03,  ..., -3.8444e-03,
              3.3135e-03, -5.5893e-03]],
  
           [[-2.6196e-03, -1.9959e-03,  4.8427e-03,  ..., -3.1274e-03,
              3.2739e-03,  5.4719e-03]],
  
           [[ 5.2957e-03,  5.4477e-03,  1.7278e-03,  ...,  2.2188e-03,
             -2.1019e-03, -4.3358e-03]]],
  
  
          [[[ 1.3562e-03, -4.9601e-03,  2.3522e-03,  ...,  3.9107e-03,
             -4.9076e-03, -5.9967e-03]],
  
           [[ 3.5477e-03,  3.5037e-03, -4.5067e-03,  ..., -1.8341e-03,
              3.5258e-03, -5.6999e-03

In [None]:
list(model.named_buffers())

[('stgcn.blocks.0.spatial_conv.weight_mask',
  tensor([[[[0., 0., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 0., 1., 1.]],
  
           ...,
  
           [[1., 1., 1.,  ..., 1., 0., 0.]],
  
           [[1., 0., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 1., 1., 0.]]],
  
  
          [[[1., 1., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 1., 1., 0.]],
  
           [[0., 1., 1.,  ..., 0., 1., 1.]],
  
           ...,
  
           [[1., 1., 1.,  ..., 1., 0., 1.]],
  
           [[1., 1., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 0., 1., 1.]]],
  
  
          [[[1., 0., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 1.,  ..., 1., 0., 1.]],
  
           [[1., 1., 0.,  ..., 1., 1., 1.]],
  
           ...,
  
           [[1., 1., 1.,  ..., 0., 0., 1.]],
  
           [[0., 1., 1.,  ..., 1., 1., 1.]],
  
           [[1., 1., 0.,  ..., 1., 0., 1.]]],
  
  
          ...,
  
  
 

In [None]:
print(
    "Sparsity in block1 weight: {:.2f}%".format(
        100. * float(torch.sum(model.stgcn.blocks[0].spatial_conv.weight == 0))
        / float(model.stgcn.blocks[0].spatial_conv.weight.nelement())
    )
)
print(
    "Sparsity in block1 weight: {:.2f}%".format(
        100. * float(torch.sum(model.stgcn.blocks[1].spatial_conv.weight == 0))
        / float(model.stgcn.blocks[1].spatial_conv.weight.nelement())
    )
)
print(
    "Sparsity in ta[0] weight: {:.2f}%".format(
        100. * float(torch.sum(model.temporal_adjuster[0].weight == 0))
        / float(model.temporal_adjuster[0].weight.nelement())
    )
)
print(
    "Sparsity in ta[0] bias: {:.2f}%".format(
        100. * float(torch.sum(model.temporal_adjuster[0].bias == 0))
        / float(model.temporal_adjuster[0].bias.nelement())
    )
)
print(
    "Sparsity in linear_out bias: {:.2f}%".format(
        100. * float(torch.sum(model.linear_out.weight == 0))
        / float(model.linear_out.weight.nelement())
    )
)
print(
    "Sparsity in linear_out bias: {:.2f}%".format(
        100. * float(torch.sum(model.linear_out.bias == 0))
        / float(model.linear_out.bias.nelement())
    )
)
print(
    "Global sparsity: {:.2f}%".format(
        100. * float(
              torch.sum(model.stgcn.blocks[0].spatial_conv.weight == 0)
            + torch.sum(model.stgcn.blocks[1].spatial_conv.weight == 0)
            + torch.sum(model.temporal_adjuster[0].weight == 0)
            + torch.sum(model.temporal_adjuster[0].bias == 0)
            + torch.sum(model.linear_out.weight == 0)
            + torch.sum(model.linear_out.bias == 0)
        )
        / float(
            model.stgcn.blocks[0].spatial_conv.weight.nelement()
            + model.stgcn.blocks[1].spatial_conv.weight.nelement()
            + model.temporal_adjuster[0].weight.nelement()
            + model.temporal_adjuster[0].bias.nelement()
            + model.linear_out.weight.nelement()
            + model.linear_out.bias.nelement()
        )
    )
)

Sparsity in block1 weight: 30.88%
Sparsity in block1 weight: 38.31%
Sparsity in ta[0] weight: 21.47%
Sparsity in ta[0] bias: 21.88%
Sparsity in linear_out bias: 23.66%
Sparsity in linear_out bias: 23.96%
Global sparsity: 36.00%


In [None]:
print(model.state_dict().keys())

odict_keys(['stgcn.blocks.0.spatial_conv.bias', 'stgcn.blocks.0.spatial_conv.weight_orig', 'stgcn.blocks.0.spatial_conv.weight_mask', 'stgcn.blocks.0.temp_conv.weight', 'stgcn.blocks.0.temp_conv.bias', 'stgcn.blocks.0.norm.weight', 'stgcn.blocks.0.norm.bias', 'stgcn.blocks.0.norm.running_mean', 'stgcn.blocks.0.norm.running_var', 'stgcn.blocks.0.norm.num_batches_tracked', 'stgcn.blocks.1.spatial_conv.bias', 'stgcn.blocks.1.spatial_conv.weight_orig', 'stgcn.blocks.1.spatial_conv.weight_mask', 'stgcn.blocks.1.temp_conv.weight', 'stgcn.blocks.1.temp_conv.bias', 'stgcn.blocks.1.norm.weight', 'stgcn.blocks.1.norm.bias', 'stgcn.blocks.1.norm.running_mean', 'stgcn.blocks.1.norm.running_var', 'stgcn.blocks.1.norm.num_batches_tracked', 'temporal_adjuster.0.weight_orig', 'temporal_adjuster.0.bias_orig', 'temporal_adjuster.0.weight_mask', 'temporal_adjuster.0.bias_mask', 'linear_out.weight_orig', 'linear_out.bias_orig', 'linear_out.weight_mask', 'linear_out.bias_mask'])
