# Tutorial #5

In this tutorial, we are importing a production model from the collection to apply Transfer Learning on a different task.

### Pre-tutorial

In [2]:
# Cell to be removed once the package is stable
import sys
import os

# Get the absolute path of your project directory
project_path = os.path.abspath("..")

# Add the project directory to sys.path
if project_path not in sys.path:
    sys.path.append(project_path)

### Imports

In [3]:
from NeuralLib.model_hub import TLModel, TLFactory, list_production_models
from NeuralLib.architectures import GRUseq2seq
from NeuralLib.config import DATASETS_ECG_G
import os

### Data paths

In [4]:
X = os.path.join(DATASETS_ECG_G, 'x')
Y_BIN = os.path.join(DATASETS_ECG_G, 'y_bin')

### Initialize Factory

In [5]:
factory = TLFactory()

### Check the list of Production Models in the NeuralLib collection

In [6]:
list_production_models()

ECGPeakDetector
ECGDenoiser


### Load a Production Model from the collection

In [8]:
factory.load_production_model(model_name="ECGDenoiser")

prod_model = factory.models['ECGDenoiser']

Using cached model files at: C:\Users\Catia Bastos\dev\hugging_prodmodels\ECGPeakDetector
✅ ECGPeakDetector successfully initialized.
✅ Loaded ProductionModel: ECGPeakDetector


In [10]:
# Inspect model structure
print(prod_model.model)

GRUseq2seq(
  (gru_layers): ModuleList(
    (0): GRU(1, 32, batch_first=True, bidirectional=True)
    (1): GRU(64, 64, batch_first=True, bidirectional=True)
    (2): GRU(128, 64, batch_first=True, bidirectional=True)
  )
  (dropout_layers): ModuleList(
    (0-2): 3 x Dropout(p=0, inplace=False)
  )
  (fc_out): Linear(in_features=128, out_features=1, bias=True)
  (criterion): BCEWithLogitsLoss()
)


### Define hyperparameters of TLModel

In [11]:
arch_params = {
    'model_name': 'ECGPeakDetectorTLDenoiser',
    'n_features': 1,
    'hid_dim': [64, 64, 32],  # Hidden dimensions per layer
    'n_layers': 3,            # Total layers
    'dropout': 0,             # Dropout rate
    'learning_rate': 0.001,
    'bidirectional': True,
    'task': 'classification',
    'num_classes': 1
}

### Initialize TLModel

In [13]:
tl_model = TLModel('GRUseq2seq', **arch_params)

✅ TransferLearningModel initialized with architecture: GRUseq2seq


### (optional) inspect layer names and keys in state_dict

In [16]:
# Print all layer names and their types
for name, module in tl_model.named_modules():
    print("Name: ", name, "->", "Module: ", module)

Name:   -> Module:  TLModel(
  (model): GRUseq2seq(
    (gru_layers): ModuleList(
      (0): GRU(1, 64, batch_first=True, bidirectional=True)
      (1): GRU(128, 64, batch_first=True, bidirectional=True)
      (2): GRU(128, 32, batch_first=True, bidirectional=True)
    )
    (dropout_layers): ModuleList(
      (0-2): 3 x Dropout(p=0, inplace=False)
    )
    (fc_out): Linear(in_features=64, out_features=1, bias=True)
    (criterion): BCEWithLogitsLoss()
  )
)
Name:  model -> Module:  GRUseq2seq(
  (gru_layers): ModuleList(
    (0): GRU(1, 64, batch_first=True, bidirectional=True)
    (1): GRU(128, 64, batch_first=True, bidirectional=True)
    (2): GRU(128, 32, batch_first=True, bidirectional=True)
  )
  (dropout_layers): ModuleList(
    (0-2): 3 x Dropout(p=0, inplace=False)
  )
  (fc_out): Linear(in_features=64, out_features=1, bias=True)
  (criterion): BCEWithLogitsLoss()
)
Name:  model.gru_layers -> Module:  ModuleList(
  (0): GRU(1, 64, batch_first=True, bidirectional=True)
  (1): 

In [17]:
# Print all keys (TLModel) in the state_dict
for key in tl_model.state_dict().keys():
    print(key)

model.gru_layers.0.weight_ih_l0
model.gru_layers.0.weight_hh_l0
model.gru_layers.0.bias_ih_l0
model.gru_layers.0.bias_hh_l0
model.gru_layers.0.weight_ih_l0_reverse
model.gru_layers.0.weight_hh_l0_reverse
model.gru_layers.0.bias_ih_l0_reverse
model.gru_layers.0.bias_hh_l0_reverse
model.gru_layers.1.weight_ih_l0
model.gru_layers.1.weight_hh_l0
model.gru_layers.1.bias_ih_l0
model.gru_layers.1.bias_hh_l0
model.gru_layers.1.weight_ih_l0_reverse
model.gru_layers.1.weight_hh_l0_reverse
model.gru_layers.1.bias_ih_l0_reverse
model.gru_layers.1.bias_hh_l0_reverse
model.gru_layers.2.weight_ih_l0
model.gru_layers.2.weight_hh_l0
model.gru_layers.2.bias_ih_l0
model.gru_layers.2.bias_hh_l0
model.gru_layers.2.weight_ih_l0_reverse
model.gru_layers.2.weight_hh_l0_reverse
model.gru_layers.2.bias_ih_l0_reverse
model.gru_layers.2.bias_hh_l0_reverse
model.fc_out.weight
model.fc_out.bias


In [18]:
# Print all keys (ProdModel) in the state_dict
for key in prod_model.state_dict().keys():
    print(key)

model.gru_layers.0.weight_ih_l0
model.gru_layers.0.weight_hh_l0
model.gru_layers.0.bias_ih_l0
model.gru_layers.0.bias_hh_l0
model.gru_layers.0.weight_ih_l0_reverse
model.gru_layers.0.weight_hh_l0_reverse
model.gru_layers.0.bias_ih_l0_reverse
model.gru_layers.0.bias_hh_l0_reverse
model.gru_layers.1.weight_ih_l0
model.gru_layers.1.weight_hh_l0
model.gru_layers.1.bias_ih_l0
model.gru_layers.1.bias_hh_l0
model.gru_layers.1.weight_ih_l0_reverse
model.gru_layers.1.weight_hh_l0_reverse
model.gru_layers.1.bias_ih_l0_reverse
model.gru_layers.1.bias_hh_l0_reverse
model.gru_layers.2.weight_ih_l0
model.gru_layers.2.weight_hh_l0
model.gru_layers.2.bias_ih_l0
model.gru_layers.2.bias_hh_l0
model.gru_layers.2.weight_ih_l0_reverse
model.gru_layers.2.weight_hh_l0_reverse
model.gru_layers.2.bias_ih_l0_reverse
model.gru_layers.2.bias_hh_l0_reverse
model.fc_out.weight
model.fc_out.bias


### Extract desired weights from the Production Model

In [19]:
layer_mapping = {
    'gru_layers.0': prod_model.model.gru_layers[0].state_dict(),  # First GRU layer weights
    'gru_layers.1': prod_model.model.gru_layers[1].state_dict()   # Second GRU layer weights
}

### Define freezing and unfreezing strategies

In [20]:
freeze_layers = ['gru_layers.0']
unfreeze_layers = ['gru_layers.1']

### Configure the TLModel

In [21]:
factory.configure_tl_model(
    tl_model=tl_model,
    layer_mapping=layer_mapping,  # inject mapped weights from the prod model
    freeze_layers=freeze_layers,
    unfreeze_layers=unfreeze_layers
)

🔄 Injecting weights into TLModel layers...
layer mapping items: dict_items([('gru_layers.0', OrderedDict([('weight_ih_l0', tensor([[ 0.0563],
        [ 0.0229],
        [ 0.0062],
        [-0.0140],
        [ 0.0470],
        [ 0.1731],
        [-0.0208],
        [-0.0011],
        [-0.0592],
        [ 0.0285],
        [ 0.1479],
        [ 0.0681],
        [ 0.1086],
        [ 0.1445],
        [ 0.2109],
        [ 0.0465],
        [ 0.2136],
        [ 0.1798],
        [ 0.0240],
        [ 0.0943],
        [ 0.0089],
        [ 0.1835],
        [ 0.0398],
        [ 0.1905],
        [-0.1278],
        [ 0.1058],
        [-0.0287],
        [-0.1388],
        [ 0.0355],
        [ 0.0268],
        [ 0.1111],
        [ 0.2347],
        [-0.2872],
        [ 0.0392],
        [-0.0167],
        [-0.0975],
        [-0.1583],
        [-0.1761],
        [-0.1317],
        [-0.1989],
        [ 0.0267],
        [-0.1534],
        [-0.3157],
        [-0.1730],
        [-0.3113],
        [-0.3008],
   

RuntimeError: Error(s) in loading state_dict for GRU:
	size mismatch for weight_ih_l0: copying a param with shape torch.Size([96, 1]) from checkpoint, the shape in current model is torch.Size([192, 1]).
	size mismatch for weight_hh_l0: copying a param with shape torch.Size([96, 32]) from checkpoint, the shape in current model is torch.Size([192, 64]).
	size mismatch for bias_ih_l0: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for bias_hh_l0: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for weight_ih_l0_reverse: copying a param with shape torch.Size([96, 1]) from checkpoint, the shape in current model is torch.Size([192, 1]).
	size mismatch for weight_hh_l0_reverse: copying a param with shape torch.Size([96, 32]) from checkpoint, the shape in current model is torch.Size([192, 64]).
	size mismatch for bias_ih_l0_reverse: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for bias_hh_l0_reverse: copying a param with shape torch.Size([96]) from checkpoint, the shape in current model is torch.Size([192]).

### Define training parameters for training TLModel

In [22]:
train_params = {
    'path_x': X,
    'path_y': Y_BIN,
    'epochs': 1,
    'batch_size': 1,
    'patience': 2,
    'dataset_name': 'private_gib01',
    'trained_for': 'fine-tuning peak detection',
    'all_samples': False,
    'samples': 3,
    'gpu_id': None,
    'enable_tensorboard': True
}
# TODO: in this case it is not "retraining", right? 

### Train TLModel

In [None]:
tl_model.train_tl(**train_params)