In [2]:
import pickle
import numpy as np
from scipy.stats import sem
import matplotlib.pyplot as plt

from transformers import AutoModel, AutoModelForCausalLM, AutoModelForMaskedLM, AutoModelForSeq2SeqLM
from transformers import BertModel, AlbertModel, DistilBertModel, RobertaModel, BartModel, OpenAIGPTModel, GPT2Model

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def decomposition_SN(A):
    
    S = np.linalg.norm(.5 * (A + A.T), 'fro') / np.linalg.norm(A, 'fro')
    N = np.linalg.norm(.5 * (A - A.T), 'fro') / np.linalg.norm(A, 'fro')

    return S, N

def decomposition_blocks(d,l,h,dh,A):
    
    S = np.zeros((l,h))
    N = np.zeros((l,h))

    for i, layer in enumerate(range(l)):
        for j, head in enumerate(range(0,d,dh)):
            S[i,j], N[i,j] = decomposition_SN(A[layer][head:head+dh,head:head+dh])
    return  S, N

In [8]:
def get_matricesBERT(model):

    layers = len(model.encoder.layer)    
    M = []
    for l in range(layers):
        Wq = model.encoder.layer[l].attention.self.query.weight.detach().numpy()
        Wk = model.encoder.layer[l].attention.self.key.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesALBERT(model):

    layers = len(model.encoder.albert_layer_groups)    
    M = []
    for l in range(layers):
        Wq = model.encoder.albert_layer_groups[l].albert_layers[0].attention.query.weight.detach().numpy()
        Wk = model.encoder.albert_layer_groups[l].albert_layers[0].attention.key.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesDistillBERT(model):

    layers = len(model.transformer.layer)
    M = []
    for l in range(layers):
        Wq = model.transformer.layer[l].attention.q_lin.weight.detach().numpy()
        Wk = model.transformer.layer[l].attention.k_lin.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesBART(model):

    layers = len(model.encoder.layers)  
    M = []
    for l in range(layers):
        Wq = model.encoder.layers[l].self_attn.q_proj.weight.detach().numpy()
        Wk = model.encoder.layers[l].self_attn.k_proj.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesGPT(d,model):

    layers = len(model.h)  
    M = []
    for l in range(layers):
        Wq = model.h[l].attn.c_attn.weight[:,:d].detach().numpy()
        Wk = model.h[l].attn.c_attn.weight[:,d:2*d].detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesDistillGPT(d,model):

    layers = len(model.transformer.h)  
    M = []
    for l in range(layers):
        Wq = model.transformer.h[l].attn.c_attn.weight[:,:d].detach().numpy()
        Wk = model.transformer.h[l].attn.c_attn.weight[:,d:2*d].detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesDistillROBERTA(model):

    layers = len(model.roberta.encoder.layer)    
    M = []
    for l in range(layers):
        Wq = model.roberta.encoder.layer[l].attention.self.query.weight.detach().numpy()
        Wk = model.roberta.encoder.layer[l].attention.self.key.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesT5(model):

    layers = len(model.encoder.block)
    M = []
    for l in range(layers):
        Wq = model.encoder.block[l].layer[0].SelfAttention.q.weight.detach().numpy()
        Wk = model.encoder.block[l].layer[0].SelfAttention.k.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesOPT(model):

    layers = len(model.model.decoder.layers)
    M = []
    for l in range(layers):
        Wq = model.model.decoder.layers[l].self_attn.q_proj.weight.detach().numpy()
        Wk = model.model.decoder.layers[l].self_attn.k_proj.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesGPTneo(model):

    layers = len(model.transformer.h)
    M = []
    for l in range(layers):
        Wq = model.transformer.h[l].attn.attention.q_proj.weight.detach().numpy()
        Wk = model.transformer.h[l].attn.attention.k_proj.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def get_matricesGPTneox(d,l,h,dh,model):
    
    S = np.zeros((l,h))
    N = np.zeros((l,h))

    for i in range(l):
        print(l)

        Wq = model.gpt_neox.layers[i].attention.query_key_value.weight[:d,:].detach().numpy()
        Wk = model.gpt_neox.layers[i].attention.query_key_value.weight[d:2*d,:].detach().numpy()
        M = Wq@(Wk.T)

        for j, head in enumerate(range(0,d,dh)):
            S[i,j], N[i,j] = decomposition_SN(M[head:head+dh,head:head+dh])    

    return S ,N

def get_matricesGPTj(model):

    layers = len(model.transformer.h)
    M = []
    for l in range(layers):
        Wq = model.transformer.h[l].attn.q_proj.weight.detach().numpy()
        Wk = model.transformer.h[l].attn.k_proj.weight.detach().numpy()
        M.append(Wq@(Wk.T))        

    return M

def decomposition_blocks(d,l,h,dh,A):
    
    S = np.zeros((l,h))
    N = np.zeros((l,h))

    for i, layer in enumerate(range(l)):
        for j, head in enumerate(range(0,d,dh)):
            S[i,j], N[i,j] = decomposition_SN(A[layer][head:head+dh,head:head+dh])
    return  S, N

In [5]:
"""
- KEY (str): model name
- VALUES (list): [layers (int), embedding dim (int), heads (int), head dim (int), S scores, N scores]
"""
models = {}

In [62]:
""" 
BERT models 
 - MODEL: Bidirectional, Encoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)
 - METRICS: perplexity, cross-entropy

 MLM: randomly masked some words in the sentence, predict masked words with cross-entropy 
 over the vocabulary 
 NSP: 

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence, bi-directionally.
"""
dh = 64

'BERT tiny (l = 2, d = 128, h = 2 ; 4.40M parameters)'
l = 2
d = 128
h = d // dh
model = AutoModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTtiny'] = [l,d,h,dh,S,N]

'BERT mini (l = 4, d = 256, h = 4 ; 11.3M parameters)'
l = 4
d = 256
h = d // dh
model = AutoModel.from_pretrained("google/bert_uncased_L-4_H-256_A-4")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTmini'] = [l,d,h,dh,S,N]

'BERT small (l = 4, d = 512, h = 8 ; 29.1M parameters)'
l = 4
d = 512
h = d // dh
model = AutoModel.from_pretrained("google/bert_uncased_L-4_H-512_A-8")
M = get_matricesBERT(model)
SBert_small, NBert_small = decomposition_blocks(d,l,h,dh,M)

'BERT medium (l = 8, d = 512, h = 8 ; 41.7M parameters)'
l = 8
d = 512
h = d // dh
model = AutoModel.from_pretrained("google/bert_uncased_L-8_H-512_A-8")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTmedium'] = [l,d,h,dh,S,N]

'BERT base (l = 12, d = 768, h = 12 ; 110M parameters)'
l = 12
d = 768
h = d // dh
model = BertModel.from_pretrained("bert-base-uncased")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTbase'] = [l,d,h,dh,S,N]

'BERT large (l = 24, d = 1024, h = 16 ; 340M parameters)'
l = 24
d = 1024
h = d // dh
model = BertModel.from_pretrained("bert-large-uncased")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTlarge'] = [l,d,h,dh,S,N]

'BERT large (masking) (l = 24, d = 1024, h = 16 ; 340M parameters)'
l = 24
d = 1024
h = d // dh
model = BertModel.from_pretrained("bert-large-uncased-whole-word-masking")
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['BERTlarge_mask'] = [l,d,h,dh,S,N]

'DistillBERT base model (l = 6, d = 768, h = 12 ; tot num parameters 66M)'
l = 6
d = 768
h = d // dh
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
M = get_matricesDistillBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['DistillBERT'] = [l,d,h,dh,S,N]

'save'
with open('../data/fig_scores/models.pkl', 'wb') as file:
    pickle.dump(models, file)

In [63]:
""" 
ROBERTA models 
 - MODEL: Bidirectional, Encoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)
 - METRICS: perplexity, cross-entropy

 MLM: randomly masked some words in the sentence, predict masked words with cross-entropy 
 over the vocabulary 
 NSP: 

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence, bi-directionally.
"""
with open('../data/fig_scores/models.pkl', 'rb') as file:
    models = pickle.load(file)

dh = 64

'ROBERTA base (l = 24, d = 1024, h = 16 ; 340M parameters)'
l = 12
d = 768
h = d // dh
model = RobertaModel.from_pretrained('roberta-base')
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ROBERTAbase'] = [l,d,h,dh,S,N]

'ROBERTA large (l = 24, d = 1024, h = 16 ; 340M parameters)'
l = 24
d = 1024
h = d // dh
model = RobertaModel.from_pretrained('roberta-large')
M = get_matricesBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ROBERTAlarge'] = [l,d,h,dh,S,N]

'DistillROBERTA base (l = 6, d = 768, h = 12 ; tot num parameters 82M)'
l = 6
d = 768
h = d // dh
model = AutoModelForMaskedLM.from_pretrained("distilbert/distilroberta-base")
M = get_matricesDistillROBERTA(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['DistillROBERTA'] = [l,d,h,dh,S,N]

'save'
with open('../data/fig_scores/models.pkl', 'wb') as file:
    pickle.dump(models, file)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model

In [30]:
""" 
ALBERT models 
 - MODEL: Bidirectional, Encoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)
 - METRICS: perplexity, cross-entropy

 MLM: randomly masked some words in the sentence, predict masked words with cross-entropy 
 over the vocabulary 
 NSP: 

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence, bi-directionally.
"""
with open('../data/fig_scores/models.pkl', 'rb') as file:
    models = pickle.load(file)
    
dh = 64

'ALBERT base model (l = 12, d = 768, h = 12 ; tot num parameters 11M)'
l = 1
d = 768
h = d // dh
model = AlbertModel.from_pretrained("albert-base-v2")
M = get_matricesALBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ALBERTbase'] = [l,d,h,dh,S,N]

'ALBERT large model (l = 24, d = 1024, h = 16 ; tot num parameters 17M)'
l = 1
d = 1024
h = d // dh
model = AlbertModel.from_pretrained("albert-large-v2")
M = get_matricesALBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ALBERTlarge'] = [l,d,h,dh,S,N]

'ALBERT xlarge model (l = 24, d = 2048, h = 16 ; tot num parameters 58M)'
dh = 64
l = 1
d = 2048
h = d // dh
model = AlbertModel.from_pretrained("albert-xlarge-v2")
M = get_matricesALBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ALBERTxlarge'] = [l,d,h,dh,S,N]

'ALBERT xxlarge model (l = 12, d = 4096, h = 64 ; tot num parameters 223M)'
dh = 64
l = 1
d = 4096
h = d // dh
model = AlbertModel.from_pretrained("albert-xxlarge-v2")
M = get_matricesALBERT(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['ALBERTxxlarge'] = [l,d,h,dh,S,N]

'save'
with open('../data/fig_scores/models.pkl', 'wb') as file:
    pickle.dump(models, file)

In [64]:
""" 
Generative Pre-trained Transformers (GPT) models 
 - MODEL: Unidirectional (causal), Decoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence.
"""
with open('../data/fig_scores/models.pkl', 'rb') as file:
    models = pickle.load(file)

dh = 64

'GPT 1 (l = 12, d = 768, h = 12 ; 110M parameters)'
l = 12
d = 768
h = d // dh
model = OpenAIGPTModel.from_pretrained("openai-gpt")
M = get_matricesGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPT'] = [l,d,h,dh,S,N]

'GPT2 (l = 12, d = 768, h = 12 ; 117M parameters)'
l = 12
d = 768
h = d // dh
model = GPT2Model.from_pretrained('gpt2')
M = get_matricesGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPT2'] = [l,d,h,dh,S,N]

'GPT2 medium (l = 24, d = 1024, h = 16 ; 345M parameters)'
l = 24
d = 1024
h = d // dh
model = GPT2Model.from_pretrained('gpt2-medium')
M = get_matricesGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPT2medium'] = [l,d,h,dh,S,N]

'GPT2 large (l = 36, d = 1280, h = 20 ; 774M parameters)'
l = 36
d = 1280
h = d // dh
model = GPT2Model.from_pretrained('gpt2-large')
M = get_matricesGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPT2large'] = [l,d,h,dh,S,N]

'GPT2 xl (l = 48, d = 1600, h = 25 ; 1558M parameters)'
l = 48
d = 1600
h = d // dh
model = GPT2Model.from_pretrained('gpt2-xl')
M = get_matricesGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPT2xl'] = [l,d,h,dh,S,N]

'DistillGPT2 base model (l = 12, d = 768, h = 12 ; tot num parameters 82M)'
l = 6
d = 768
h = d // dh
model = AutoModelForCausalLM.from_pretrained("distilbert/distilgpt2")
M = get_matricesDistillGPT(d,model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['DistillGPT2'] = [l,d,h,dh,S,N]

'save'
with open('../data/fig_scores/models.pkl', 'wb') as file:
    pickle.dump(models, file)

In [13]:
""" 
GPT Neo models (EleutherAI)
 - MODEL: Unidirectional (causal), Decoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence.
"""
with open('../data/fig_scores/models.pkl', 'rb') as file:
    models = pickle.load(file)

'GPT neo 125m (l = 12, d = 768, h = 12)'
dh = 64
l = 12
d = 768
h = d // dh
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125m")
M = get_matricesGPTneo(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPTneo-125m'] = [l,d,h,dh,S,N]

'GPT neo 1.3b (l = 12, d = 768, h = 16)'
dh = 128
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
M = get_matricesGPTneo(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPTneo-1.3b'] = [l,d,h,dh,S,N]

'GPT neo 2.7b (l = 12, d = 768, h = 20)'
dh = 128
l = 32
d = 2560
h = d // dh
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-2.7B")
M = get_matricesGPTneo(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPTneo-2.7b'] = [l,d,h,dh,S,N]

'GPT neox 20b (l = 44, d = 768, h = 64)'
dh = 96
l = 44
d = 6144
h = d // dh
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neox-20b")
S, N = get_matricesGPTneox(d,l,h,dh,model)
models['GPTneox-20b'] = [l,d,h,dh,S,N]

'GPT-j 6b (l = 28, d = 4096, h = 16)'
dh = 256
l = 28
d = 4096
h = d // dh
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b")
M = get_matricesGPTj(model)
S, N = decomposition_blocks(d,l,h,dh,M)
models['GPTj-6b'] = [l,d,h,dh,S,N]

'save'
with open('../data/fig_scores/models.pkl', 'wb') as file:
    pickle.dump(models, file)

44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44
44


In [7]:
""" 
Open Pre-trained Transformers (øPT) models 
 - MODEL: Unidirectional (causal), Decoder-only Transformer
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)

The idea is that these models have a better understanding of context, where each word is represented as a 
linear combination of all the other words in the sentence.
"""
dh = 64

'OPT-125m (l = 12, d = 768, h = 12)'
l = 12
d = 768
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
M = get_matricesOPT(model)
SOPT125m, NOPT125m = decomposition_blocks(d,l,h,dh,M)

'OPT-350m (l = 24, d = 1024, h = 16)'
l = 24
d = 1024
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m")
M = get_matricesOPT(model)
SOPT350m, NOPT350m = decomposition_blocks(d,l,h,dh,M)

'OPT-1.3b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-1.3b")
M = get_matricesOPT(model)
SOPT1_3b, NOPT1_3b = decomposition_blocks(d,l,h,dh,M)

'OPT-2.7b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-2.7b")
M = get_matricesOPT(model)
SOPT2_7b, NOPT2_7b = decomposition_blocks(d,l,h,dh,M)

'OPT-6.7b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-6.7b")
M = get_matricesOPT(model)
SOPT6_7b, NOPT6_7b = decomposition_blocks(d,l,h,dh,M)

'OPT-13b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-13b")
M = get_matricesOPT(model)
SOPT13b, NOPT13b = decomposition_blocks(d,l,h,dh,M)

print('done')

'OPT-30b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-30b")
M = get_matricesOPT(model)
SOPT30b, NOPT30b = decomposition_blocks(d,l,h,dh,M)

print('done')

'OPT-66b (l = 24, d = 2048, h = 32)'
l = 24
d = 2048
h = d // dh
model = AutoModelForCausalLM.from_pretrained("facebook/opt-66b")
M = get_matricesOPT(model)
SOPT66b, NOPT66b = decomposition_blocks(d,l,h,dh,M)

Loading checkpoint shards: 100%|██████████| 2/2 [00:43<00:00, 21.51s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [01:32<00:00, 30.89s/it]


done


KeyboardInterrupt: 

In [11]:
""" 

T5 models 
 - MODEL: 
 - DATASETS: BookCorpus & English Wikipedia
 - OBJECTIVES: Masked Language Modeling (MLM), Next Sentence Prediction (NSP)

BERT base (l = 12, d = 768, h = 12 ; 110M parameters)
"""

'T5 small model (l = 12, d = 768, h = 12 ; tot num parameters 110M)'
dh = 64

l = 6
d = 512
h = d // dh

model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
M = get_matricesT5(model)
ST5_small, NT5_small = decomposition_blocks(d,l,h,dh,M)

'T5 base model (l = 12, d = 768, h = 12 ; tot num parameters 110M)'
dh = 64

l = 12
d = 768
h = d // dh

model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-base")
M = get_matricesT5(model)
ST5_base, NT5_base = decomposition_blocks(d,l,h,dh,M)

'T5 large model (l = 12, d = 768, h = 12 ; tot num parameters 110M)'
dh = 32

l = 24
d = 1024
h = d // dh

model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-large")
M = get_matricesT5(model)
ST5_large, NT5_large = decomposition_blocks(d,l,h,dh,M)

'T5 3B model (l = 12, d = 768, h = 12 ; tot num parameters 110M)'
dh = 8

l = 24
d = 1024
h = d // dh

model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-3B")
M = get_matricesT5(model)
ST5_3B, NT5_3B = decomposition_blocks(d,l,h,dh,M)