In [1]:
import torch
import numpy as np
from sklearn.metrics import mean_squared_error

# Sample data
y_true_np = np.array([1, 2, 3, 4])
y_pred_np = np.array([1.1, 1.9, 3.2, 3.8])

# Convert numpy arrays to torch tensors
y_true_torch = torch.tensor(y_true_np, dtype=torch.float32)
y_pred_torch = torch.tensor(y_pred_np, dtype=torch.float32)

# Calculate MSE using PyTorch's nn.MSELoss
mse_loss_torch = torch.nn.MSELoss()
mse_torch = mse_loss_torch(y_true_torch, y_pred_torch).item()

# Calculate MSE using sklearn's mean_squared_error
mse_sklearn = mean_squared_error(y_true_np, y_pred_np)

print("MSE using PyTorch:", mse_torch)
print("MSE using scikit-learn:", mse_sklearn)


MSE using PyTorch: 0.025000009685754776
MSE using scikit-learn: 0.025000000000000043


In [1]:
import os
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool, cpu_count

import torch

import neurox.interpretation.utils as utils
import neurox.interpretation.ablation as ablation
import neurox.interpretation.linear_probe as linear_probe

from ttsxai.utils.utils import read_ljs_metadata


log_dir = '/nas/users/dahye/kw/tts/ttsxai/logs/probe_tacotron2_duration'
data_activation_dir = "/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow"

In [20]:
mode = 'test'    

# Dictionary keys to filter
keys_to_filter = read_ljs_metadata(mode=mode)

# List to store filtered paths
npz_files = []

# Iterate over all files in the directory
for file in os.listdir(data_activation_dir):
    # Check only for .npz files
    if file.endswith('.npz'):
        # Extract the identifier part from the file name (e.g., 'LJ037-0213')
        identifier = file.split('.')[0]

        # If this identifier is included in the dictionary keys, add to the list
        if identifier in keys_to_filter:
            full_path = os.path.join(data_activation_dir, file)
            npz_files.append(full_path)

In [21]:
tokens = {'source': [], 'target': []}
articulatory_features = []
dict_activations = []
for file in tqdm(npz_files):
    data_dict = np.load(file, allow_pickle=True)
    tokens['source'].append(list(data_dict['phonesymbols']))
    tokens['target'].append(list(data_dict['duration']))
    articulatory_features.append(list(data_dict['articulatory_features']))
    dict_activations.append(data_dict['activations'].item())
# return tokens, dict_activations

100%|██████████| 500/500 [00:02<00:00, 240.73it/s]


In [22]:
# Concatenate activations
activations = [np.concatenate(list(d.values()), axis=1) for d in dict_activations]

In [32]:
# filtering
ignore_tags = ['Space', 'Punctuation']

filtered_source_tokens = []
filtered_target_tokens = []
filtered_activations = []

for source_sentence, target_sentence, articulatory_feature, activation in zip(tokens['source'], tokens['target'], articulatory_features, activations):
    filtered_source_sentence = []
    filtered_target_sentence = []
    filtered_activation = []
    for source_token, target_token, af, a in zip(source_sentence, target_sentence, articulatory_feature, activation):
        if af not in ignore_tags:
            filtered_source_sentence.append(source_token)
            filtered_target_sentence.append(target_token)
            if target_token == 0:
                print(source_token, af)
            filtered_activation.append(a)
    filtered_source_tokens.append(np.array(filtered_source_sentence))
    filtered_target_tokens.append(np.array(filtered_target_sentence))
    filtered_activations.append(np.array(filtered_activation))

AH0 Labiodental
DH Vowel
T Vowel
N Alveolar
DH Vowel
AH0 Alveolar
N Vowel
AO1 Post-Alveolar
AH0 Bilabial
W Vowel
AH0 Vowel
AO2 Dental
AO1 Alveolar
AO1 Velar
B Velar
DH Labiodental
T Alveolar
N Alveolar
N Alveolar
B Vowel
AO1 Vowel
B Vowel
AO1 Labiodental
R Vowel
R Labiodental
R Alveolar
DH Vowel
DH Alveolar
DH Vowel
R Alveolar
AH0 Vowel
N Vowel
N Vowel
DH Labiodental
N Vowel
AO1 Bilabial
AO1 Alveolar
R Alveolar
AO1 Vowel
R Alveolar
JH Alveolar
AO1 Alveolar
AO1 Alveolar
B Alveolar
T Vowel
AH0 Alveolar
N Bilabial
IH1 Labio-Velar
D Vowel
N Labiodental
AE1 Vowel
AH0 Bilabial
N Vowel
K Labiodental
AO1 Vowel
AO1 Vowel
R Vowel
N Vowel
P Vowel
N Vowel
DH Vowel
AO1 Vowel
N Alveolar
AO1 Alveolar
DH Vowel
AO1 Alveolar
AO1 Alveolar
R Vowel
AH0 Alveolar
N Bilabial
AO1 Vowel
G Bilabial
N Vowel
DH Labiodental
L Alveolar
AO1 Alveolar
P Alveolar
HH Post-Alveolar
N Vowel
K Alveolar
AO1 Vowel
AH0 Labiodental
AO1 Vowel
B Vowel
N Post-Alveolar
N Labiodental
AO1 Dental
DH Alveolar
AH0 Vowel
R Alveolar
AO1 B

In [17]:
tokens['source'][0]

['DH',
 'AH0',
 ' ',
 'F',
 'AO1',
 'R',
 'M',
 'Z',
 ' ',
 'AH1',
 'V',
 ' ',
 'P',
 'R',
 'IH1',
 'N',
 'T',
 'AH0',
 'D',
 ' ',
 'L',
 'EH1',
 'T',
 'ER0',
 'Z',
 ' ',
 'SH',
 'UH1',
 'D',
 ' ',
 'B',
 'IY1',
 ' ',
 'B',
 'Y',
 'UW1',
 'T',
 'AH0',
 'F',
 'AH0',
 'L',
 ',',
 ' ',
 'AH0',
 'N',
 'D',
 ' ',
 'DH',
 'AE1',
 'T',
 ' ',
 'DH',
 'EH1',
 'R',
 ' ',
 'ER0',
 'EY1',
 'N',
 'JH',
 'M',
 'AH0',
 'N',
 'T',
 ' ',
 'AA1',
 'N',
 ' ',
 'DH',
 'AH0',
 ' ',
 'P',
 'EY1',
 'JH',
 ' ',
 'SH',
 'UH1',
 'D',
 ' ',
 'B',
 'IY1',
 ' ',
 'R',
 'IY1',
 'Z',
 'AH0',
 'N',
 'AH0',
 'B',
 'AH0',
 'L',
 ' ',
 'AH0',
 'N',
 'D',
 ' ',
 'EY1',
 ' ',
 'HH',
 'EH1',
 'L',
 'P',
 ' ',
 'T',
 'UW1',
 ' ',
 'DH',
 'AH0',
 ' ',
 'SH',
 'EY1',
 'P',
 'AH0',
 'L',
 'IH2',
 'N',
 'Z',
 ' ',
 'AH1',
 'V',
 ' ',
 'DH',
 'AH0',
 ' ',
 'L',
 'EH1',
 'T',
 'ER0',
 'Z',
 ' ',
 'DH',
 'EH0',
 'M',
 'S',
 'EH1',
 'L',
 'V',
 'Z',
 '.']

In [19]:
for source_sentence, target_sentence, activation in zip(tokens['source'], tokens['target'], activations):
    

[1,
 2,
 1,
 11,
 9,
 7,
 7,
 8,
 0,
 3,
 4,
 0,
 7,
 3,
 6,
 3,
 4,
 7,
 4,
 0,
 6,
 10,
 3,
 12,
 9,
 0,
 8,
 4,
 3,
 0,
 4,
 6,
 2,
 8,
 4,
 10,
 2,
 6,
 11,
 4,
 8,
 23,
 25,
 2,
 5,
 3,
 0,
 3,
 9,
 5,
 0,
 3,
 5,
 7,
 0,
 16,
 13,
 3,
 6,
 6,
 4,
 2,
 3,
 0,
 9,
 3,
 0,
 2,
 2,
 1,
 6,
 17,
 5,
 0,
 10,
 3,
 3,
 0,
 4,
 5,
 2,
 9,
 12,
 6,
 1,
 4,
 4,
 5,
 5,
 7,
 6,
 2,
 5,
 5,
 0,
 13,
 0,
 8,
 7,
 5,
 8,
 0,
 4,
 5,
 3,
 3,
 2,
 1,
 13,
 9,
 8,
 2,
 7,
 7,
 8,
 7,
 0,
 5,
 4,
 0,
 3,
 3,
 2,
 7,
 9,
 2,
 11,
 7,
 0,
 4,
 2,
 5,
 13,
 8,
 11,
 6,
 13,
 17]

In [25]:
tokens['source'] = filtered_source_tokens
tokens['target'] = filtered_target_tokens
activations = filtered_activations

In [26]:
X, y, mapping = utils.create_tensors(
    tokens, activations, 'NN', task_type='regression')
src2idx, idx2src = mapping

Number of tokens:  34245
length of source dictionary:  68
34245
Total instances: 34245
['B', 'JH', 'AA0', 'ER1', 'ZH', 'D', 'IH2', 'K', 'AY1', 'AA1', 'IH1', 'OW1', 'Z', 'V', 'AO2', 'SH', 'EY1', 'Y', 'EY0', 'AY2']
Number of samples:  34245


In [21]:
X.shape

(43250, 2048)

In [13]:
activations.shape

AttributeError: 'list' object has no attribute 'shape'

In [6]:
tokens['source']

[['DH',
  'AH0',
  ' ',
  'F',
  'AO1',
  'R',
  'M',
  'Z',
  ' ',
  'AH1',
  'V',
  ' ',
  'P',
  'R',
  'IH1',
  'N',
  'T',
  'AH0',
  'D',
  ' ',
  'L',
  'EH1',
  'T',
  'ER0',
  'Z',
  ' ',
  'SH',
  'UH1',
  'D',
  ' ',
  'B',
  'IY1',
  ' ',
  'B',
  'Y',
  'UW1',
  'T',
  'AH0',
  'F',
  'AH0',
  'L',
  ',',
  ' ',
  'AH0',
  'N',
  'D',
  ' ',
  'DH',
  'AE1',
  'T',
  ' ',
  'DH',
  'EH1',
  'R',
  ' ',
  'ER0',
  'EY1',
  'N',
  'JH',
  'M',
  'AH0',
  'N',
  'T',
  ' ',
  'AA1',
  'N',
  ' ',
  'DH',
  'AH0',
  ' ',
  'P',
  'EY1',
  'JH',
  ' ',
  'SH',
  'UH1',
  'D',
  ' ',
  'B',
  'IY1',
  ' ',
  'R',
  'IY1',
  'Z',
  'AH0',
  'N',
  'AH0',
  'B',
  'AH0',
  'L',
  ' ',
  'AH0',
  'N',
  'D',
  ' ',
  'EY1',
  ' ',
  'HH',
  'EH1',
  'L',
  'P',
  ' ',
  'T',
  'UW1',
  ' ',
  'DH',
  'AH0',
  ' ',
  'SH',
  'EY1',
  'P',
  'AH0',
  'L',
  'IH2',
  'N',
  'Z',
  ' ',
  'AH1',
  'V',
  ' ',
  'DH',
  'AH0',
  ' ',
  'L',
  'EH1',
  'T',
  'ER0',
  'Z',
  ' ',
  'DH',

In [5]:
tokens['target']

[[1,
  2,
  1,
  11,
  9,
  7,
  7,
  8,
  0,
  3,
  4,
  0,
  7,
  3,
  6,
  3,
  4,
  7,
  4,
  0,
  6,
  10,
  3,
  12,
  9,
  0,
  8,
  4,
  3,
  0,
  4,
  6,
  2,
  8,
  4,
  10,
  2,
  6,
  11,
  4,
  8,
  23,
  25,
  2,
  5,
  3,
  0,
  3,
  9,
  5,
  0,
  3,
  5,
  7,
  0,
  16,
  13,
  3,
  6,
  6,
  4,
  2,
  3,
  0,
  9,
  3,
  0,
  2,
  2,
  1,
  6,
  17,
  5,
  0,
  10,
  3,
  3,
  0,
  4,
  5,
  2,
  9,
  12,
  6,
  1,
  4,
  4,
  5,
  5,
  7,
  6,
  2,
  5,
  5,
  0,
  13,
  0,
  8,
  7,
  5,
  8,
  0,
  4,
  5,
  3,
  3,
  2,
  1,
  13,
  9,
  8,
  2,
  7,
  7,
  8,
  7,
  0,
  5,
  4,
  0,
  3,
  3,
  2,
  7,
  9,
  2,
  11,
  7,
  0,
  4,
  2,
  5,
  13,
  8,
  11,
  6,
  13,
  17],
 [5,
  2,
  0,
  7,
  4,
  5,
  0,
  11,
  13,
  3,
  0,
  6,
  5,
  0,
  10,
  20,
  8,
  4,
  0,
  8,
  12,
  17,
  10,
  0,
  4,
  8,
  5,
  5,
  4,
  6,
  11,
  4,
  4,
  5,
  0,
  6,
  5,
  11,
  6,
  11,
  7,
  8,
  6,
  14],
 [1,
  7,
  0,
  15,
  10,
  6,
  6,
  3,
  5,
  11,
  7,


In [3]:
npz_files

['/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ001-0015.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ023-0122.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ037-0248.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ037-0249.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ037-0252.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ001-0051.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ024-0018.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ024-0019.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ001-0063.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/tacotron2_waveglow/LJ024-0034.npz',
 '/nas/users/dahye/kw/tts/ttsxai/data_activation/LJSpeech/ta