In [4]:
# Make sure the working directory is the project's root directory
%cd ../
%pwd

/Users/haozhezh/Documents/Research/CMU/AudioFeaturizationAttack/Kirigami_Publish/Kirigami-private-audio


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


'/Users/haozhezh/Documents/Research/CMU/AudioFeaturizationAttack/Kirigami_Publish/Kirigami-private-audio'

In [5]:
import torch
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import scipy as sp
import soundfile
import os

## Speech Detector

### Prepare dataset

In [6]:
def fft_features_with_tags(wav_form, speech_start, speech_end, speech_start2, window_size=256, non_overlap=128):
  """Compute per-frame STFT magnitudes and binary speech tags for one clip.

  Args:
    wav_form: 1-D waveform (the 2-D transpose below requires a 1-D input).
    speech_start: Sample offset where the first speech segment begins.
    speech_end: Sample offset where the first speech segment ends (exclusive
      after conversion to a frame index).
    speech_start2: Sample offset where the second speech segment begins; every
      frame from there to the end of the clip is tagged as speech.
    window_size: STFT window length in samples (`nperseg`).
    non_overlap: Hop size in samples, i.e. the part of each window that does
      NOT overlap the next one.

  Returns:
    (features, tags): `features` is a list with one magnitude spectrum per
    STFT frame; `tags` is a float array of the same length holding 1 for
    frames tagged as speech and 0 otherwise.
  """
  # `import scipy as sp` alone does not guarantee that the `signal` submodule
  # is loaded on every SciPy version, so import it explicitly here.
  from scipy import signal as sp_signal

  hop = non_overlap
  # SciPy expects the number of OVERLAPPING samples; the frame-index arithmetic
  # below treats `non_overlap` as the hop, so convert (identical for the
  # defaults, where window_size == 2 * non_overlap). The frequency/time axes
  # returned by stft are discarded.
  _, _, stft = sp_signal.stft(x=wav_form, fs=16000, nperseg=window_size, noverlap=window_size - hop)
  magnitudes = np.abs(stft.transpose((1, 0)))  # -> (frames, frequency bins)

  def _to_frame(sample_offset):
    # Clamp at 0: a negative index would wrap around in the slices below and
    # silently tag the wrong frames (or none) for speech near the clip start.
    return max(0, (sample_offset - window_size + hop) // hop)

  speech_start_window = _to_frame(speech_start)
  speech_end_window = _to_frame(speech_end)
  speech_start2_window = _to_frame(speech_start2)

  tags = np.zeros(len(magnitudes))
  tags[speech_start_window:speech_end_window] = 1
  tags[speech_start2_window:] = 1
  return list(magnitudes), tags

def data_prep_io(interleave_wavs_path, interleave_csv_path):
    """Load every clip listed in the CSV and split its frames into train/valid/test.

    Each CSV row is assigned a fold from its DataFrame index (`index % 5 + 1`);
    folds 1-3 feed the training set, fold 4 the validation set and fold 5 the
    test set. The row's `wav` value is appended to `interleave_wavs_path` by
    plain string concatenation, so the prefix must already end with a path
    separator.

    Args:
        interleave_wavs_path: Path prefix prepended to each row's `wav` entry.
        interleave_csv_path: CSV with the columns `wav`, `wrd_start`,
            `wrd_end` and `wrd_start2`.

    Returns:
        (train_x, train_tags, valid_x, valid_tags, test_x, test_tags) as
        NumPy arrays built from the per-frame features and tags.
    """
    split_of_fold = {1: "train", 2: "train", 3: "train", 4: "valid", 5: "test"}
    frames = {"train": [], "valid": [], "test": []}
    labels = {"train": [], "valid": [], "test": []}

    table = pd.read_csv(interleave_csv_path)
    for row_idx, record in table.iterrows():
        # The sample rate returned by soundfile is not used.
        waveform, _ = soundfile.read(interleave_wavs_path + record['wav'])
        split = split_of_fold.get((row_idx % 5) + 1)
        clip_frames, clip_labels = fft_features_with_tags(
            wav_form=waveform,
            speech_start=record['wrd_start'],
            speech_end=record['wrd_end'],
            speech_start2=record['wrd_start2'],
        )
        if split is not None:
            frames[split].extend(clip_frames)
            labels[split].extend(clip_labels)

    return (
        np.asarray(frames["train"]), np.asarray(labels["train"]),
        np.asarray(frames["valid"]), np.asarray(labels["valid"]),
        np.asarray(frames["test"]), np.asarray(labels["test"]),
    )

In [7]:
# Mini-batch size used by the DataLoaders built later in the notebook.
batch_size = 128

# Dataset locations, relative to the project root. The WAV prefix keeps its
# trailing slash because file names are appended by string concatenation.
INTERLEAVE_WAV_PATH = './datasets/timit_interleave_on_esc50/'
INTERLEAVE_CSV_PATH = './datasets/timit_interleave_on_esc50/timit_interleave_on_esc50_path.csv'

# Directory that receives the model checkpoints; create it on first use.
model_path = "./results/filters/"
if not os.path.exists(model_path):
    os.makedirs(model_path)

# Read every clip and split the per-frame features/tags into the three sets.
(
    my_train_x, my_train_tags,
    my_valid_x, my_valid_tags,
    my_test_x, my_test_tags,
) = data_prep_io(interleave_wavs_path=INTERLEAVE_WAV_PATH, interleave_csv_path=INTERLEAVE_CSV_PATH)

In [8]:
class LogisticRegressionClassifier(torch.nn.Module):
    """Logistic regression over L1-normalized feature vectors.

    Args:
        feature_dim: Number of input features per sample.
    """

    def __init__(self, feature_dim=129):
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features=feature_dim, out_features=1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, xx):
        """Return one probability per row of `xx` (output shape `(batch, 1)`)."""
        # Scale every row to unit L1 norm before the linear layer.
        normalized = torch.nn.functional.normalize(xx, p=1.0, dim=1)
        logits = self.linear1(normalized)
        return self.sigmoid(logits)

### Train the speech detector model

In [9]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _build_split(features, tags, shuffle):
    """Turn one split's features/tags into tensors, a dataset and a loader.

    Args:
        features: Array-like of per-frame feature vectors.
        tags: Array-like with one label per frame.
        shuffle: Whether the DataLoader reshuffles the samples every epoch.

    Returns:
        (inputs, targets, dataset, loader), with `inputs` and `targets` as
        float32 tensors on DEVICE and `targets` shaped `(n_frames, 1)`.
    """
    # One vectorized conversion instead of building a tensor per row and
    # stacking them, which is very slow for many frames.
    inputs = torch.as_tensor(np.asarray(features), dtype=torch.float32).to(DEVICE)
    # Add a trailing dimension so the targets line up with the model's (N, 1) output.
    targets = torch.as_tensor(np.asarray(tags), dtype=torch.float32).unsqueeze(1).to(DEVICE)
    dataset = TensorDataset(inputs, targets)
    loader = DataLoader(dataset, shuffle=shuffle, batch_size=batch_size)
    return inputs, targets, dataset, loader


# Only the training data is shuffled; the accuracy computed on the validation
# and test sets does not depend on the sample order.
tensor_Xp_train, tensor_yp_train, dataset_p_train, loader_p_train = _build_split(
    my_train_x, my_train_tags, shuffle=True)

tensor_Xp_valid, tensor_yp_valid, dataset_p_valid, loader_p_valid = _build_split(
    my_valid_x, my_valid_tags, shuffle=False)

tensor_Xp_test, tensor_yp_test, dataset_p_test, loader_p_test = _build_split(
    my_test_x, my_test_tags, shuffle=False)

In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# 129 input features = 256 // 2 + 1, the bin count of a one-sided 256-point STFT.
my_model = LogisticRegressionClassifier(feature_dim=129)
my_model = my_model.to(DEVICE)

In [12]:
# Binary cross-entropy, averaged over the batch (the default reduction).
bce = torch.nn.BCELoss(reduction="mean")
# Adam over all parameters of the classifier.
optim = torch.optim.Adam(params=my_model.parameters(), lr=1e-3)

In [1]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score
max_epoch=30

s = tqdm(range(0, int(max_epoch)),desc='Training Epochs')

# Validate (and checkpoint) every `validating_frequency` epochs.
validating_frequency = 1

for epoch in s:
  my_model.train()
  total_loss = 0
  total_samples = 0
  for tr_x, tr_y in loader_p_train:
    # Clear the gradients of the previous step; without this, backward()
    # keeps adding to them and every update uses the sum over all batches so far.
    optim.zero_grad()
    y_pred = my_model(tr_x)
    current_loss = bce(y_pred, tr_y)
    current_loss.backward()
    optim.step()
    # The loss is a batch mean, so weight it by the batch size to get a
    # correct per-sample average at the end of the epoch.
    total_loss = total_loss + current_loss.item() * tr_x.shape[0]
    total_samples = total_samples + tr_x.shape[0]
    batch_train_accuracy_privacy = (torch.sum((tr_y == 1) & (y_pred >= 0.5)) + torch.sum((tr_y  == 0) & (y_pred < 0.5)))/ (y_pred.shape[0])

  train_loss = total_loss / max(total_samples, 1)

  if (epoch+1) % validating_frequency == 0:
    my_model.eval()
    total_y_pred = []
    total_y_truth = []
    # No gradients are needed for evaluation.
    with torch.no_grad():
      for vl_x, vl_y in loader_p_valid:
        y_pred = my_model(vl_x)
        # Threshold the predicted probability at 0.5 to get a hard label.
        y_pred = (y_pred >= 0.5).long()
        total_y_pred.extend(y_pred.cpu().numpy())
        total_y_truth.extend(vl_y.cpu().numpy())

    valid_accuracy = accuracy_score(total_y_truth, total_y_pred)
    s.set_postfix(train_loss = train_loss, validation_accuracy = valid_accuracy)
    print(f"Epoch {epoch + 1}, Validation Accuracy {valid_accuracy:.6f}")
    # NOTE: the checkpoint file name uses the 0-based epoch index, while the
    # message printed above is 1-based.
    torch.save(my_model.state_dict(), f"{model_path}/phoneme_filter_{epoch}.ckpt")

Training Epochs:   0%|          | 0/30 [00:00<?, ?it/s]


NameError: name 'my_model' is not defined

In [None]:
# Evaluate the trained model on the held-out test split.
my_model.eval()
total_y_pred = []
total_y_truth = []
# Inference only: skip building the autograd graph.
with torch.no_grad():
  for vl_x, vl_y in loader_p_test:
    y_pred = my_model(vl_x)
    # Threshold the predicted probability at 0.5 to get a hard label.
    y_pred = (y_pred >= 0.5).long()
    total_y_pred.extend(y_pred.cpu().numpy())
    total_y_truth.extend(vl_y.cpu().numpy())

test_accuracy = accuracy_score(total_y_truth, total_y_pred)
print("Test_Accuracy", test_accuracy)