In [None]:
# Install TensorFlow into the notebook runtime.
# NOTE(review): the version is not pinned; the code below imports
# `tensorflow.compat.v1`, so consider pinning a release known to work.
!pip install tensorflow



In [None]:
import os
import numpy as np
import pandas as pd
import wave
from google.colab import drive
import tensorflow.compat.v1 as tf
import zipfile
import librosa
from keras import layers
import math
import shutil
from tqdm import tqdm

# Root folder of the DAIC-WOZ data inside the mounted Google Drive.
prefix = '/content/drive/MyDrive/DAIC-WOZ'

# `drive` is imported above but was never mounted, so on a fresh runtime the
# assert below could only pass if Drive had been mounted by hand. Mount it on
# demand; when the folder is already visible nothing changes.
if not os.path.exists(prefix):
    drive.mount('/content/drive')

assert os.path.exists(prefix), "Data folder not found in Google Drive."

In [None]:
# Read the AVEC2017 split tables. The "dev" split file is what this notebook
# refers to as the test set.
train_csv_path = os.path.join(prefix, 'train_split_Depression_AVEC2017.csv')
dev_csv_path = os.path.join(prefix, 'dev_split_Depression_AVEC2017.csv')
train_split_df = pd.read_csv(train_csv_path)
test_split_df = pd.read_csv(dev_csv_path)

# Participant IDs and PHQ8_Binary labels as plain Python lists (row-aligned).
train_split_num, train_split_label = (
    train_split_df[column].tolist() for column in ('Participant_ID', 'PHQ8_Binary')
)
test_split_num, test_split_label = (
    test_split_df[column].tolist() for column in ('Participant_ID', 'PHQ8_Binary')
)

# Topic queries, one per line; trailing newlines are kept here and stripped
# at comparison time.
queries_path = os.path.join(prefix, 'queries.txt')
with open(queries_path) as queries_file:
    queries = queries_file.readlines()

In [None]:
def identify_topics(sentence):
    """Return True when ``sentence`` exactly matches one of the known queries.

    Both the sentence and each entry of the module-level ``queries`` list are
    compared after removing leading/trailing newline characters; no other
    normalisation (case, whitespace) is applied.
    """
    return any(
        known_query.strip('\n') == sentence.strip('\n')
        for known_query in queries
    )

# Number of NetVLAD clusters; the pooled descriptor has cluster_size * 16 dims.
cluster_size = 16

def wav2vlad(wave_data, sr):
    """Pool the log-mel spectrogram of an audio segment with a NetVLAD layer.

    Args:
        wave_data: 1-D array of audio samples for one segment.
        sr: sampling rate of ``wave_data`` in Hz.

    Returns:
        A NumPy array of shape ``(1, cluster_size * 16)``.

    NOTE(review): a new NetVLAD layer is built on every call and its weights
    come straight from random-normal initialisers; nothing here trains or loads
    them, so each segment is projected with different random weights. Confirm
    this is intended.
    """
    # 80-band mel spectrogram, transposed to (frames, mel_bands).
    melspec = librosa.feature.melspectrogram(y=wave_data, n_mels=80, sr=sr).T
    # Log-compress, flooring at 1e-6 so silent frames do not produce -inf.
    melspec = np.log(np.maximum(1e-6, melspec))
    max_samples, feature_size = melspec.shape
    output_dim = cluster_size * 16

    vlad_layer = NetVLAD(feature_size=feature_size, max_samples=max_samples,
                         cluster_size=cluster_size, output_dim=output_dim)
    feat = vlad_layer(tf.convert_to_tensor(melspec))

    # The result is read with `.numpy()`, which only exists on eager tensors,
    # so the layer has already been executed at this point. The former
    # `tf.Session` block only ran a no-op initializer on an empty graph and
    # has been removed.
    return feat.numpy()

class NetVLAD(layers.Layer):
    """Keras layer that pools a sequence of feature vectors into one NetVLAD vector.

    Args:
        feature_size: dimensionality of every input feature vector.
        max_samples: number of feature vectors (frames) per example.
        cluster_size: number of clusters used for the softmax assignment.
        output_dim: size of the final projected descriptor.
        **kwargs: forwarded to ``layers.Layer``.
    """
    def __init__(self, feature_size, max_samples, cluster_size, output_dim, **kwargs):
        # Initialise the base layer first so Keras bookkeeping exists before
        # any attribute is assigned.
        super(NetVLAD, self).__init__(**kwargs)

        self.feature_size = feature_size
        self.max_samples = max_samples
        self.output_dim = output_dim
        self.cluster_size = cluster_size

    def build(self, input_shape):
        """Create the trainable weights; their shapes come from the constructor arguments."""
        feature_init_std = 1 / math.sqrt(self.feature_size)

        # Assignment weights and biases: features -> cluster logits.
        self.cluster_weights = self.add_weight(name='kernel_W1',
                                      shape=(self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=feature_init_std),
                                      trainable=True)
        self.cluster_biases = self.add_weight(name='kernel_B1',
                                      shape=(self.cluster_size,),
                                      initializer=tf.random_normal_initializer(stddev=feature_init_std),
                                      trainable=True)
        # Per-cluster vectors that are scaled by the assignment mass and subtracted in call().
        self.cluster_weights2 = self.add_weight(name='kernel_W2',
                                      shape=(1,self.feature_size, self.cluster_size),
                                      initializer=tf.random_normal_initializer(stddev=feature_init_std),
                                      trainable=True)
        # Final projection from the flattened descriptor to output_dim.
        self.hidden1_weights = self.add_weight(name='kernel_H1',
                                      shape=(self.cluster_size*self.feature_size, self.output_dim),
                                      initializer=tf.random_normal_initializer(stddev=1 / math.sqrt(self.cluster_size)),
                                      trainable=True)

        super(NetVLAD, self).build(input_shape)  # Be sure to call this at the end

    def call(self, reshaped_input):
        """Forward pass of a NetVLAD block.

        Args:
        reshaped_input: If your input is in that form:
        'batch_size' x 'max_samples' x 'feature_size'
        It should be reshaped in the following form:
        'batch_size*max_samples' x 'feature_size'
        by performing:
        reshaped_input = tf.reshape(input, [-1, features_size])

        Returns:
        vlad: the pooled vector of size: 'batch_size' x 'output_dim'
        """
        # Note on matrix products: tf.matmul needs operands of matching rank
        # (a rank-3 by rank-2 product raises "Shape must be rank 2 but is
        # rank 3"). That is why the input is kept 2-D for the first product
        # and only reshaped to 3-D for the batched product further down.

        # Softmax over clusters for every frame: (batch*samples, clusters).
        activation = tf.matmul(reshaped_input, self.cluster_weights)
        activation += self.cluster_biases
        activation = tf.nn.softmax(activation)

        # Back to (batch, samples, clusters).
        activation = tf.reshape(activation,
                [-1, self.max_samples, self.cluster_size])

        # Total assignment mass per cluster: (batch, 1, clusters).
        # `keepdims` replaces the deprecated `keep_dims` alias.
        a_sum = tf.reduce_sum(activation,-2,keepdims=True)

        # Scale the per-cluster vectors by that mass: (batch, features, clusters).
        a = tf.multiply(a_sum,self.cluster_weights2)

        # (batch, clusters, samples) so it can be multiplied with the frames.
        activation = tf.transpose(activation,perm=[0,2,1])

        reshaped_input = tf.reshape(reshaped_input,[-1,
            self.max_samples, self.feature_size])

        # Assignment-weighted sum of frames per cluster, then subtract the
        # scaled cluster vectors: (batch, features, clusters).
        vlad = tf.matmul(activation,reshaped_input)
        vlad = tf.transpose(vlad,perm=[0,2,1])
        vlad = tf.subtract(vlad,a)

        # L2-normalise along the feature axis, flatten, L2-normalise again.
        vlad = tf.nn.l2_normalize(vlad,1)
        vlad = tf.reshape(vlad,[-1, self.cluster_size*self.feature_size])
        vlad = tf.nn.l2_normalize(vlad,1)

        # Project to the requested output size: (batch, output_dim).
        vlad = tf.matmul(vlad, self.hidden1_weights)

        return vlad

    def compute_output_shape(self, input_shape):
        """Return ``(None, output_dim)``; the batch dimension is left unspecified."""
        return (None, self.output_dim)

def extract_features(number, extract_zip = False):
    """Build per-topic audio and text features for one participant.

    The transcript is walked row by row. Participant utterances are
    accumulated (text and the matching audio samples) until Ellie asks a
    question recognised by ``identify_topics`` or says the closing phrase
    "i think i have asked everything"; at that point the accumulated audio is
    passed to ``wav2vlad`` and the accumulated text is stored.

    Args:
        number: participant ID; files are looked up under ``{prefix}/{number}_P``.
        extract_zip: when True, ``{prefix}/{number}_P.zip`` is extracted first
            and the extracted folder is deleted again before returning, even
            if processing fails.

    Returns:
        ``(audio_feats, text_feats)``: two lists of equal length with one
        entry per completed topic segment.

    Notes:
        * Participant speech after the last triggering Ellie row is not
          emitted. If no Ellie row ever triggers, both lists are empty; the
          recorded run shows size ``(0,)`` for two participants, presumably
          for this reason (TODO confirm).
        * The raw frames are decoded as 16-bit samples and indexed by
          ``time * sample_rate``. This assumes mono 16-bit PCM audio (TODO
          confirm against the dataset).
    """
    extract_dir = os.path.join(prefix, f'{number}_P')

    # Extract ZIP file
    if extract_zip:
        zip_path = os.path.join(prefix, f'{number}_P.zip')
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

    try:
        # Load the transcript and audio data
        transcript_path = os.path.join(prefix, f'{number}_P/{number}_TRANSCRIPT.csv')
        audio_path = os.path.join(prefix, f'{number}_P/{number}_AUDIO.wav')

        transcript = pd.read_csv(transcript_path, sep='\t').fillna('')

        # Context manager closes the wave file handle (it was previously leaked).
        with wave.open(audio_path, 'r') as wavefile:
            sr = wavefile.getframerate()
            nframes = wavefile.getnframes()
            wave_data = np.frombuffer(wavefile.readframes(nframes), dtype=np.short)

        response = ''
        audio_feats = []
        text_feats = []
        signal = []

        for t in transcript.itertuples():
            speaker = t.speaker
            value = t.value
            # Check if the question is asked by "Ellie" and contains specific phrases
            if speaker == 'Ellie' and (identify_topics(value) or 'i think i have asked everything' in value):
                # Nothing accumulated since the previous topic: keep waiting.
                if len(signal) == 0:
                    continue
                audio_feats.append(wav2vlad(signal, sr))
                text_feats.append(response)
                response = ''
                signal = []
            elif speaker == 'Participant':
                if 'scrubbed_entry' in value:
                    continue
                # Convert the utterance boundaries from seconds to sample indices.
                start_time = int(t.start_time * sr)
                stop_time = int(t.stop_time * sr)
                response += ' ' + value
                signal = np.hstack((signal, wave_data[start_time:stop_time].astype(float)))

        print(f'{number}_P feature done size {np.shape(audio_feats)}')
    finally:
        # Clean up extracted files to free up space, also when processing failed.
        if extract_zip:
            shutil.rmtree(extract_dir)
    return audio_feats, text_feats

In [None]:
save_dir = os.path.join('/content/drive/MyDrive', 'DAIC-Features')

# Ensure the directory exists; if not, create it
os.makedirs(save_dir, exist_ok=True)


def _extract_and_save_split(split_name, progress_desc, file_stem, participant_ids, labels):
    """Extract features for every participant of one split and save them.

    Three archives are written to ``save_dir``: ``{file_stem}_audio_clf.npz``,
    ``{file_stem}_text_clf.npz`` and ``{file_stem}_label_clf.npz``. Inside each
    archive the entries are keyed ``audio_<id>``, ``text_<id>`` and
    ``label_<id>`` respectively.

    Args:
        split_name: word used in the progress messages ("training" / "test").
        progress_desc: label shown on the tqdm progress bar.
        file_stem: prefix of the output file names ("train" / "test").
        participant_ids: participant IDs to process, in order.
        labels: labels aligned with ``participant_ids``.

    Returns:
        ``(audio_features, text_features, targets)`` lists aligned with
        ``participant_ids``.
    """
    audio_features, text_features, targets = [], [], []

    print(f"Processing {split_name} data...")
    for index in tqdm(range(len(participant_ids)), desc=progress_desc):
        audio_feats, text_feats = extract_features(participant_ids[index])
        audio_features.append(audio_feats)
        text_features.append(text_feats)
        targets.append(labels[index])

    print(f"Saving {split_name} data to Google Drive...")
    # Save each feature kind in its own archive with participant IDs in the keys
    for kind, values in (('audio', audio_features),
                         ('text', text_features),
                         ('label', targets)):
        np.savez(
            os.path.join(save_dir, f'{file_stem}_{kind}_clf.npz'),
            **{f'{kind}_{participant_ids[i]}': value for i, value in enumerate(values)}
        )

    return audio_features, text_features, targets


# Process training set
audio_features_train, text_features_train, targets_train = _extract_and_save_split(
    'training', 'Training Set', 'train', train_split_num, train_split_label
)

# Process test set
audio_features_test, text_features_test, targets_test = _extract_and_save_split(
    'test', 'Test Set', 'test', test_split_num, test_split_label
)

Processing training data...


Training Set:   1%|          | 1/107 [00:07<13:00,  7.36s/it]

303_P feature done size (34, 1, 256)


Training Set:   2%|▏         | 2/107 [00:13<11:55,  6.82s/it]

304_P feature done size (46, 1, 256)


Training Set:   3%|▎         | 3/107 [00:22<13:14,  7.64s/it]

305_P feature done size (44, 1, 256)


Training Set:   4%|▎         | 4/107 [00:28<12:09,  7.08s/it]

310_P feature done size (46, 1, 256)


Training Set:   5%|▍         | 5/107 [00:35<12:02,  7.08s/it]

312_P feature done size (41, 1, 256)


Training Set:   6%|▌         | 6/107 [00:41<11:22,  6.75s/it]

313_P feature done size (42, 1, 256)


Training Set:   7%|▋         | 7/107 [00:51<12:55,  7.76s/it]

315_P feature done size (47, 1, 256)


Training Set:   7%|▋         | 8/107 [00:59<12:56,  7.84s/it]

316_P feature done size (54, 1, 256)


Training Set:   8%|▊         | 9/107 [01:08<13:06,  8.02s/it]

317_P feature done size (49, 1, 256)


Training Set:   9%|▉         | 10/107 [01:13<11:40,  7.22s/it]

318_P feature done size (38, 1, 256)


Training Set:  10%|█         | 11/107 [01:20<11:26,  7.15s/it]

319_P feature done size (43, 1, 256)


Training Set:  11%|█         | 12/107 [01:30<12:40,  8.01s/it]

320_P feature done size (62, 1, 256)


Training Set:  12%|█▏        | 13/107 [01:40<13:29,  8.61s/it]

321_P feature done size (58, 1, 256)


Training Set:  13%|█▎        | 14/107 [01:49<13:33,  8.75s/it]

322_P feature done size (53, 1, 256)


Training Set:  14%|█▍        | 15/107 [01:57<12:50,  8.37s/it]

324_P feature done size (43, 1, 256)


Training Set:  15%|█▍        | 16/107 [02:02<11:32,  7.61s/it]

325_P feature done size (37, 1, 256)


Training Set:  16%|█▌        | 17/107 [02:12<12:15,  8.18s/it]

326_P feature done size (56, 1, 256)


Training Set:  17%|█▋        | 18/107 [02:20<12:18,  8.30s/it]

327_P feature done size (55, 1, 256)


Training Set:  18%|█▊        | 19/107 [02:28<11:48,  8.06s/it]

328_P feature done size (41, 1, 256)


Training Set:  19%|█▊        | 20/107 [02:35<11:25,  7.87s/it]

330_P feature done size (46, 1, 256)


Training Set:  20%|█▉        | 21/107 [02:44<11:22,  7.94s/it]

333_P feature done size (46, 1, 256)


Training Set:  21%|██        | 22/107 [02:54<12:07,  8.56s/it]

336_P feature done size (55, 1, 256)


Training Set:  21%|██▏       | 23/107 [03:00<11:04,  7.91s/it]

338_P feature done size (41, 1, 256)


Training Set:  22%|██▏       | 24/107 [03:09<11:20,  8.20s/it]

339_P feature done size (48, 1, 256)


Training Set:  23%|██▎       | 25/107 [03:16<10:50,  7.94s/it]

340_P feature done size (48, 1, 256)


Training Set:  24%|██▍       | 26/107 [03:25<11:07,  8.24s/it]

341_P feature done size (46, 1, 256)


Training Set:  25%|██▌       | 27/107 [03:34<11:22,  8.53s/it]

343_P feature done size (62, 1, 256)


Training Set:  26%|██▌       | 28/107 [03:44<11:52,  9.02s/it]

344_P feature done size (53, 1, 256)


Training Set:  27%|██▋       | 29/107 [03:51<10:43,  8.26s/it]

345_P feature done size (36, 1, 256)


Training Set:  28%|██▊       | 30/107 [04:00<10:47,  8.41s/it]

347_P feature done size (50, 1, 256)


Training Set:  29%|██▉       | 31/107 [04:10<11:18,  8.93s/it]

348_P feature done size (56, 1, 256)


Training Set:  30%|██▉       | 32/107 [04:17<10:29,  8.40s/it]

350_P feature done size (44, 1, 256)


Training Set:  31%|███       | 33/107 [04:27<10:47,  8.75s/it]

351_P feature done size (50, 1, 256)


Training Set:  32%|███▏      | 34/107 [04:32<09:24,  7.73s/it]

352_P feature done size (32, 1, 256)


Training Set:  33%|███▎      | 35/107 [04:40<09:31,  7.94s/it]

353_P feature done size (43, 1, 256)


Training Set:  34%|███▎      | 36/107 [04:46<08:43,  7.38s/it]

355_P feature done size (38, 1, 256)


Training Set:  35%|███▍      | 37/107 [04:55<08:53,  7.63s/it]

356_P feature done size (37, 1, 256)


Training Set:  36%|███▌      | 38/107 [04:59<07:44,  6.74s/it]

357_P feature done size (30, 1, 256)


Training Set:  36%|███▋      | 39/107 [05:06<07:47,  6.87s/it]

358_P feature done size (41, 1, 256)


Training Set:  37%|███▋      | 40/107 [05:13<07:28,  6.69s/it]

360_P feature done size (33, 1, 256)


Training Set:  38%|███▊      | 41/107 [05:20<07:24,  6.73s/it]

362_P feature done size (42, 1, 256)


Training Set:  39%|███▉      | 42/107 [05:28<07:43,  7.13s/it]

363_P feature done size (34, 1, 256)


Training Set:  40%|████      | 43/107 [05:36<08:06,  7.60s/it]

364_P feature done size (43, 1, 256)


Training Set:  41%|████      | 44/107 [05:44<07:56,  7.56s/it]

366_P feature done size (34, 1, 256)


Training Set:  42%|████▏     | 45/107 [05:50<07:15,  7.02s/it]

368_P feature done size (29, 1, 256)


Training Set:  43%|████▎     | 46/107 [05:57<07:13,  7.11s/it]

369_P feature done size (31, 1, 256)


Training Set:  44%|████▍     | 47/107 [06:02<06:31,  6.52s/it]

370_P feature done size (26, 1, 256)


Training Set:  45%|████▍     | 48/107 [06:13<07:35,  7.72s/it]

371_P feature done size (55, 1, 256)


Training Set:  46%|████▌     | 49/107 [06:21<07:34,  7.84s/it]

372_P feature done size (44, 1, 256)


Training Set:  47%|████▋     | 50/107 [06:29<07:39,  8.07s/it]

374_P feature done size (40, 1, 256)


Training Set:  48%|████▊     | 51/107 [06:38<07:38,  8.18s/it]

375_P feature done size (47, 1, 256)


Training Set:  49%|████▊     | 52/107 [06:45<07:20,  8.02s/it]

376_P feature done size (40, 1, 256)


Training Set:  50%|████▉     | 53/107 [06:53<07:03,  7.84s/it]

379_P feature done size (36, 1, 256)


Training Set:  50%|█████     | 54/107 [07:01<07:09,  8.10s/it]

380_P feature done size (38, 1, 256)


Training Set:  51%|█████▏    | 55/107 [07:10<07:04,  8.17s/it]

383_P feature done size (35, 1, 256)


Training Set:  52%|█████▏    | 56/107 [07:18<06:50,  8.05s/it]

385_P feature done size (49, 1, 256)


Training Set:  53%|█████▎    | 57/107 [07:27<07:01,  8.42s/it]

386_P feature done size (42, 1, 256)


Training Set:  54%|█████▍    | 58/107 [07:35<06:46,  8.29s/it]

391_P feature done size (48, 1, 256)


Training Set:  55%|█████▌    | 59/107 [07:44<06:47,  8.49s/it]

392_P feature done size (44, 1, 256)


Training Set:  56%|█████▌    | 60/107 [07:51<06:15,  7.98s/it]

393_P feature done size (40, 1, 256)


Training Set:  57%|█████▋    | 61/107 [08:01<06:47,  8.86s/it]

397_P feature done size (55, 1, 256)


Training Set:  58%|█████▊    | 62/107 [08:12<07:08,  9.51s/it]

400_P feature done size (54, 1, 256)


Training Set:  59%|█████▉    | 63/107 [08:20<06:35,  9.00s/it]

401_P feature done size (44, 1, 256)


Training Set:  60%|█████▉    | 64/107 [08:29<06:27,  9.02s/it]

402_P feature done size (42, 1, 256)


Training Set:  61%|██████    | 65/107 [08:35<05:36,  8.01s/it]

409_P feature done size (29, 1, 256)


Training Set:  62%|██████▏   | 66/107 [08:46<06:04,  8.90s/it]

412_P feature done size (54, 1, 256)


Training Set:  63%|██████▎   | 67/107 [08:54<05:44,  8.62s/it]

414_P feature done size (36, 1, 256)


Training Set:  64%|██████▎   | 68/107 [09:02<05:34,  8.57s/it]

415_P feature done size (45, 1, 256)


Training Set:  64%|██████▍   | 69/107 [09:13<05:51,  9.25s/it]

416_P feature done size (51, 1, 256)


Training Set:  65%|██████▌   | 70/107 [09:20<05:16,  8.56s/it]

419_P feature done size (37, 1, 256)


Training Set:  66%|██████▋   | 71/107 [09:31<05:35,  9.32s/it]

423_P feature done size (52, 1, 256)


Training Set:  67%|██████▋   | 72/107 [09:41<05:25,  9.30s/it]

425_P feature done size (41, 1, 256)


Training Set:  68%|██████▊   | 73/107 [09:49<05:11,  9.17s/it]

426_P feature done size (49, 1, 256)


Training Set:  69%|██████▉   | 74/107 [10:00<05:11,  9.45s/it]

427_P feature done size (47, 1, 256)


Training Set:  70%|███████   | 75/107 [10:10<05:10,  9.69s/it]

428_P feature done size (49, 1, 256)


Training Set:  71%|███████   | 76/107 [10:19<04:59,  9.67s/it]

429_P feature done size (51, 1, 256)


Training Set:  72%|███████▏  | 77/107 [10:30<04:54,  9.81s/it]

430_P feature done size (46, 1, 256)


Training Set:  73%|███████▎  | 78/107 [10:39<04:45,  9.83s/it]

433_P feature done size (48, 1, 256)


Training Set:  74%|███████▍  | 79/107 [10:47<04:20,  9.30s/it]

434_P feature done size (38, 1, 256)


Training Set:  75%|███████▍  | 80/107 [11:01<04:45, 10.58s/it]

437_P feature done size (45, 1, 256)


Training Set:  76%|███████▌  | 81/107 [11:15<05:04, 11.72s/it]

441_P feature done size (69, 1, 256)


Training Set:  77%|███████▋  | 82/107 [11:26<04:45, 11.42s/it]

443_P feature done size (53, 1, 256)


Training Set:  78%|███████▊  | 83/107 [11:35<04:18, 10.76s/it]

444_P feature done size (38, 1, 256)


Training Set:  79%|███████▊  | 84/107 [11:45<04:01, 10.48s/it]

445_P feature done size (44, 1, 256)


Training Set:  79%|███████▉  | 85/107 [11:52<03:26,  9.40s/it]

446_P feature done size (35, 1, 256)


Training Set:  80%|████████  | 86/107 [12:01<03:15,  9.31s/it]

447_P feature done size (39, 1, 256)


Training Set:  81%|████████▏ | 87/107 [12:08<02:51,  8.56s/it]

448_P feature done size (32, 1, 256)


Training Set:  82%|████████▏ | 88/107 [12:16<02:41,  8.50s/it]

449_P feature done size (33, 1, 256)


Training Set:  83%|████████▎ | 89/107 [12:30<03:00, 10.03s/it]

454_P feature done size (65, 1, 256)


Training Set:  84%|████████▍ | 90/107 [12:37<02:33,  9.05s/it]

455_P feature done size (35, 1, 256)


Training Set:  85%|████████▌ | 91/107 [12:48<02:34,  9.65s/it]

456_P feature done size (49, 1, 256)


Training Set:  86%|████████▌ | 92/107 [12:59<02:30, 10.03s/it]

457_P feature done size (47, 1, 256)


Training Set:  87%|████████▋ | 93/107 [13:11<02:28, 10.58s/it]

459_P feature done size (59, 1, 256)


Training Set:  88%|████████▊ | 94/107 [13:21<02:18, 10.66s/it]

463_P feature done size (49, 1, 256)


Training Set:  89%|████████▉ | 95/107 [13:32<02:06, 10.51s/it]

464_P feature done size (40, 1, 256)


Training Set:  90%|████████▉ | 96/107 [13:39<01:44,  9.54s/it]

468_P feature done size (36, 1, 256)


Training Set:  91%|█████████ | 97/107 [13:49<01:38,  9.85s/it]

471_P feature done size (43, 1, 256)


Training Set:  92%|█████████▏| 98/107 [14:01<01:34, 10.51s/it]

473_P feature done size (55, 1, 256)


Training Set:  93%|█████████▎| 99/107 [14:12<01:23, 10.41s/it]

474_P feature done size (46, 1, 256)


Training Set:  93%|█████████▎| 100/107 [14:20<01:08,  9.75s/it]

475_P feature done size (38, 1, 256)


Training Set:  94%|█████████▍| 101/107 [14:31<01:01, 10.31s/it]

478_P feature done size (49, 1, 256)


Training Set:  95%|█████████▌| 102/107 [14:44<00:54, 10.91s/it]

479_P feature done size (55, 1, 256)


Training Set:  96%|█████████▋| 103/107 [14:54<00:42, 10.58s/it]

485_P feature done size (49, 1, 256)


Training Set:  97%|█████████▋| 104/107 [15:04<00:31, 10.56s/it]

486_P feature done size (44, 1, 256)


Training Set:  98%|█████████▊| 105/107 [15:13<00:20, 10.07s/it]

487_P feature done size (36, 1, 256)


Training Set:  99%|█████████▉| 106/107 [15:21<00:09,  9.41s/it]

488_P feature done size (36, 1, 256)


Training Set: 100%|██████████| 107/107 [15:33<00:00,  8.72s/it]

491_P feature done size (50, 1, 256)
Saving training data to Google Drive...





Processing test data...


Test Set:   3%|▎         | 1/35 [00:10<05:53, 10.40s/it]

302_P feature done size (36, 1, 256)


Test Set:   6%|▌         | 2/35 [00:19<05:24,  9.85s/it]

307_P feature done size (33, 1, 256)


Test Set:   9%|▊         | 3/35 [00:31<05:35, 10.49s/it]

331_P feature done size (42, 1, 256)


Test Set:  11%|█▏        | 4/35 [00:43<05:50, 11.30s/it]

335_P feature done size (46, 1, 256)


Test Set:  14%|█▍        | 5/35 [00:55<05:44, 11.49s/it]

346_P feature done size (42, 1, 256)


Test Set:  17%|█▋        | 6/35 [01:05<05:14, 10.84s/it]

367_P feature done size (32, 1, 256)


Test Set:  20%|██        | 7/35 [01:16<05:11, 11.14s/it]

377_P feature done size (43, 1, 256)


Test Set:  23%|██▎       | 8/35 [01:29<05:14, 11.65s/it]

381_P feature done size (46, 1, 256)


Test Set:  26%|██▌       | 9/35 [01:42<05:14, 12.10s/it]

382_P feature done size (47, 1, 256)


Test Set:  29%|██▊       | 10/35 [01:55<05:07, 12.31s/it]

388_P feature done size (46, 1, 256)


Test Set:  31%|███▏      | 11/35 [02:11<05:25, 13.55s/it]

389_P feature done size (63, 1, 256)


Test Set:  34%|███▍      | 12/35 [02:25<05:14, 13.69s/it]

390_P feature done size (49, 1, 256)


Test Set:  37%|███▋      | 13/35 [02:38<04:54, 13.37s/it]

395_P feature done size (47, 1, 256)


Test Set:  40%|████      | 14/35 [02:47<04:16, 12.20s/it]

403_P feature done size (38, 1, 256)


Test Set:  43%|████▎     | 15/35 [03:00<04:04, 12.22s/it]

404_P feature done size (44, 1, 256)


Test Set:  46%|████▌     | 16/35 [03:11<03:45, 11.87s/it]

406_P feature done size (40, 1, 256)


Test Set:  49%|████▊     | 17/35 [03:23<03:36, 12.02s/it]

413_P feature done size (42, 1, 256)


Test Set:  51%|█████▏    | 18/35 [03:33<03:14, 11.43s/it]

417_P feature done size (39, 1, 256)


Test Set:  54%|█████▍    | 19/35 [03:45<03:04, 11.50s/it]

418_P feature done size (43, 1, 256)


Test Set:  57%|█████▋    | 20/35 [04:00<03:09, 12.62s/it]

420_P feature done size (56, 1, 256)


Test Set:  60%|██████    | 21/35 [04:11<02:50, 12.19s/it]

422_P feature done size (33, 1, 256)


Test Set:  63%|██████▎   | 22/35 [04:23<02:38, 12.20s/it]

436_P feature done size (43, 1, 256)


Test Set:  66%|██████▌   | 23/35 [04:34<02:20, 11.73s/it]

439_P feature done size (33, 1, 256)


Test Set:  69%|██████▊   | 24/35 [04:48<02:15, 12.29s/it]

440_P feature done size (52, 1, 256)


Test Set:  71%|███████▏  | 25/35 [04:54<01:44, 10.47s/it]

451_P feature done size (0,)


Test Set:  74%|███████▍  | 26/35 [04:57<01:14,  8.26s/it]

458_P feature done size (0,)


Test Set:  77%|███████▋  | 27/35 [05:09<01:15,  9.48s/it]

472_P feature done size (44, 1, 256)


Test Set:  80%|████████  | 28/35 [05:21<01:10, 10.14s/it]

476_P feature done size (44, 1, 256)


Test Set:  83%|████████▎ | 29/35 [05:36<01:09, 11.64s/it]

477_P feature done size (40, 1, 256)


Test Set:  86%|████████▌ | 30/35 [05:46<00:55, 11.18s/it]

482_P feature done size (39, 1, 256)


Test Set:  89%|████████▊ | 31/35 [06:00<00:47, 11.79s/it]

483_P feature done size (44, 1, 256)


Test Set:  91%|█████████▏| 32/35 [06:12<00:35, 11.91s/it]

484_P feature done size (40, 1, 256)


Test Set:  94%|█████████▍| 33/35 [06:25<00:24, 12.39s/it]

489_P feature done size (50, 1, 256)


Test Set:  97%|█████████▋| 34/35 [06:39<00:12, 12.78s/it]

490_P feature done size (50, 1, 256)


Test Set: 100%|██████████| 35/35 [06:50<00:00, 11.74s/it]

492_P feature done size (41, 1, 256)
Saving test data to Google Drive...





In [None]:
# Sample Usage
save_dir = os.path.join('/content/drive/MyDrive', 'DAIC-Features')


def _load_saved_entry(file_name, key):
    """Return one entry of an .npz archive stored in ``save_dir``.

    The archive is opened with a context manager so its file handle is closed
    as soon as the requested array has been read.
    """
    with np.load(os.path.join(save_dir, file_name)) as archive:
        return archive[key]


# Access data by participant ID
audio_feature_303 = _load_saved_entry('train_audio_clf.npz', 'audio_303')
text_feature_303 = _load_saved_entry('train_text_clf.npz', 'text_303')
label_303 = _load_saved_entry('train_label_clf.npz', 'label_303')

In [None]:
# One (1, 256) descriptor per topic segment; 256 = cluster_size * 16.
audio_feature_303.shape

(34, 1, 256)

In [None]:
# Preview the first 20 accumulated participant responses (one string per topic segment).
text_feature_303[:20]

array([" okay how 'bout yourself", ' here in california yeah',
       " oh well that it's big and broad there's a lot to do a lot of um um job opportunities than other states um pretty much that it's big and there's a lot you can do here",
       " traffic um maybe the violence rate bad news even though you know you wanna know what's going on in your environment but you still have to watch it so you can you know look out to see what's going on on a daily basis",
       ' sociology i have a um two year degree in liberal arts but my major was sociology',
       " no i'm actually an m_t_a bus operator",
       " well since metro is steady growing um my dream job is to move up in the company and i'm about to actually start some classes for supervisory next week so um 'cause they look to promote within the company so that's the good thing there's a lot of opportunities at metro where you can take classes and they'll pay for your tuition and things of that nature thanks",
       " well it's 

In [None]:
# 0-d array holding this participant's PHQ8_Binary value from the train split table.
label_303

array(0)