# Introduction

This notebook runs inference. The best model and its weights are saved in this folder; running all the cells of the notebook will generate the final prediction CSVs.


### **!!! Disclaimer:**

Because we use raw waveforms without a data generator, loading all 9000 evaluation files into a single dataset would exhaust the available RAM. We therefore generate the predictions in 3 subsections. Please download those CSVs and merge them manually to get the final prediction scores.

In [2]:
!pip install -r "requirement (1).txt"



# 0. Setup

## utility libraries

In [13]:
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import librosa
import librosa.display
import pandas as pd
from tqdm import tqdm


from tensorflow.keras import layers
from tensorflow.keras import models
from IPython import display
from pathlib import Path
from IPython.display import Audio
from sklearn.model_selection import train_test_split

## hyperparameters

In [14]:
# Fixed seed so TensorFlow and NumPy random operations are reproducible between runs.
seed = 42
SR=16000 # sample rate (Hz) every clip is loaded at; kept low because RAM can't handle more
BATCH_SIZE = 1 # default batch size of DataGenerator
AUD_LENGTH = 10 # length, in seconds, every clip is cut or repeated to
TRAIN_TEST_SPLIT = 0.2 # presumably the held-out fraction used during training -- TODO confirm
tf.random.set_seed(seed)
np.random.seed(seed)

## All file paths

In [15]:
# Edit the paths below to match the local folder layout.

# Labelled (known and unknown) dataset: one sub-folder per class, named with the integer class label.
DATASET_AUDIO_PATH = 'classwise_dataset'

# Folders holding the extra external data.
ASVSPOOF_DATA_PATH  = 'external files/asvspoof'
LIBRISPEECH_DATA_PATH = 'external files/librispeech'

# Evaluation folders (part 1 and part 2) whose .wav files receive predictions.
EVAL_PATH_1 = 'spcup_2022_eval_part1'
EVAL_PATH_2 = 'spcup_2022_eval_part2'


# Saved model file and saved weight file that are loaded for inference.

LOAD_MODEL = 'model_TSSDnet_final_epochs.h5'
LOAD_WEIGHT = 'weight_TSSDnet_final_epochs.h5'

# ### saving paths (currently disabled)
# CSV_DIR = './'
# MODEL_SAVE_DIR = './'
# WEIGHT_SAVE_DIR = './'

# 2. Evaluation

In [16]:
# Rebuild the saved network from LOAD_MODEL, then apply the weight file LOAD_WEIGHT to it.
new_model = models.load_model(LOAD_MODEL)
new_model.load_weights(LOAD_WEIGHT)

# 1. Dataset Generation

In [17]:
# Collect every labelled clip as (path, integer label); the paths are used to
# divide the data into datasets and to build tf datasets later.
# Each sub-folder of DATASET_AUDIO_PATH is one class and is named with its integer label.

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# listing, and keep only directories so a stray file is not treated as a class.
class_names = sorted(
    name
    for name in os.listdir(DATASET_AUDIO_PATH)
    if os.path.isdir(os.path.join(DATASET_AUDIO_PATH, name))
)
print("Our class names: {}".format(class_names,))

audio_paths = []
labels = []
for name in class_names:
    # The folder name itself is the label (an enumerate index was previously
    # computed here and immediately overwritten).
    label = int(name)
    print("Processing speaker {}".format(name,))
    print("Actual Label ",label)
    dir_path = Path(DATASET_AUDIO_PATH) / name
    speaker_sample_paths = sorted(
        os.path.join(dir_path, filepath)
        for filepath in os.listdir(dir_path)
        if filepath.endswith(".wav")
    )
    audio_paths += speaker_sample_paths
    labels += [label] * len(speaker_sample_paths)

print(
    "Found {} files belonging to {} classes.".format(len(audio_paths), len(class_names))
)

Our class names: ['0', '1', '2', '3', '4', '5']
Processing speaker 0
Actual Label  0
Processing speaker 1
Actual Label  1
Processing speaker 2
Actual Label  2
Processing speaker 3
Actual Label  3
Processing speaker 4
Actual Label  4
Processing speaker 5
Actual Label  5
Found 6000 files belonging to 6 classes.


In [18]:
# for eval only
def paths_and_labels_to_dataset_1(ds):
    """Wrap `ds` in a tf.data.Dataset so it can be handed to the model for testing.

    The input is sliced into dataset elements with `from_tensor_slices`, and
    the resulting dataset is passed through `tf.data.Dataset.zip` before it is
    returned.
    """
    sliced = tf.data.Dataset.from_tensor_slices(ds)
    zipped = tf.data.Dataset.zip(sliced)
    return zipped

In [19]:
# utility functions for repeating audio files
def repeated_data(file_path):
    """Load an audio file and return exactly AUD_LENGTH seconds of waveform.

    The file is decoded by librosa at SR Hz. A clip shorter than AUD_LENGTH
    seconds is repeated end-to-end until it is long enough; the result is then
    cut to AUD_LENGTH * SR samples.

    Args:
        file_path: path of the audio file to load.

    Returns:
        The waveform array produced by librosa, looped/truncated to
        AUD_LENGTH * SR samples.

    Raises:
        ValueError: if the file decodes to zero samples (nothing to repeat).
    """
    # librosa already resamples to SR while loading, so no second resampling
    # pass is needed afterwards.
    y, sr = librosa.load(file_path, sr=SR)
    aud_length = AUD_LENGTH * sr  # target length in samples
    n_samples = len(y)
    if n_samples == 0:
        # Previously this surfaced as an unexplained ZeroDivisionError.
        raise ValueError("Audio file contains no samples: {}".format(file_path))
    if n_samples < aud_length:
        # Integer ceiling division: smallest repeat count that reaches the target length.
        repeats = -(-aud_length // n_samples)
        y = np.tile(y, repeats)
    return y[:aud_length]

def repeated_dataset(dataset):
    """Load every file in `dataset` as a fixed-length waveform.

    Args:
        dataset: iterable of audio file paths.

    Returns:
        A new list holding one `repeated_data` waveform per input path, in
        input order (an empty list for an empty input).
    """
    # Build a fresh list on every call; the previous version appended to
    # `new_ds`, a name that was never defined.
    return [repeated_data(f) for f in dataset]

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that loads fixed-length waveforms from disk batch by batch.

    Each batch is a pair ``(X, y)`` where ``X`` has shape
    ``(batch, SR * AUD_LENGTH, 1)`` and ``y`` holds the matching labels, both
    as float32 arrays.
    """
    # The base class is reached through `tf`, which the notebook imports;
    # a bare `keras` name is never imported and raised NameError here.

    def __init__(self, list_IDs, labels, batch_size= BATCH_SIZE, 
                 n_classes=6, shuffle=True):
        """Store the sample list and prepare the first epoch's ordering.

        Args:
            list_IDs: indexable collection of audio file paths.
            labels: indexable collection of labels, aligned with `list_IDs`.
            batch_size: number of samples per batch.
            n_classes: accepted for interface compatibility; not used by this class.
            shuffle: whether to reshuffle the sample order at every epoch end.
        """
        super().__init__()
        self.dim = AUD_LENGTH * SR
        self.batch_size = batch_size
        self.labels = labels
        self.shuffle = shuffle
        self.list_IDs = list_IDs
        self.on_epoch_end()

    def path_to_audio(self,path):
        """Reads and decodes an audio file into a fixed-length waveform."""
        return repeated_data(path)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as an ``(X, y)`` pair."""
        # Positions (into list_IDs / labels) of the samples in this batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when `shuffle` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the samples at positions `list_IDs_temp` and stack them into arrays."""
        waveforms = [self.path_to_audio(self.list_IDs[ID]) for ID in list_IDs_temp]
        targets = [self.labels[ID] for ID in list_IDs_temp]

        # Size the batch axis from the samples actually loaded, then add a
        # trailing channel axis.
        X = np.asarray(waveforms, dtype=np.float32).reshape(
            len(waveforms), SR * AUD_LENGTH, 1
        )
        y = np.asarray(targets, dtype=np.float32)
        return X, y

# 2. Evaluation

In [20]:
# Load the saved model for evaluation and overwrite its weights with the saved weight file.
new_model = models.load_model(LOAD_MODEL)
new_model.load_weights(LOAD_WEIGHT)

## A. Generating predictions for the evaluation part 1 dataset

In [21]:
# Gather every .wav file of the part-1 evaluation folder.
eval_path = EVAL_PATH_1
# Sorted so the prediction order (and therefore the CSV row order) does not
# depend on the arbitrary order in which os.listdir returns entries.
speaker_sample_paths = sorted(
    os.path.join(eval_path, filepath)
    for filepath in os.listdir(eval_path)
    if filepath.endswith(".wav")
)
X_eval = list(speaker_sample_paths)

print(
    "Found {} files".format(len(X_eval))
)

Found 9000 files


In [27]:
# Predict one class per evaluation file; `filenames` and `predictions` stay aligned row by row.
n = len(X_eval)
filenames = []
predictions = []
for path in tqdm(X_eval, total=n):
    # Only the file name (without its folder) is written to the CSV.
    filenames.append(os.path.basename(path))
    aud = repeated_data(path)
    # Two leading axes are added; the dataset helper slices along the first one.
    aud = paths_and_labels_to_dataset_1(np.expand_dims(np.expand_dims(aud, axis = 0),axis = 0))
    # verbose=0 keeps Keras from printing its own progress bar for every single-file call.
    score_1 = new_model.predict(aud, verbose=0)
    pred_class_1 = score_1.argmax(axis=-1)
    predictions.append(pred_class_1[0])

100%|██████████| 9000/9000 [03:08<00:00, 47.64it/s]


In [28]:
# Two-column table: evaluation file name and its predicted class.
score_df = pd.DataFrame(data={'file': filenames, 'pred_class': predictions})
score_df.head(5)

Unnamed: 0,file,pred_class
0,000564048b88c05396a9e68b3a89840e.wav,5
1,0007e065b51cb7e792b0ce301600c449.wav,1
2,001d3ce2cce8f3bb45e6023b9771d19d.wav,4
3,00260c4cfe0002f5620825ac0c03e02d.wav,1
4,002f3d510760bc5117acddbba95cf1e0.wav,5


In [29]:
# Write the part-1 predictions to disk as bare rows: no header line and no index column.
score_df.to_csv(path_or_buf='incept_part_1.csv', index=False, header=False)

## B. Generating predictions for the evaluation part 2 dataset

In [32]:
# Gather every .wav file of the part-2 evaluation folder.
eval_path = EVAL_PATH_2
# Sorted so the prediction order (and therefore the CSV row order) does not
# depend on the arbitrary order in which os.listdir returns entries.
speaker_sample_paths = sorted(
    os.path.join(eval_path, filepath)
    for filepath in os.listdir(eval_path)
    if filepath.endswith(".wav")
)
X_eval = list(speaker_sample_paths)

print(
    "Found {} files".format(len(X_eval))
)

Found 9000 files


In [33]:
# Predict one class per evaluation file; `filenames` and `predictions` stay aligned row by row.
n = len(X_eval)
filenames = []
predictions = []
for path in tqdm(X_eval, total=n):
    # Only the file name (without its folder) is written to the CSV.
    filenames.append(os.path.basename(path))
    aud = repeated_data(path)
    # Two leading axes are added; the dataset helper slices along the first one.
    aud = paths_and_labels_to_dataset_1(np.expand_dims(np.expand_dims(aud, axis = 0),axis = 0))
    # verbose=0 keeps Keras from printing its own progress bar for every single-file call.
    score_1 = new_model.predict(aud, verbose=0)
    pred_class_1 = score_1.argmax(axis=-1)
    predictions.append(pred_class_1[0])

100%|██████████| 9000/9000 [04:29<00:00, 33.44it/s]


In [34]:
# Two-column table: evaluation file name and its predicted class.
score_df = pd.DataFrame(data={'file': filenames, 'pred_class': predictions})
score_df.head(5)

Unnamed: 0,file,pred_class
0,000c8deb702043e5c1689f1e7a71950f.wav,0
1,001945bc6c04dd435ccd6780f64fac3b.wav,3
2,0020a5b1d8f42faa2fd7ff8abc942c4d.wav,4
3,00303d4b2e4bd17bd262580227ff3336.wav,1
4,00500d926ba4154bdb30b85f99dd5e6a.wav,4


In [None]:
# Write the part-2 predictions to disk as bare rows: no header line and no index column.
score_df.to_csv(path_or_buf='incept_part_2.csv', index=False, header=False)