In [1]:
import os
import datetime

import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_io as tfio

In [2]:
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

In [3]:
label_classes = ['spoof', 'bonafide']
map_class_to_id = {'spoof':0, 'bonafide':1}

In [4]:
@tf.function
def load_wav_16k_mono(filename):
    """ Load a WAV file, convert it to a float tensor, resample to 16 kHz single-channel audio. """
    file_contents = tf.io.read_file(filename)
    wav, sample_rate = tf.audio.decode_wav(
          file_contents,
          desired_channels=1)
    wav = tf.squeeze(wav, axis=-1)
    sample_rate = tf.cast(sample_rate, dtype=tf.int64)
    wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    return wav

In [5]:
def read_csv(path, base_data_path):
    pd_data = pd.read_csv(path)

    filtered_pd = pd_data[pd_data.category.isin(label_classes)]

    class_id = filtered_pd['category'].apply(lambda name: map_class_to_id[name])
    filtered_pd = filtered_pd.assign(target=class_id)

    full_path = filtered_pd['filename'].apply(lambda row: os.path.join(base_data_path, row))
    filtered_pd = filtered_pd.assign(filename=full_path)

    filenames = filtered_pd['filename']
    targets = filtered_pd['target']

    return tf.data.Dataset.from_tensor_slices((filenames, targets)), len(targets)

In [6]:
def load_wav_for_map(filename, label):
    return load_wav_16k_mono(filename), label, 1

In [7]:
# applies the embedding extraction model to a wav data
def extract_embedding(wav_data, label, fold):
    scores, embeddings, spectrogram = yamnet_model(wav_data)
    num_embeddings = tf.shape(embeddings)[0]
    return (embeddings,
            tf.repeat(label, num_embeddings),
            tf.repeat(fold, num_embeddings))

In [8]:
def load_ds(csv, folder):
    ds, ds_len = read_csv(csv, folder)
    ds = ds.map(load_wav_for_map)
    ds = ds.map(extract_embedding).unbatch()
    return ds.map(lambda embedding, label, fold: (embedding, label)).cache(), ds_len

In [9]:
cached_ds, ds_length = load_ds('train.csv', 'train')
train_size = int(ds_length * 0.8)

train_ds = cached_ds.take(train_size)
val_ds = cached_ds.skip(train_size)

train_ds = train_ds.cache().shuffle(1000).batch(32).prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().batch(32).prefetch(tf.data.AUTOTUNE)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unable to open file: libtensorflow_io.so, from paths: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so']
caused by: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so not found']


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unable to open file: libtensorflow_io.so, from paths: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so']
caused by: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so not found']


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: unable to open file: libtensorflow_io.so, from paths: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so']
caused by: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so not found']


NotImplementedError: in user code:

    File "C:\Users\tproh\AppData\Local\Temp\ipykernel_18368\141038387.py", line 2, in load_wav_for_map  *
        return load_wav_16k_mono(filename), label, 1
    File "C:\Users\tproh\AppData\Local\Temp\ipykernel_18368\590973627.py", line 10, in load_wav_16k_mono  *
        wav = tfio.audio.resample(wav, rate_in=sample_rate, rate_out=16000)
    File "C:\Users\tproh\Downloads\fake-audio\venv\lib\site-packages\tensorflow_io\python\ops\audio_ops.py", line 462, in resample  **
        value = tf.vectorized_map(f, input)
    File "C:\Users\tproh\Downloads\fake-audio\venv\lib\site-packages\tensorflow_io\python\ops\audio_ops.py", line 458, in f
        return core_ops.io_audio_resample(
    File "C:\Users\tproh\Downloads\fake-audio\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 88, in __getattr__
        return getattr(self._load(), attrb)
    File "C:\Users\tproh\Downloads\fake-audio\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 84, in _load
        self._mod = _load_library(self._library)
    File "C:\Users\tproh\Downloads\fake-audio\venv\lib\site-packages\tensorflow_io\python\ops\__init__.py", line 69, in _load_library
        raise NotImplementedError(

    NotImplementedError: unable to open file: libtensorflow_io.so, from paths: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so']
    caused by: ['C:\\Users\\tproh\\Downloads\\fake-audio\\venv\\lib\\site-packages\\tensorflow_io\\python\\ops\\libtensorflow_io.so not found']


In [None]:
test_ds, _ = load_ds('test.csv', 'test')
test_ds = test_ds.batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
feature_classifier = tf.keras.Sequential([
    tf.keras.layers.Input(shape=1024, dtype=tf.float32,
                          name='input_embedding'),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(len(label_classes))
], name='feature_classifier')

feature_classifier.summary()

In [None]:
feature_classifier.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                 optimizer="adam",
                 metrics=['accuracy'])

log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y %m %d-%H %M %S")

early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                            patience=3,
                                            restore_best_weights=True)

tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

callbacks = [early_stopping_callback, tensorboard_callback]

In [None]:
history = feature_classifier.fit(train_ds,
                       epochs=50,
                       validation_data=val_ds,
                       callbacks=callbacks)

In [None]:
loss, accuracy = feature_classifier.evaluate(test_ds)

print("Loss: ", loss)
print("Accuracy: ", accuracy)

In [None]:
class ReduceMeanLayer(tf.keras.layers.Layer):
  def __init__(self, axis=0, **kwargs):
    super(ReduceMeanLayer, self).__init__(**kwargs)
    self.axis = axis

  def call(self, inp):
    return tf.math.reduce_mean(inp, axis=self.axis)

In [None]:
saved_model_path = 'model'

input_segment = tf.keras.layers.Input(shape=(), dtype=tf.float32, name='audio')
embedding_extraction_layer = hub.KerasLayer(yamnet_model_handle,
                                            trainable=False, name='yamnet')
_, embeddings_output, _ = embedding_extraction_layer(input_segment)
serving_outputs = feature_classifier(embeddings_output)
serving_outputs = ReduceMeanLayer(axis=0, name='classifier')(serving_outputs)
serving_model = tf.keras.Model(input_segment, serving_outputs)
serving_model.save(saved_model_path, include_optimizer=False)

In [None]:
tf.keras.utils.plot_model(serving_model)



---

# Ethinte thaze run cheytha model use cheyya

In [None]:
reloaded_model = tf.saved_model.load(saved_model_path)

In [None]:
testing_wav_data = # Load some data to test the model

In [None]:
serving_results = reloaded_model.signatures['serving_default'](testing_wav_data)
real_or_fake = label_classes[tf.math.argmax(serving_results['classifier'])]
print(f'The input sound is: {real_or_fake}')
