In [11]:
import base64
from functools import partial
from pathlib import Path
from multiprocessing import Pool
from matplotlib import pyplot as plt
import os
import shutil
import numpy as np
import pandas as pd
import librosa
!pip install soundfile
import soundfile as sf
from scipy.io import wavfile
import torch.nn.functional as F
# from fastai.basic_data import DatasetType
import fastai
from fastai.vision import *



In [12]:
def read_file(filename, path='', sample_rate=None, trim=False):
    ''' Reads in a wav file and returns it as an np.float32 array in the range [-1,1] '''
    filename = Path(path) / filename
    file_sr, data = wavfile.read(filename)
    if data.dtype == np.int16:
        data = np.float32(data) / np.iinfo(np.int16).max
    elif data.dtype != np.float32:
        raise OSError('Encounted unexpected dtype: {}'.format(data.dtype))
    if sample_rate is not None and sample_rate != file_sr:
        if len(data) > 0:
            data = librosa.core.resample(
                data, file_sr, sample_rate, res_type='kaiser_fast')
        file_sr = sample_rate
    if trim and len(data) > 1:
        data = librosa.effects.trim(data, top_db=40)[0]
    return data, file_sr


def log_mel_spec_tfm(fname, src_path, dst_path, n_fft=1024, hop_length=256,
                     n_mels=40, fmin=20, fmax=None):
    ''' Renders a wav file as a log-mel spectrogram PNG.

    Reads `src_path/fname` with read_file, computes a power mel spectrogram,
    converts it to dB relative to its own maximum, and saves the result as an
    image at `dst_path/<fname with a .png extension>`.

    Args:
        fname: wav file name, relative to `src_path`.
        src_path: directory the wav file is read from.
        dst_path: directory the PNG is written to (must already exist).
        n_fft: FFT window size passed to librosa.
        hop_length: hop between successive frames, in samples.
        n_mels: number of mel bands.
        fmin: lowest mel filter frequency in Hz.
        fmax: highest mel filter frequency in Hz; None means half the file's
            sample rate.
    '''
    x, sample_rate = read_file(fname, src_path)

    if fmax is None:
        fmax = sample_rate / 2

    # `y=` is passed by keyword: newer librosa releases no longer accept the
    # signal positionally, and older ones accept the keyword too.
    mel_spec_power = librosa.feature.melspectrogram(y=x, sr=sample_rate,
                                                    n_fft=n_fft,
                                                    hop_length=hop_length,
                                                    n_mels=n_mels, power=2.0,
                                                    fmin=fmin, fmax=fmax)
    mel_spec_db = librosa.power_to_db(mel_spec_power, ref=np.max)
    # Swap the extension with pathlib rather than slicing off four characters,
    # so Path arguments and extensions of any length are handled.
    dst_fname = Path(dst_path) / Path(fname).with_suffix('.png')
    plt.imsave(str(dst_fname), mel_spec_db)


def get_res(file='audio.wav'):
    ''' Runs the exported learner on one audio file.

    The audio is loaded at 16 kHz, written to 'tmp.wav' in the working
    directory, rendered to 'tmp.png' by log_mel_spec_tfm, and that image is
    passed to the learner loaded with load_learner('').

    Args:
        file: path of the audio file to score (defaults to 'audio.wav').

    Returns:
        The second entry of the learner's probability tensor, as a string.
        NOTE(review): presumably this is the COVID-positive class probability
        in [0, 1] — confirm against the learner's class order.
    '''
    target_sr = 16000
    fn = 'tmp.wav'

    # Normalise the input to a 16 kHz wav file before feature extraction.
    x, _ = librosa.load(file, sr=target_sr)
    sf.write(fn, x, target_sr)

    # Writes fn with a .png extension next to the temporary wav.
    log_mel_spec_tfm(fn, '.', '.')

    learn = load_learner('')
    learn_predict = learn.predict(open_image(fn[:-4] + '.png'))
    print(learn_predict)
    probs = learn_predict[2].detach().cpu().numpy()
    return str(probs[1])

In [13]:
# Run the end-to-end prediction; get_res() prints the raw learner output
# itself, then the returned probability string is printed here.
res = get_res()
print(res)

(Category tensor(0), tensor(0), tensor([0.8179, 0.1821]))
0.1820751


In [9]:
# Record the installed soundfile version for environment provenance.
print(sf.__version__)

0.10.3


In [10]:
# Record the installed librosa version for environment provenance.
print(librosa.__version__)

0.6.3


In [14]:
# Record the installed numpy version for environment provenance.
import numpy
print(numpy.__version__)

1.18.5
