###### *Execute in sequence*

# Data and Processing

In [0]:
from fastai.vision import *
from tqdm import tqdm_notebook
import IPython
import IPython.display
import PIL
import textwrap

In [0]:
ROOT = Path('/content')
FOLDER = 'aud' #for audio files
SOURCE = ROOT/FOLDER
LIST = ROOT/'head.csv'
CATMODEL = 'model.pkl' #model for categorization

In [116]:
!wget -q -O model.pkl 'https://drive.google.com/uc?id=1GiUQOVgGckM95dT30MvxxPPXMVkwlFwk&export=download' > /dev/null
!rm sample_data -r

rm: cannot remove 'sample_data': No such file or directory


In [0]:
#Sample audio. Can specify any url here
!apt-get -qq install sox libsox-fmt-mp3 > /dev/null
!wget -q -O aud.mp3 https://sampleswap.org/samples-ghost/VOCALS%20and%20SPOKEN%20WORD/Commercials%20and%20Radio/3207[kb]1941-newsreel-co_ed_warrior_women.aif.mp3
!sox 'aud.mp3' aud.wav

In [0]:
#splitting audio file to 3s
def trim(fn, dest='/content/temp'):
  os.system(f'mkdir -p {dest} && cd {dest} && sox {ROOT}/{fn} {fn} trim 0 3 : newfile : restart')

In [0]:
trim('aud.wav', FOLDER)

In [0]:
def createhead():
  os.system(f"echo 'fname,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,Gurgling,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)' > {LIST}")

In [0]:
def fntocsv(fn):
  val = ',0'
  os.system(f"echo '{fn}{val*80}' >> {LIST}")

In [0]:
#create csv for all files in FOLDER
createhead()
for fn in os.listdir(FOLDER):
  fntocsv(str(fn))

In [0]:
df = pd.read_csv(LIST)

# Classification Model

#### Audio to spectrogram

In [0]:
import librosa
import librosa.display

def read_audio(conf, pathname, trim_long_data):
    y, sr = librosa.load(pathname, sr=conf.sampling_rate)
    # trim silence
    if 0 < len(y): # workaround: 0 length causes error
        y, _ = librosa.effects.trim(y) # trim, top_db=default(60)
    # make it unified length to conf.samples
    if len(y) > conf.samples: # long enough
        if trim_long_data:
            y = y[0:0+conf.samples]
    else: # pad blank
        padding = conf.samples - len(y)    # add padding at both ends
        offset = padding // 2
        y = np.pad(y, (offset, conf.samples - len(y) - offset), 'constant')
    return y

def audio_to_melspectrogram(conf, audio):
    spectrogram = librosa.feature.melspectrogram(audio, 
                                                 sr=conf.sampling_rate,
                                                 n_mels=conf.n_mels,
                                                 hop_length=conf.hop_length,
                                                 n_fft=conf.n_fft,
                                                 fmin=conf.fmin,
                                                 fmax=conf.fmax)
    spectrogram = librosa.power_to_db(spectrogram)
    spectrogram = spectrogram.astype(np.float32)
    return spectrogram

def show_melspectrogram(conf, mels, title='Log-frequency power spectrogram'):
    librosa.display.specshow(mels, x_axis='time', y_axis='mel', 
                             sr=conf.sampling_rate, hop_length=conf.hop_length,
                            fmin=conf.fmin, fmax=conf.fmax)
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.show()

def read_as_melspectrogram(conf, pathname, trim_long_data, debug_display=False):
    x = read_audio(conf, pathname, trim_long_data)
    mels = audio_to_melspectrogram(conf, x)
    if debug_display:
        IPython.display.display(IPython.display.Audio(x, rate=conf.sampling_rate))
        show_melspectrogram(conf, mels)
    return mels


class conf:
    # Preprocessing settings
    sampling_rate = 44100
    duration = 2
    hop_length = 347*duration # to make time steps 128
    fmin = 20
    fmax = sampling_rate // 2
    n_mels = 128
    n_fft = n_mels * 20
    samples = sampling_rate * duration

In [125]:
def mono_to_color(X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6):
    # Stack X as [X,X,X]
    X = np.stack([X, X, X], axis=-1)

    # Standardize
    mean = mean or X.mean()
    std = std or X.std()
    Xstd = (X - mean) / (std + eps)
    _min, _max = Xstd.min(), Xstd.max()
    norm_max = norm_max or _max
    norm_min = norm_min or _min
    if (_max - _min) > eps:
        # Scale to [0, 255]
        V = Xstd
        V[V < norm_min] = norm_min
        V[V > norm_max] = norm_max
        V = 255 * (V - norm_min) / (norm_max - norm_min)
        V = V.astype(np.uint8)
    else:
        # Just zero
        V = np.zeros_like(Xstd, dtype=np.uint8)
    return V

def convert_wav_to_image(df, source, img_dest=''):
    X = []
    # for row in df.iterrows():
    for i, row in tqdm_notebook(df.iterrows()):
        x = read_as_melspectrogram(conf, source/str(row.fname), trim_long_data=False)
        x_color = mono_to_color(x)
        X.append(x_color)
    return X

Xval = convert_wav_to_image(df, source=SOURCE)

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [0]:
from fastai import *
from fastai.vision import *
from fastai.vision.data import *
import random

CUR_X_FILES, CUR_X = list(df.fname.values), Xval

def open_fat2019_image(fn, convert_mode, after_open)->Image:
    # open
    idx = CUR_X_FILES.index(fn.split('/')[-1])
    # idx = int(fn.split('/')[-1])
    x = PIL.Image.fromarray(CUR_X[idx])
    # crop
    time_dim, base_dim = x.size
    crop_x = random.randint(0, time_dim - base_dim)
    x = x.crop([crop_x, 0, crop_x+base_dim, base_dim])    
    # standardize
    return Image(pil2tensor(x, np.float32).div_(255))

vision.data.open_image = open_fat2019_image

#### Implementation of LwLARP taken from [Dan Ellis](https://colab.research.google.com/drive/1AgPdhSp7ttY18O3fEoHOQKlt_3HJDLi8).

In [0]:
def _one_sample_positive_class_precisions(scores, truth):
    """Calculate precisions for each true class for a single sample.

    Args:
      scores: np.array of (num_classes,) giving the individual classifier scores.
      truth: np.array of (num_classes,) bools indicating which classes are true.

    Returns:
      pos_class_indices: np.array of indices of the true classes for this sample.
      pos_class_precisions: np.array of precisions corresponding to each of those
        classes.
    """
    num_classes = scores.shape[0]
    pos_class_indices = np.flatnonzero(truth > 0)
    # Only calculate precisions if there are some true classes.
    if not len(pos_class_indices):
        return pos_class_indices, np.zeros(0)
    # Retrieval list of classes for this sample.
    retrieved_classes = np.argsort(scores)[::-1]
    # class_rankings[top_scoring_class_index] == 0 etc.
    class_rankings = np.zeros(num_classes, dtype=np.int)
    class_rankings[retrieved_classes] = range(num_classes)
    # Which of these is a true label?
    retrieved_class_true = np.zeros(num_classes, dtype=np.bool)
    retrieved_class_true[class_rankings[pos_class_indices]] = True
    # Num hits for every truncated retrieval list.
    retrieved_cumulative_hits = np.cumsum(retrieved_class_true)
    # Precision of retrieval list truncated at each hit, in order of pos_labels.
    precision_at_hits = (
            retrieved_cumulative_hits[class_rankings[pos_class_indices]] /
            (1 + class_rankings[pos_class_indices].astype(np.float)))
    return pos_class_indices, precision_at_hits


def calculate_per_class_lwlrap(truth, scores):
    """Calculate label-weighted label-ranking average precision.

    Arguments:
      truth: np.array of (num_samples, num_classes) giving boolean ground-truth
        of presence of that class in that sample.
      scores: np.array of (num_samples, num_classes) giving the classifier-under-
        test's real-valued score for each class for each sample.

    Returns:
      per_class_lwlrap: np.array of (num_classes,) giving the lwlrap for each
        class.
      weight_per_class: np.array of (num_classes,) giving the prior of each
        class within the truth labels.  Then the overall unbalanced lwlrap is
        simply np.sum(per_class_lwlrap * weight_per_class)
    """
    assert truth.shape == scores.shape
    num_samples, num_classes = scores.shape
    # Space to store a distinct precision value for each class on each sample.
    # Only the classes that are true for each sample will be filled in.
    precisions_for_samples_by_classes = np.zeros((num_samples, num_classes))
    for sample_num in range(num_samples):
        pos_class_indices, precision_at_hits = (
            _one_sample_positive_class_precisions(scores[sample_num, :],
                                                  truth[sample_num, :]))
        precisions_for_samples_by_classes[sample_num, pos_class_indices] = (
            precision_at_hits)
    labels_per_class = np.sum(truth > 0, axis=0)
    weight_per_class = labels_per_class / float(np.sum(labels_per_class))
    # Form average of each column, i.e. all the precisions assigned to labels in
    # a particular class.
    per_class_lwlrap = (np.sum(precisions_for_samples_by_classes, axis=0) /
                        np.maximum(1, labels_per_class))
    # overall_lwlrap = simple average of all the actual per-class, per-sample precisions
    #                = np.sum(precisions_for_samples_by_classes) / np.sum(precisions_for_samples_by_classes > 0)
    #           also = weighted mean of per-class lwlraps, weighted by class label prior across samples
    #                = np.sum(per_class_lwlrap * weight_per_class)
    return per_class_lwlrap, weight_per_class


# Wrapper for fast.ai library
def lwlrap(scores, truth, **kwargs):
    score, weight = calculate_per_class_lwlrap(to_np(truth), to_np(scores))
    return torch.Tensor([(score * weight).sum()])

#### Inference

In [128]:
CUR_X_FILES, CUR_X = list(df.fname.values), Xval

test = ImageList.from_csv(ROOT, LIST, folder=FOLDER)
learn = load_learner(ROOT, CATMODEL, test=test)
preds, _ = learn.TTA(ds_type=DatasetType.Test)



In [0]:
df[learn.data.classes] = preds

In [130]:
df.describe()

Unnamed: 0,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,Gong,Gurgling,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
count,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0
mean,0.00846,0.001116,0.00087,0.005763,0.009229,0.002956,0.000608,0.004626,0.000286,0.103948,0.068082,0.007912,0.001374,0.004459,0.007095,0.002493,0.000278,0.003735,0.005076,0.00588,0.001887,7e-05,0.001397,0.010763,0.008479,0.004366,0.003288,0.013515,0.000129,0.015425,0.011111,0.003394,0.043268,0.004148,7.8e-05,0.001683,0.003735,5.341145e-05,0.000603,0.019112,0.000641,0.001126,0.00681,0.000864,0.00129,0.078693,0.199777,0.003631,0.000171,0.022597,0.007055,0.006758,0.048674,0.010476,0.005571,0.000228,0.004714,0.002133,0.001342,0.000392,0.004902,0.003876,0.00157,0.008112,0.001821,0.039362,0.000492,0.000542,0.001415,0.005124,0.015115,0.005514,0.001125,0.024226,0.001804,0.001323,0.031449,0.009548,0.027115,0.009441
std,0.00889,0.001304,0.00101,0.004243,0.014088,0.001845,0.000381,0.00296,0.000211,0.045252,0.03882,0.009071,0.001023,0.003939,0.003536,0.003077,0.0003,0.003735,0.01356,0.004455,0.00135,3.6e-05,0.001228,0.010195,0.009416,0.003494,0.003732,0.008652,0.000153,0.01762,0.010909,0.003108,0.042143,0.002292,4.2e-05,0.001065,0.007608,0.0001696907,0.000728,0.025207,0.000969,0.001214,0.004376,0.000681,0.000505,0.072674,0.079301,0.003389,8.6e-05,0.028202,0.006593,0.005724,0.016818,0.005434,0.005006,0.000289,0.003292,0.001392,0.001582,0.000281,0.005104,0.002174,0.002485,0.002605,0.001118,0.022601,0.000416,0.000741,0.00113,0.01266,0.019347,0.009325,0.000916,0.011166,0.001317,0.00079,0.028122,0.004626,0.035905,0.004398
min,0.00019,0.000186,6e-06,0.000485,0.001417,0.000159,6e-06,0.00063,1e-06,0.014825,0.010696,0.000207,0.000248,0.0004,0.002892,0.000205,6.9e-05,0.000276,0.000503,0.000818,0.000142,9e-06,2.3e-05,0.000709,0.002026,0.000895,0.00067,0.004574,7e-06,0.001132,0.000805,8.1e-05,0.00239,0.000306,2.9e-05,7.6e-05,0.000189,6.121011e-07,4.7e-05,0.000161,1.4e-05,6.7e-05,0.001988,9.2e-05,0.000302,0.014059,0.044692,0.000153,1.4e-05,0.00259,0.00157,0.000127,0.015164,0.001342,0.000164,8e-06,0.00096,0.000445,6e-05,3.9e-05,0.000671,0.000237,5.9e-05,0.002606,0.000502,0.012447,4.9e-05,9e-06,0.000514,0.000366,0.000287,0.001362,9.9e-05,0.007814,0.000114,0.00022,0.002465,0.000813,0.003072,0.003634
25%,0.003907,0.000318,0.000259,0.002633,0.004002,0.001753,0.000409,0.00243,0.000156,0.080647,0.041705,0.001307,0.000804,0.002101,0.004689,0.000488,0.0001,0.001483,0.000593,0.002592,0.000585,4.3e-05,0.000649,0.003811,0.004179,0.001894,0.000773,0.008013,2.5e-05,0.002493,0.003976,0.001529,0.018366,0.002586,4.2e-05,0.001006,0.000436,2.344073e-06,8.5e-05,0.008011,0.000162,0.000216,0.003103,0.000441,0.001056,0.036885,0.169979,0.000873,0.000143,0.005292,0.002548,0.003507,0.039761,0.00648,0.001886,5.3e-05,0.001811,0.001015,0.000649,0.000176,0.001136,0.002451,0.000353,0.006774,0.000935,0.02117,0.000236,0.000138,0.000895,0.000582,0.004896,0.001674,0.000246,0.013408,0.001225,0.000684,0.013242,0.006429,0.010404,0.005386
50%,0.006099,0.00068,0.000328,0.004581,0.00475,0.002519,0.000549,0.003494,0.000234,0.117525,0.068156,0.004087,0.001111,0.003865,0.005639,0.001764,0.000181,0.001899,0.001003,0.004252,0.00221,7e-05,0.001015,0.008187,0.005339,0.002371,0.001184,0.011052,7.4e-05,0.008425,0.009656,0.002277,0.033914,0.003829,7.7e-05,0.001381,0.000781,4.30352e-06,0.000218,0.009407,0.000331,0.00065,0.004962,0.0007,0.001362,0.044759,0.223057,0.001796,0.000167,0.008514,0.004669,0.006503,0.046449,0.010652,0.004401,8.4e-05,0.003929,0.001565,0.000963,0.000339,0.002053,0.004035,0.000642,0.008273,0.00131,0.036599,0.00045,0.00019,0.001141,0.001257,0.01051,0.003042,0.00112,0.024055,0.001593,0.001306,0.017461,0.0095,0.01723,0.009857
75%,0.011767,0.001198,0.001415,0.008771,0.008586,0.005068,0.000772,0.006956,0.000388,0.12182,0.088688,0.009643,0.001442,0.005273,0.009769,0.00327,0.000209,0.005131,0.00157,0.009052,0.002901,8.2e-05,0.001922,0.013071,0.006968,0.006851,0.004639,0.013467,0.000163,0.014759,0.011255,0.003808,0.039743,0.006251,0.000118,0.002372,0.002239,1.16946e-05,0.000882,0.014809,0.000557,0.001817,0.009807,0.00091,0.001584,0.085456,0.241661,0.006814,0.000204,0.030276,0.008092,0.007017,0.058539,0.014009,0.006965,0.000288,0.008067,0.00295,0.00125,0.000508,0.007237,0.00452,0.000997,0.008607,0.002693,0.061262,0.000593,0.000562,0.001473,0.003207,0.016494,0.00406,0.001427,0.03252,0.001978,0.001757,0.05603,0.01164,0.028224,0.012368
max,0.034025,0.004757,0.003494,0.015445,0.055013,0.005941,0.001328,0.010742,0.00075,0.174433,0.138424,0.030829,0.003488,0.012583,0.013763,0.011651,0.001056,0.013376,0.050039,0.013579,0.004542,0.000125,0.004506,0.033953,0.036909,0.011197,0.011547,0.034501,0.000571,0.04979,0.04047,0.01015,0.149599,0.008109,0.000144,0.003511,0.027882,0.0006178694,0.00203,0.07537,0.003646,0.003996,0.014941,0.002701,0.001986,0.264973,0.328037,0.009585,0.000315,0.084301,0.024966,0.022674,0.075928,0.0205,0.017745,0.001055,0.009677,0.004643,0.006389,0.000897,0.016846,0.008314,0.008128,0.013193,0.003895,0.076196,0.001657,0.002669,0.004951,0.047033,0.073789,0.036286,0.003485,0.040599,0.004601,0.002788,0.093949,0.017792,0.141792,0.016504


In [0]:
df_res=df.drop(columns=['fname'])
for c in df_res.columns:
  df_res[c] = pd.to_numeric(df_res[c])
df_res['Sound'] = df_res.idxmax(axis=1)
df_res=pd.concat([df['fname'],df_res['Sound']], axis=1)

In [0]:

df_res.to_csv('category.csv', index=False)

In [133]:
df_res

Unnamed: 0,fname,Sound
0,aud006.wav,Male_speech_and_man_speaking
1,aud007.wav,Male_speech_and_man_speaking
2,aud008.wav,Burping_and_eructation
3,aud011.wav,Male_singing
4,aud005.wav,Burping_and_eructation
5,aud004.wav,Male_speech_and_man_speaking
6,aud010.wav,Male_speech_and_man_speaking
7,aud001.wav,Male_speech_and_man_speaking
8,aud002.wav,Male_speech_and_man_speaking
9,aud003.wav,Male_speech_and_man_speaking


# DeepSpeech

In [134]:
!pip3 install deepspeech > /dev/null
# Download pre-trained English model and extract
!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.6.1/deepspeech-0.6.1-models.tar.gz
!tar xf deepspeech-0.6.1-models.tar.gz
# Download example audio files
!curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.6.1/audio-0.6.1.tar.gz > /dev/null
!tar xf audio-0.6.1.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   620    0   620    0     0   2214      0 --:--:-- --:--:-- --:--:--  2214
100 1172M  100 1172M    0     0  33.5M      0  0:00:34  0:00:34 --:--:-- 34.8M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   608    0   608    0     0   2243      0 --:--:-- --:--:-- --:--:--  2243
100  192k  100  192k    0     0   204k      0 --:--:-- --:--:-- --:--:--  411k


#### Inference

In [0]:
def transcribe(fn):
  # Transcribe an audio file in audio folder
  os.system(f'sox {FOLDER}/{fn} --bits 16 --channels 1 --rate 16000 --encoding signed-integer --endian little --compression 0.0 --no-dither .temp.wav')
  os.system(f"deepspeech --model deepspeech-0.6.1-models/output_graph.pbmm --lm deepspeech-0.6.1-models/lm.binary --trie deepspeech-0.6.1-models/trie --audio .temp.wav > .transcript")
  with open('.transcript', 'r') as file:
    data = file.read().rstrip('\n')
  if data:
    os.remove('.temp.wav')
    os.remove('.transcript')
  return data

In [0]:
speech = []
for fn in sorted(os.listdir(FOLDER), key=lambda s: s.lower()):
  words = transcribe(fn)
  speech.append(words)

# Results

In [0]:
human_speech = {'Child_speech_and_kid_speaking':'Child', 'Female_singing':'Female singing', 'Female_speech_and_woman_speaking':'Woman', 'Male_singing':'Male singing', 'Male_speech_and_man_speaking':'Man', 'Whispering':'Whispering', 'Yell':'Yelling'}

In [0]:
description = []
for i, label in enumerate(df_res['Sound']):
  if label in human_speech:
    val = f'{human_speech[label]}: "{speech[i]}"'
  else:
    val = str(label).upper()
  description.append(val)

In [139]:
text = '  '.join(description)
print(textwrap.fill(text, 70))

Man: "the cowed and oregon state have gone military"  Man: "to strike
away the lad is a"  BURPING_AND_ERUCTATION  Male singing: "they
believe in using their heads when it comes"  BURPING_AND_ERUCTATION
Man: "almost she gun this well"  Man: "margaret your lard believe in
air power"  Man: "oh here's a go"  Man: "true to a navy amari"  Man:
"but jane start says that uncle sam"  Man: "he was fighting for the
parish service is "  Man: "her but one blow from her and"  Man: "i
take the air"


In [140]:
IPython.display.Audio("aud.wav")