## Saving a Cat Meow vs. Dog Bork Model

This is a minimal example showing how to train a fastai model on Kaggle, and save it so you can use it in your app.

In [None]:
# Make sure we've got the latest version of fastai:
# (-U upgrades packages that are already installed; -qq keeps pip's output quiet.
#  nbdev is installed alongside fastai.)
!pip install -Uqq fastai nbdev

In [None]:
from fastai.vision.all import *
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os
import pandas as pd
import shutil

Download ESC-50 dataset and set up the directories:

In [None]:
# Remove old zips
!rm -rf /kaggle/working/*.zip*

# Download ESC-50 dataset
!wget -q https://github.com/karoldvl/ESC-50/archive/master.zip
!unzip -q -o master.zip

# Load metadata
meta_df = pd.read_csv('ESC-50-master/meta/esc50.csv')

# Create directories
cat_folder = Path('animal_sounds/cat')
dog_folder = Path('animal_sounds/dog')
cat_folder.mkdir(parents=True, exist_ok=True)
dog_folder.mkdir(parents=True, exist_ok=True)

# Extract cat and dog files
cat_files = meta_df[meta_df['category'] == 'cat']
dog_files = meta_df[meta_df['category'] == 'dog']

# Copy files to respective folders
for _, row in cat_files.iterrows():
    src = f"ESC-50-master/audio/{row['filename']}"
    dst = f"animal_sounds/cat/{row['filename']}"
    shutil.copy(src, dst)
    
for _, row in dog_files.iterrows():
    src = f"ESC-50-master/audio/{row['filename']}"
    dst = f"animal_sounds/dog/{row['filename']}"
    shutil.copy(src, dst)

print(f"Copied {len(cat_files)} cat sounds and {len(dog_files)} dog sounds")

# Now use 'animal_sounds' as your audio_folder
audio_folder = 'animal_sounds'

Add recordings from a separate cat and dog dataset (the `audio-cats-and-dogs` Kaggle dataset, attached to this notebook as an input):

In [None]:
# Install Kaggle API package
!pip install -q kaggle

# Path to the dataset in the input section
dataset_path = Path("/kaggle/input/audio-cats-and-dogs")

# Get all cat files from both test and train directories
cat_files = list(dataset_path.glob("cats_dogs/test/cats/*")) + list(dataset_path.glob("cats_dogs/train/cat/*"))

# Get all dog files from both test and train directories  
dog_files = list(dataset_path.glob("cats_dogs/test/dogs/*")) + list(dataset_path.glob("cats_dogs/train/dog/*"))

# Copy files to respective folders
for filepath in cat_files:
    src = filepath
    filename = Path(filepath).name
    dst = f"animal_sounds/cat/{filename}"
    shutil.copy(src, dst)
    
for filepath in dog_files:
    src = filepath
    filename = Path(filepath).name
    dst = f"animal_sounds/dog/{filename}"
    shutil.copy(src, dst)

Create a helper function to generate spectrograms:

In [None]:
def create_spectrogram(audio_path, save_path=None):
    """Compute a log-scaled mel-spectrogram for one audio file.

    Args:
        audio_path: Location of the audio file; passed to ``librosa.load``.
        save_path: Optional destination for the rendered spectrogram image.
            When falsy (the default) nothing is drawn and the spectrogram
            data itself is returned.

    Returns:
        ``save_path`` when an image was written, otherwise the dB-scaled
        mel-spectrogram computed by ``librosa.power_to_db``.
    """
    y, sr = librosa.load(audio_path)
    # create mel-spectrogram
    mel_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128
    )
    # convert to log scale (dB), measured relative to the clip's own maximum
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # No image requested: skip building a figure that would only be thrown away.
    if not save_path:
        return log_mel_spec

    # plot and save as image
    fig = plt.figure(figsize=(8, 6))
    try:
        librosa.display.specshow(log_mel_spec, sr=sr, x_axis='time', y_axis='mel')
        plt.tight_layout()
        plt.axis('off')
        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when drawing or saving fails;
        # otherwise open figures pile up while looping over many files.
        plt.close(fig)
    return save_path

In [None]:
# Root folder that receives one sub-folder of PNG spectrograms per class
spec_folder = './spectrograms'
spec_root = Path(spec_folder)
spec_root.mkdir(exist_ok=True, parents=True)

# Turn every .wav clip of each class into a spectrogram image
for label in ('cat', 'dog'):
    label_dir = spec_root / label
    label_dir.mkdir(exist_ok=True)

    audio_path = Path(audio_folder) / label
    if not audio_path.exists():
        # Nothing was copied for this class, so there is nothing to convert
        continue

    files = list(audio_path.glob("*.wav"))
    total = len(files)
    print(f"Processing {total} {label} audio files...")

    for i, file in enumerate(files):
        target = label_dir / f"{file.stem}.png"
        create_spectrogram(str(file), str(target))
        # Report progress on every tenth file
        if i % 10 == 0:
            print(f"Processed {i}/{total} files")

print(f"Total spectrograms created: {sum(1 for _ in spec_root.glob('**/*.png'))}")

In [None]:
# Take the first spectrogram that glob yields for each class as a sample to look at
sample_cat = [*Path(spec_folder, "cat").glob("*.png")][0]
sample_dog = [*Path(spec_folder, "dog").glob("*.png")][0]

In [None]:
# Open the sample cat spectrogram; as the cell's last expression it is shown as the cell output
Image.open(sample_cat)

In [None]:
# Open the sample dog spectrogram; as the cell's last expression it is shown as the cell output
Image.open(sample_dog)

**Note:** Unpacking the result of `aug_transforms()` into `batch_tfms` made it possible to control specific parameters rather than relying on the defaults. Experimenting with the rotation, zoom, lighting, and warping applied to the spectrograms helped lower the error rate and get a better loss.
Now we can create our `DataLoaders`:

In [None]:
# Augmentations applied per batch: mild rotation, zoom and lighting changes, no warping
spectrogram_augmentations = aug_transforms(
    max_rotate=10, max_zoom=1.1, max_lighting=0.2, max_warp=0
)

# Describe how to turn the spectrogram folder into labelled image batches:
# images in, categories out, with the label taken from each file's parent folder name
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=RandomSplitter(),
    item_tfms=Resize(224),
    batch_tfms=[
        *spectrogram_augmentations,
        Normalize.from_stats(*imagenet_stats),
    ],
)

# Build the DataLoaders with a batch size of 16 and preview one batch
dls = dblock.dataloaders(spec_folder, bs=16)
dls.show_batch()

... and train our model, a resnet50:

In [None]:
# Build a vision learner over `dls` with the resnet50 architecture, reporting error rate as the metric
learner = vision_learner(dls, resnet50, metrics=error_rate)

Plot the loss vs. learning rate to reduce the amount of guesswork in picking a good starting learning rate:

In [None]:
 learner.lr_find()

Fine tune and show the results 🥳

In [None]:
# Fine-tune for 200 epochs with a base learning rate of 1e-2 (the same value as 10e-3)
learner.fine_tune(200, base_lr=1e-2)

In [None]:
# Display a sample of results from the fine-tuned learner
learner.show_results()

Now we can export our trained `Learner`. This contains all the information needed to run the model:

In [None]:
# Export the trained learner to a file named 'cat-meow-vs-dog-bork.pkl'
# NOTE(review): the file is presumably written under the learner's path — confirm it lands in /kaggle/working
learner.export('cat-meow-vs-dog-bork.pkl')

Finally, open the Kaggle sidebar on the right if it's not already open, and find the section marked "Output". Open the `/kaggle/working` folder, and you'll see `cat-meow-vs-dog-bork.pkl`. Click on it, then click on the menu that appears on the right, and choose "Download". After a few seconds, your model will be downloaded to your computer, where you can then create your app that uses the model.