🐛 replace features_list by components_list

novoic · Jun 5, 2020 · 5881933 · 5881933
1 parent c30f8ce
commit 5881933
Show file tree

Hide file tree

Showing 2 changed files with 281 additions and 2 deletions.
diff --git a/notebook_tutorials/.ipynb_checkpoints/Tutorial1-AudioClassification-checkpoint.ipynb b/notebook_tutorials/.ipynb_checkpoints/Tutorial1-AudioClassification-checkpoint.ipynb
@@ -0,0 +1,279 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Audio classification with surfboard and sklearn\n",
+    "\n",
+    "In this notebook, we will use the ESC-50 dataset, sklearn and surfboard together to obtain good accuracy on audio classification."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Make sure surfboard is installed\n",
+    "!pip install .."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "import numpy as np\n",
+    "from sklearn.svm import LinearSVC\n",
+    "from sklearn.metrics import accuracy_score, plot_confusion_matrix\n",
+    "\n",
+    "from surfboard.sound import Waveform\n",
+    "from surfboard.feature_extraction import extract_features"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Download ESC-50. More information on this dataset can be found [here](https://github.com/karolpiczak/ESC-50). \n",
+    "ESC-50 is an environmental classification dataset.  \n",
+    "Download link: https://github.com/karoldvl/ESC-50/archive/master.zip  \n",
+    "Download is roughly 600MB. This might take a bit of time depending on your internet connection."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Unzip. Replace the path below with the path where your file was downloaded.\n",
+    "# On a mac, it is likely that the code below should work.\n",
+    "!unzip ~/Downloads/ESC-50-master.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Inspect the unzipped file.\n",
+    "!ls ESC-50-master/audio"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### The files have name `{}-{}-{}-{id}.wav` where id is the label.\n",
+    "We will keep only the first 10 classes, to make the processing faster."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only labels 0 ... 9.\n",
+    "acceptable_labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
+    "file_names = [\n",
+    "    f for f in os.listdir(f\"ESC-50-master/audio\") if int(f.split('-')[-1].split('.')[0]) in acceptable_labels\n",
+    "]\n",
+    "\n",
+    "waveforms = []\n",
+    "\n",
+    "# Replace the argument to os.listdir() below if you unzipped somewhere else.\n",
+    "for fname in tqdm(file_names):\n",
+    "    waveforms.append(Waveform(path=os.path.join(f\"ESC-50-master/audio/{fname}\")))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Use surfboard to extract features.\n",
+    "What we do below can be done quickly using the surfboard CLI."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# Note that we only pick components for which statistics are defined (i.e. time series) to make\n",
+    "# the code neater. \n",
+    "components_list = [\n",
+    "    'mfcc', 'spectral_flux', 'spectral_slope', 'spectral_centroid', 'spectral_spread', 'spectral_skewness',\n",
+    "    'spectral_kurtosis', 'spectral_rolloff', 'shannon_entropy_slidingwindow', 'rms'\n",
+    "]\n",
+    "\n",
+    "statistics_list = ['mean', 'std', 'first_derivative_mean', 'first_derivative_std']\n",
+    "\n",
+    "# Extract dataframe...\n",
+    "feature_df = extract_features(\n",
+    "    waveforms=waveforms, components_list=components_list, statistics_list=statistics_list\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Let's inspect the extracted features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "feature_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Turn dataframe into numpy arrays.\n",
+    "X = np.array(feature_df)\n",
+    "labels = np.array([int(fname.split('-')[-1].split('.')[0]) for fname in file_names])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Now we pick some training ids and some dev ids."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_valid_split = int(0.8 * X.shape[0])\n",
+    "\n",
+    "# Pick random ids to create a train/valid split from the data.\n",
+    "train_ids = np.random.choice(X.shape[0], train_valid_split, replace=False)\n",
+    "valid_ids = [idx for idx in np.arange(X.shape[0]) if idx not in train_ids]\n",
+    "\n",
+    "print(\"There are {} training examples and {} validation examples...\".format(len(train_ids), len(valid_ids)))\n",
+    "\n",
+    "# Index into X using the randomly chosen ids.\n",
+    "X_train, X_valid = X[train_ids], X[valid_ids]\n",
+    "label_train, label_valid = labels[train_ids], labels[valid_ids]\n",
+    "\n",
+    "# Normalize columns (these features are going into an SVM) with maximum of each X_train column.\n",
+    "X_train, X_valid = X_train / X_train.max(0), X_valid / X_train.max(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Onto the classification task."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "svm = LinearSVC()\n",
+    "\n",
+    "# Train the SVM.\n",
+    "svm.fit(X_train, label_train)\n",
+    "\n",
+    "predictions_train = svm.predict(X_train)\n",
+    "predictions_valid = svm.predict(X_valid)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Show the accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print('Train accuracy is {} and validation accuracy is {}'.format(\n",
+    "    accuracy_score(label_train, predictions_train),\n",
+    "    accuracy_score(label_valid, predictions_valid),\n",
+    "))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Show the confusion matrix on valid set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_confusion_matrix(svm, X_valid, label_valid);"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Exercise\n",
+    "Now that you have a base model which should perform decently well on this task, try to add or remove features and see how this might affect the validation accuracy and confusion matrix."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebook_tutorials/Tutorial1-AudioClassification.ipynb b/notebook_tutorials/Tutorial1-AudioClassification.ipynb
@@ -113,7 +113,7 @@
    "source": [
     "# Note that we only pick components for which statistics are defined (i.e. time series) to make\n",
     "# the code neater. \n",
-    "features_list = [\n",
+    "components_list = [\n",
     "    'mfcc', 'spectral_flux', 'spectral_slope', 'spectral_centroid', 'spectral_spread', 'spectral_skewness',\n",
     "    'spectral_kurtosis', 'spectral_rolloff', 'shannon_entropy_slidingwindow', 'rms'\n",
     "]\n",
@@ -122,7 +122,7 @@
     "\n",
     "# Extract dataframe...\n",
     "feature_df = extract_features(\n",
-    "    waveforms=waveforms, features_list=features_list, statistics_list=statistics_list\n",
+    "    waveforms=waveforms, components_list=components_list, statistics_list=statistics_list\n",
     ")"
    ]
   },