In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk every directory under the Kaggle input mount and build each file's full path.
# NOTE(review): the joined path is neither printed nor stored, so this loop shows
# nothing even though the comment above says it lists the files — confirm whether
# the output was dropped on purpose.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Project Overview

- Objective : 
  - **To classify urban sounds**
  - used **librosa** library for music and audio analysis
- Classification Problem
- Data cleaning
- Exploratory Data Analysis
- Data Preprocessing : Feature Extraction using MFCC
- **Artificial Neural Network (ANN)** Training and Prediction

# About Project

Automatic environmental sound classification is a growing area of research with numerous real world applications. Whilst there is a large body of research in related audio fields such as speech and music, work on the classification of environmental sounds is comparatively scarce.

There is a plethora of real world applications for this research, such as:

• Content-based multimedia indexing and retrieval
• Assisting deaf individuals in their daily activities
• Smart home use cases such as 360-degree safety and security capabilities
• Industrial uses such as predictive maintenance

![](https://cdn.lucidsamples.com/c/15-category_default/sound-effects-packs.jpg)

## Code and Resources used

- Python version: 3.7.6
- Packages: Pandas, Numpy, Seaborn, Matplotlib, Scikit, Keras, Tensorflow, Librosa, Ipython
- Resources used:

  * Medium : https://mikesmales.medium.com/sound-classification-using-deep-learning-8bc2aa1990b7
  * Heartbeat : https://heartbeat.fritz.ai/working-with-audio-signals-in-python-6c2bd63b2daf


# Dataset

Dataset URL: https://urbansounddataset.weebly.com/urbansound8k.html

For this project we will use a dataset called UrbanSound8K. The dataset contains 8732 sound excerpts (<=4s) of urban sounds from 10 classes, which are:

• Air Conditioner
• Car Horn
• Children Playing
• Dog bark
• Drilling
• Engine Idling
• Gun Shot
• Jackhammer
• Siren
• Street Music


# Audio file overview

Sounds are pressure waves, and these waves can be represented by numbers over a time period. These air pressure differences are what the ear picks up and the brain interprets as sound.

These sound excerpts are digital audio files in .wav format. Sound waves are digitised by sampling them at discrete intervals known as the sampling rate (typically 44.1kHz for CD quality audio meaning samples are taken 44,100 times per second).

Each sample is the amplitude of the wave at a particular point in time. The bit depth determines how finely that amplitude is quantised, and therefore the dynamic range of the signal (typically 16 bits, which means a sample can take one of 65,536 amplitude values).

## Sampling Frequency
The sampling frequency (or sample rate) is the number of samples (data points) per second in a sound. For example: if the sampling frequency is 44.1 kHz, a recording with a duration of 60 seconds will contain 2,646,000 samples (44,100 × 60). In practice, sampling at more than 10x the highest frequency of interest helps measure the amplitude correctly in the time domain.

# Importing Libraries

In [None]:
# important packages
	
import pandas as pd					# data manipulation using dataframes
import numpy as np					# data statistical analysis

import seaborn as sns				# Statistical data visualization
import matplotlib.pyplot as plt		# data visualisation
%matplotlib inline

## Librosa library
Librosa is a Python package for music and audio processing by Brian McFee and will allow us to load audio in our notebook as a numpy array for analysis and manipulation.

For much of the preprocessing we will be able to use Librosa’s load() function, which by default converts the sampling rate to 22.05 kHz, normalises the data so the sample values range between -1 and 1, and flattens the audio channels into mono.

In [None]:
import librosa							# package for music and audio analysis
import librosa.display

In [None]:
import IPython.display as ipd			# public api for display tool in ipython

# Loading and Visualizing an audio file

In [None]:
# Relative path to one example .wav clip in the fold3 directory of the UrbanSound8K input data
audio_file = "../input/urbansound8k/fold3/102105-3-0-0.wav"

## Loading file

In [None]:
# Decode the clip: `data` receives the sample array and `sample_rate` the rate it was loaded at.
# No arguments are overridden, so librosa.load's defaults (described in the Librosa section) apply.
data,sample_rate = librosa.load(audio_file)

## Playing audio

In [None]:
# Render an inline audio player for the clip, read from the file path (not from `data`)
ipd.Audio(audio_file)

This is a dog sound. Let us visualize its waveform using the librosa library.

## Waveform visualization
A waveform plot shows the amplitude of the signal against time.

In [None]:
# `librosa.display.waveplot` was removed in librosa 0.10 in favour of `waveshow`.
# Use whichever function this librosa version provides so the cell runs on both
# old and new releases (the `or` only touches `waveplot` when `waveshow` is absent).
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

# Amplitude-vs-time view of the loaded clip
plt.figure(figsize=(14,5))
plot_waveform(data, sr = sample_rate)
plt.show()

## Spectrogram:
A spectrogram is a visual representation of the spectrum of frequencies of a signal as it varies with time. They are time-frequency portraits of signals. Using a spectrogram, we can see how energy levels (dB) vary over time.

In [None]:
# Short-time Fourier transform of the loaded clip (complex-valued)
X = librosa.stft(data)

# Take the magnitude of each STFT bin and express it in decibels
Xdb = librosa.amplitude_to_db(np.abs(X))

# Spectrogram with time on the x-axis and a linear frequency axis in Hz
plt.figure(figsize=(20, 5))
librosa.display.specshow(
    Xdb,
    sr=sample_rate,
    x_axis='time',
    y_axis='hz',
)
plt.colorbar()
plt.show()

## Log-frequency axis: 
Features can be obtained from a spectrogram by converting the linear frequency axis, as shown above, into a logarithmic axis. The resulting representation is also called a log-frequency spectrogram

In [None]:
plt.figure(figsize=(20, 5))
# Reuse the dB-scaled spectrogram `Xdb`, this time drawn with a logarithmic frequency axis
librosa.display.specshow(Xdb, sr=sample_rate, x_axis='time', y_axis='log')
# Colour scale for the plotted Xdb values
plt.colorbar()
plt.show()

# Importing Dataset

In [None]:
# Read the UrbanSound8K metadata CSV from the Kaggle input directory
raw_df = pd.read_csv("../input/urbansound8k/UrbanSound8K.csv")
# Work on a copy so the frame as loaded stays available in raw_df
df = raw_df.copy()

# Data Inspection

In [None]:
# Dimensions of the metadata table as (rows, columns)
df.shape

In [None]:
# Preview the first rows of the metadata table
df.head()

In [None]:
# Preview the last rows of the metadata table
df.tail()

In [None]:
# Column labels of the metadata table
df.columns

In [None]:
df.info()			# concise summary: per-column non-null counts and dtypes, plus memory usage

# Data Cleaning

In [None]:
### MISSING DATA ###

# Number of null entries in each column
df.isnull().sum()

# Exploratory Data Analysis

I looked at the distributions of the data and the value counts for the various categorical variables. Below are a few highlights :

## Target Variable

In [None]:
# Number of rows for each label in the 'class' column, most frequent first
df['class'].value_counts()

In [None]:
# Horizontal bar per class label, ordered by how often each label occurs
sns.countplot(y="class", data=df, order = df['class'].value_counts().index)
plt.show()

Dataset is almost balanced

## Other variables : Predictors

In [None]:
# One red 10-bin histogram per column that DataFrame.hist can plot (the numeric ones)
df.hist( bins = 10, figsize = (10,10), color = 'r')
plt.show()

In [None]:
# Calculate correlations over the numeric columns only.
# The metadata also holds text columns (the file name and the class label), and
# pandas 2.x raises on those in DataFrame.corr() instead of silently dropping
# them. select_dtypes keeps the cell working on both old and new pandas.
corr = df.select_dtypes(include="number").corr()

# Heatmap of the pairwise correlations, annotated to two decimals
sns.heatmap(corr,  annot=True, fmt=".2f")
plt.show()  # consistent with the other plotting cells; also hides the Axes repr

### Observations

- start and end timings are correlated with each other
- most of the sounds are foreground rather than background sounds
- the folds contain roughly equal numbers of clips

## Sounds of different classes

In [None]:
# Preview the first rows again as a reference for the columns used below
df.head()

In [None]:
# Distinct classID values as a plain list, in order of first appearance in df
classID = list(df['classID'].unique())

In [None]:
# Build one example file path per class: the first metadata row of each classID.
# drop_duplicates keeps the first occurrence of every classID in row order, which
# is the same row (and the same class order) that a per-class scan of the table
# would find, without the nested Python loop over every row.
first_row_per_class = df.drop_duplicates(subset="classID", keep="first")

# Clips live under ../input/urbansound8k/fold<fold>/<slice_file_name>
audio_list = [
    "../input/urbansound8k/" + "fold" + str(fold) + "/" + file_name
    for fold, file_name in zip(
        first_row_per_class["fold"], first_row_per_class["slice_file_name"]
    )
]

In [None]:
# Display the collected example paths (one per class)
audio_list

Play a randomly chosen sound file from the per-class examples above.

In [None]:
import random
# Pick one of the per-class example clips; no seed is set, so the choice can differ between runs
audio_file = random.choice(audio_list)
# Decode the chosen clip into `data` / `sample_rate`
data,sample_rate = librosa.load(audio_file)
# The player reads from the file path, not from the decoded `data`
ipd.Audio(audio_file)

## Waveforms of different classes

In [None]:
# Preview the first rows again as a reference for the columns used below
df.head()

In [None]:
# `librosa.display.waveplot` was removed in librosa 0.10 in favour of `waveshow`.
# Use whichever function this librosa version provides (the `or` only touches
# `waveplot` when `waveshow` is absent).
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

# Plot the waveform of the first clip listed for each class.
for ID in classID:
    # Rows of this class; a boolean mask replaces the row-by-row scan of the table
    class_rows = df.loc[df["classID"] == ID]
    if class_rows.empty:
        # No clip for this ID: nothing to plot
        continue
    first_row = class_rows.iloc[0]

    # Same module-level names as before, in case cells further down read them
    file = first_row['slice_file_name']
    folder = str(first_row['fold'])
    class_id = ID
    class_ = first_row['class']
    audio_file = "../input/urbansound8k/" + "fold" + folder + "/" + file
    print(audio_file)

    data,sample_rate = librosa.load(audio_file)

    plt.figure(figsize=(14,5))
    plot_waveform(data, sr = sample_rate)
    plt.title(class_)
    plt.show()