# HISTORY

* This dataset contains 8732 labeled sound excerpts (<=4s) of urban sounds from 10 classes: air_conditioner, car_horn, children_playing, dog_bark, drilling, engine_idling, gun_shot, jackhammer, siren, and street_music. The classes are drawn from the urban sound taxonomy. For a detailed description of the dataset and how it was compiled please refer to our paper.
* All excerpts are taken from field recordings uploaded to www.freesound.org. The files are pre-sorted into ten folds (folders named fold1-fold10) to help in the reproduction of and comparison with the automatic classification results reported in the article above.

* In addition to the sound excerpts, a CSV file containing metadata about each excerpt is also provided.


# PACKAGES AND LIBRARIES

In [None]:
#GENERAL
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random
#PATH PROCESS
import os
import os.path
from pathlib import Path
import glob
#IMAGE PROCESS
from PIL import Image
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
from keras.applications.vgg16 import preprocess_input, decode_predictions
import imageio
from IPython.display import Image
import matplotlib.image as mpimg
#MUSIC PROCESS
import pydub
from scipy.io.wavfile import read, write
import librosa
import librosa.display
import IPython
from IPython.display import Audio
import scipy
#SCALER & TRANSFORMATION
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split
from keras import regularizers
from sklearn.preprocessing import LabelEncoder
#ACCURACY CONTROL
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_auc_score, roc_curve
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
#OPTIMIZER
from keras.optimizers import RMSprop,Adam,Optimizer,Optimizer, SGD
#MODEL LAYERS
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization,MaxPooling2D,BatchNormalization,\
                        Permute, TimeDistributed, Bidirectional,GRU, SimpleRNN,\
LSTM, GlobalAveragePooling2D, SeparableConv2D, ZeroPadding2D, Convolution2D, ZeroPadding2D,Reshape,\
Conv2DTranspose, LeakyReLU, Conv1D, AveragePooling1D, MaxPooling1D
from keras import models
from keras import layers
import tensorflow as tf
from keras.applications import VGG16,VGG19,inception_v3
from keras import backend as K
from keras.utils import plot_model
from keras.datasets import mnist
import keras
#SKLEARN CLASSIFIER
from xgboost import XGBClassifier, XGBRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from catboost import CatBoostClassifier, CatBoostRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV
#IGNORING WARNINGS
from warnings import filterwarnings
filterwarnings("ignore",category=DeprecationWarning)
filterwarnings("ignore", category=FutureWarning) 
filterwarnings("ignore", category=UserWarning)

# PATH, LABEL, TRANSFORMATION

#### MAIN CSV

In [None]:
# Load the UrbanSound8K metadata table from the dataset's input folder.
Meta_Data_CSV = pd.read_csv("../input/urbansound8k/UrbanSound8K.csv")

In [None]:
# head(-1) selects every row except the last one, so this prints pandas'
# (truncated) view of almost the whole metadata table.
print(Meta_Data_CSV.head(-1))

In [None]:
# Show how many rows carry each value of the "class" column.
print(Meta_Data_CSV["class"].value_counts())

In [None]:
# Count the missing values in every metadata column.
print(Meta_Data_CSV.isnull().sum())

#### TO ARRAY

In [None]:
# Copy the three metadata columns used below into standalone NumPy arrays.
File_Name_Array = np.array(Meta_Data_CSV["slice_file_name"])
Fold_Array = np.array(Meta_Data_CSV["fold"])
Class_Array = np.array(Meta_Data_CSV["class"])

# if it is necessary

In [None]:
# Optional inspection: the file-name array built from "slice_file_name".
print(File_Name_Array)

In [None]:
# Optional inspection: the fold number array built from "fold".
print(Fold_Array)

In [None]:
# Optional inspection: the label array built from "class".
print(Class_Array)

#### DETERMINING FILE LOCATIONS

In [None]:
# Build one wav path per metadata row ("<root>/fold<N>/<file name>") and keep
# the matching class label at the same position in Category_List.
Path_Wav_List = []
Category_List = []

# The row count comes from the frame itself rather than a hard-coded 8732, so
# a smaller CSV no longer raises KeyError and a larger one is not truncated.
for path_number in range(len(Meta_Data_CSV)):
    File_Path_Name = "../input/urbansound8k/fold" + str(Meta_Data_CSV["fold"][path_number]) + "/" + Meta_Data_CSV["slice_file_name"][path_number]
    Path_Wav_List.append(File_Path_Name)
    Category_List.append(Meta_Data_CSV["class"][path_number])

In [None]:
# Sanity check: the first five generated wav paths.
print(Path_Wav_List[0:5])

In [None]:
# Sanity check: the class labels belonging to those first five paths.
print(Category_List[0:5])

#### TO SERIES

In [None]:
# Wrap both lists in named Series; the names become the DataFrame column
# headers ("WAV" and "CATEGORY") when the two are concatenated.
Path_Wav_Series = pd.Series(Path_Wav_List,name="WAV").astype(str)
Category_Series = pd.Series(Category_List,name="CATEGORY")

In [None]:
# Inspect the path Series.
print(Path_Wav_Series)

In [None]:
# Inspect the label Series.
print(Category_Series)

#### TO DATAFRAME

In [None]:
# Join the two Series side by side (axis=1) into a two-column frame: WAV, CATEGORY.
Main_Train_Data = pd.concat([Path_Wav_Series,Category_Series],axis=1)

In [None]:
# head(-1) drops only the last row, so this shows pandas' truncated view of the frame.
print(Main_Train_Data.head(-1))

In [None]:
# Rows per category in the combined frame.
print(Main_Train_Data["CATEGORY"].value_counts())

#### SHUFFLE

In [None]:
# Shuffle all rows (frac=1) and renumber the index from 0.
# NOTE(review): no random_state is passed, so the row order - and therefore
# which clip each fixed index below refers to - differs on every run.
Main_Train_Data = Main_Train_Data.sample(frac=1).reset_index(drop=True)

In [None]:
# Inspect the shuffled frame (every row except the last).
print(Main_Train_Data.head(-1))

#### SAVING

In [None]:
# Persist the shuffled path/label table to the working directory.
# The index is written too (to_csv default), as an unnamed first column.
Main_Train_Data.to_csv("Wav_Type.csv")

# PROCESS FUNCTIONS

#### NOISE FUNCTION

In [None]:
def noise_function(data):
    """Return ``data`` with random Gaussian noise added.

    The noise scale is a random fraction (below 0.9%) of the largest sample
    value in ``data``. One standard-normal sample is drawn per element along
    the first axis, and the input array itself is left untouched.
    """
    peak = np.amax(data)
    amplitude = 0.009 * np.random.uniform() * peak
    gaussian = np.random.normal(size=data.shape[0])

    return data + amplitude * gaussian

#### STRETCH FUNCTION

In [None]:
def stretch_function(data,rate=0.8):
    """Time-stretch an audio signal with ``librosa.effects.time_stretch``.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to stretch.
    rate : float, optional
        Stretch factor forwarded to librosa. Per the librosa documentation,
        values below 1 slow the signal down and values above 1 speed it up.

    Returns
    -------
    np.ndarray
        The stretched signal as returned by librosa.
    """
    # ``rate`` is keyword-only in librosa >= 0.10; passing it by name also
    # works on older releases, whereas the positional form raises TypeError
    # on the newer ones.
    return librosa.effects.time_stretch(data,rate=rate)

#### SHIFT FUNCTION

In [None]:
def shift_function(data):
    """Circularly rotate ``data`` by a random number of samples.

    The offset is drawn uniformly between -5000 and 5000 and truncated
    toward zero. Samples pushed past one end of the array wrap around to
    the other end (``np.roll``).
    """
    offset = np.random.uniform(-5,5) * 1000

    return np.roll(data,int(offset))

#### PITCH FUNCTION

In [None]:
def pitch_function(data,sampling_rate,pitch_factor=0.5):
    """Pitch-shift an audio signal with ``librosa.effects.pitch_shift``.

    Parameters
    ----------
    data : np.ndarray
        Audio samples to shift.
    sampling_rate : int
        Sampling rate of ``data``; forwarded to librosa as ``sr``.
    pitch_factor : float, optional
        Number of (possibly fractional) steps to shift by; forwarded as
        ``n_steps``. With librosa's default ``bins_per_octave=12`` one step
        is a semitone.

    Returns
    -------
    np.ndarray
        The pitch-shifted signal as returned by librosa.
    """
    # ``sr`` and ``n_steps`` are keyword-only in librosa >= 0.10; naming them
    # keeps the call valid on both old and new releases.
    return librosa.effects.pitch_shift(data,sr=sampling_rate,n_steps=pitch_factor)

#### SPECSHOW FUNCTION

In [None]:
def specshow_function(wav_path):
    """Draw the dB-scaled STFT spectrogram of the audio file at ``wav_path``.

    A new 13x7 matplotlib figure is opened first; the file is then loaded
    with librosa's default settings and its STFT magnitude, converted to
    decibels, is rendered with time on the x axis and Hz on the y axis.
    """
    plt.figure(figsize=(13,7))

    samples,rate = librosa.load(wav_path)

    magnitude = abs(librosa.stft(samples))
    decibels = librosa.amplitude_to_db(magnitude)
    librosa.display.specshow(decibels,sr=rate,x_axis="time",y_axis="hz")

#### WAVEPLOT FUNCTION

In [None]:
def waveplot_function(wav_path):
    """Plot the waveform of the audio file at ``wav_path``.

    A new 13x7 matplotlib figure is opened, the file is loaded with
    librosa's default settings and its samples are drawn against time.
    """
    plt.figure(figsize=(13,7))

    audio_type,sample_rate = librosa.load(wav_path)

    # librosa.display.waveplot is absent from newer librosa releases; use its
    # replacement waveshow when available and fall back to waveplot otherwise.
    draw_wave = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
    draw_wave(audio_type,sr=sample_rate)

#### PLAYING FUNCTION

In [None]:
def playing_function(wav_path):
    """Load the audio file at ``wav_path`` and return an IPython ``Audio`` player for it."""
    samples,rate = librosa.load(wav_path)
    player = Audio(samples,rate=rate)

    return player

#### EXTRACT FUNCTION

In [None]:
def extract_function(data,sample_rate=22050):
    """Summarise an audio signal as one flat feature vector.

    Five librosa feature matrices are computed - zero-crossing rate, chroma
    (from the STFT magnitude), MFCC, RMS energy and mel spectrogram. Each is
    averaged along its second axis and the averages are concatenated in
    that order.

    Parameters
    ----------
    data : np.ndarray
        Audio samples, e.g. the first value returned by ``librosa.load``.
    sample_rate : int, optional
        Sampling rate of ``data``. Earlier this function read a notebook-level
        global named ``sample_rate``, which raised ``NameError`` unless some
        other cell had already defined one. The default of 22050 is the rate
        ``librosa.load`` resamples to when no ``sr`` is given, which is how
        the clips in this notebook are loaded.

    Returns
    -------
    np.ndarray
        1-D float64 vector. With librosa's default feature sizes this is
        presumably 1 + 12 + 20 + 1 + 128 = 162 values - verify against the
        installed librosa version.
    """
    stft_output = np.abs(librosa.stft(data))

    feature_matrices = (
        librosa.feature.zero_crossing_rate(y=data),
        librosa.feature.chroma_stft(S=stft_output,sr=sample_rate),
        librosa.feature.mfcc(y=data,sr=sample_rate),
        librosa.feature.rms(y=data),
        librosa.feature.melspectrogram(y=data,sr=sample_rate),
    )

    # Start from an empty float64 array so the concatenated result keeps the
    # float64 dtype regardless of the dtype librosa returns.
    pieces = [np.array([])]
    pieces.extend(np.mean(matrix.T,axis=0) for matrix in feature_matrices)

    return np.hstack(pieces)

#### EXPORT FUNCTION

In [None]:
def export_function(path):
    """Build three feature rows for the audio file at ``path``.

    Only the first second of the file is loaded. The rows, stacked in this
    order, are the features of:

    1. the unmodified clip,
    2. the clip with random noise added (``noise_function``),
    3. the clip time-stretched and then pitch-shifted
       (``stretch_function`` followed by ``pitch_function``).

    Returns
    -------
    np.ndarray
        2-D array with one row per variant, each produced by
        ``extract_function``.
    """
    samples,rate = librosa.load(path,duration=1.0)

    plain_features = extract_function(samples)

    noisy_features = extract_function(noise_function(samples))

    stretched = stretch_function(samples)
    repitched = pitch_function(stretched,rate)
    augmented_features = extract_function(repitched)

    return np.vstack((plain_features,noisy_features,augmented_features))

# ANALYSIS

In [None]:
# Switch matplotlib to its "dark_background" style for the figures drawn below.
plt.style.use("dark_background")

#### PLAYING

In [None]:
# Listen to the clip at row 2 of the shuffled frame (a different clip on each run).
playing_function(Main_Train_Data["WAV"][2])

In [None]:
# Listen to the clip at row 2333 of the shuffled frame.
playing_function(Main_Train_Data["WAV"][2333])

In [None]:
# Listen to the clip at row 7564 of the shuffled frame.
playing_function(Main_Train_Data["WAV"][7564])

In [None]:
# Listen to the clip at row 4 of the shuffled frame.
playing_function(Main_Train_Data["WAV"][4])

#### WAVEPLOT

In [None]:
# Waveform of the clip at row 2 (the first clip played in the PLAYING section).
waveplot_function(Main_Train_Data["WAV"][2])

In [None]:
# Waveform of the clip at row 2351 of the shuffled frame.
waveplot_function(Main_Train_Data["WAV"][2351])

In [None]:
# Waveform of the clip at row 2551 of the shuffled frame.
waveplot_function(Main_Train_Data["WAV"][2551])

In [None]:
# Waveform of the clip at row 2451 of the shuffled frame.
waveplot_function(Main_Train_Data["WAV"][2451])

In [None]:
# Waveform of the clip at row 4 (the last clip played in the PLAYING section).
waveplot_function(Main_Train_Data["WAV"][4])

#### SPECSHOW

In [None]:
# Spectrogram of the clip at row 45 of the shuffled frame.
specshow_function(Main_Train_Data["WAV"][45])

In [None]:
# Spectrogram of the clip at row 145 of the shuffled frame.
specshow_function(Main_Train_Data["WAV"][145])

In [None]:
# Spectrogram of the clip at row 2318 of the shuffled frame.
specshow_function(Main_Train_Data["WAV"][2318])

In [None]:
# Spectrogram of the clip at row 7518 of the shuffled frame.
specshow_function(Main_Train_Data["WAV"][7518])

#### NOISE

In [None]:
# Noise demo: load the clip at row 2000, add random noise, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][2000])

noise_injection = noise_function(wav_type)

# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(noise_injection,sr=sample_rate)
Audio(noise_injection,rate=sample_rate)

In [None]:
# Noise demo: load the clip at row 6020, add random noise, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][6020])

noise_injection = noise_function(wav_type)

# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(noise_injection,sr=sample_rate)
Audio(noise_injection,rate=sample_rate)

In [None]:
# Noise demo: load the clip at row 450, add random noise, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][450])

noise_injection = noise_function(wav_type)

# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(noise_injection,sr=sample_rate)
Audio(noise_injection,rate=sample_rate)

#### STRETCH

In [None]:
# Stretch demo: load the clip at row 450, time-stretch it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][450])

stretch_audio = stretch_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(stretch_audio,sr=sample_rate)
Audio(stretch_audio,rate=sample_rate)

In [None]:
# Stretch demo: load the clip at row 150, time-stretch it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][150])

stretch_audio = stretch_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(stretch_audio,sr=sample_rate)
Audio(stretch_audio,rate=sample_rate)

In [None]:
# Stretch demo: load the clip at row 1150, time-stretch it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][1150])

stretch_audio = stretch_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(stretch_audio,sr=sample_rate)
Audio(stretch_audio,rate=sample_rate)

#### SHIFT

In [None]:
# Shift demo: load the clip at row 1544, rotate it by a random offset, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][1544])

shift_audio = shift_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(shift_audio,sr=sample_rate)
Audio(shift_audio,rate=sample_rate)

In [None]:
# Shift demo: load the clip at row 544, rotate it by a random offset, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][544])

shift_audio = shift_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(shift_audio,sr=sample_rate)
Audio(shift_audio,rate=sample_rate)

In [None]:
# Shift demo: load the clip at row 5637, rotate it by a random offset, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][5637])

shift_audio = shift_function(wav_type)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(shift_audio,sr=sample_rate)
Audio(shift_audio,rate=sample_rate)

#### PITCH

In [None]:
# Pitch demo: load the clip at row 637, pitch-shift it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][637])

pitch_audio = pitch_function(wav_type,sample_rate)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(pitch_audio,sr=sample_rate)
Audio(pitch_audio,rate=sample_rate)

In [None]:
# Pitch demo: load the clip at row 765, pitch-shift it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][765])

pitch_audio = pitch_function(wav_type,sample_rate)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(pitch_audio,sr=sample_rate)
Audio(pitch_audio,rate=sample_rate)

In [None]:
# Pitch demo: load the clip at row 5, pitch-shift it, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][5])

pitch_audio = pitch_function(wav_type,sample_rate)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(pitch_audio,sr=sample_rate)
Audio(pitch_audio,rate=sample_rate)

#### SAME TIMEFRAME

In [None]:
# Fixed window demo: load up to 2.5 s of the clip at row 5, starting 0.6 s in, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][5],duration=2.5,offset=0.6)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(wav_type,sr=sample_rate)
Audio(wav_type,rate=sample_rate)

In [None]:
# Fixed window demo: load up to 2.5 s of the clip at row 1115, starting 0.6 s in, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][1115],duration=2.5,offset=0.6)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(wav_type,sr=sample_rate)
Audio(wav_type,rate=sample_rate)

In [None]:
# Fixed window demo: load up to 2.5 s of the clip at row 4155, starting 0.6 s in, then plot and play it.
figure = plt.figure(figsize=(14,5))

wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][4155],duration=2.5,offset=0.6)
# librosa.display.waveplot is absent from newer librosa releases; use its
# replacement waveshow when available and fall back to waveplot otherwise.
plot_waveform = getattr(librosa.display,"waveshow",None) or librosa.display.waveplot
plot_waveform(wav_type,sr=sample_rate)
Audio(wav_type,rate=sample_rate)

#### SIMPLE CHECKING

In [None]:
# Load up to 2.5 s of the clip at row 5, starting 0.6 s in, and report what
# librosa returned: sample array shape, sample dtype and sampling rate.
wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][5],duration=2.5,offset=0.6)

print(wav_type.shape)
print(wav_type.dtype)
print(sample_rate)

In [None]:
# Same check for the clip at row 5234: array shape, dtype and sampling rate
# of the 0.6 s-offset, at most 2.5 s long excerpt.
wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][5234],duration=2.5,offset=0.6)

print(wav_type.shape)
print(wav_type.dtype)
print(sample_rate)

In [None]:
# Same check for the clip at row 434: array shape, dtype and sampling rate
# of the 0.6 s-offset, at most 2.5 s long excerpt.
wav_type,sample_rate = librosa.load(Main_Train_Data["WAV"][434],duration=2.5,offset=0.6)

print(wav_type.shape)
print(wav_type.dtype)
print(sample_rate)

#### END OF THE FIRST PART

For the next step, please see:
https://www.kaggle.com/scidatb/urban-sound-prediction-ii-ai-ml-process