**Importing Required Libraries**

In [None]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
from keras import layers
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import classification_report
import os
import warnings
from IPython.display import HTML, display
import time
warnings.filterwarnings('ignore')

**Reading Metadata**

In [None]:
# Load the speaker metadata table, keyed by speaker id, and preview it.
data = pd.read_csv(
    "/kaggle/input/speech-accent-archive/speakers_all.csv",
    index_col='speakerid',
)
data.head()

**Data types of the metadata columns**

In [None]:
# Print a summary of the metadata frame via DataFrame.info().
data.info()

**Dropping columns 8–10 (they contain null values) and filling the remaining nulls**

In [None]:
# Remove the columns at positions 8, 9 and 10, then replace every remaining
# missing value with the literal string 'NaN'.
data = data.drop(columns=data.columns[8:11]).fillna('NaN')
data.head()

**Determining frequency features of the waveform**

In [None]:
from pathlib import Path
directory_path='/kaggle/input/speech-accent-archive/recordings/recordings/'
def feature_engineering(directory_path,data):
    """Extract one row of summary audio features per recording.

    For every row of ``data`` the file ``<directory_path>/<filename>.mp3`` is
    loaded with librosa and reduced to the mean of several spectral features
    plus the mean of each MFCC coefficient.

    Parameters
    ----------
    directory_path : str
        Directory containing the ``.mp3`` recordings (with or without a
        trailing separator).
    data : pandas.DataFrame
        Must provide ``filename`` (without extension) and ``country`` columns.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``filename``, ``country``, ``rms``, ``chroma_stft``,
        ``spec_cent``, ``spec_bw``, ``rolloff``, ``zcr``, then ``mfcc0``,
        ``mfcc1``, ... Rows whose audio file is missing are skipped (a message
        is printed); an empty frame is returned when nothing could be loaded.
    """
    records=[]
    for _, row in data.iterrows():
        # Build the path once so the existence check and the load always agree.
        audio_path=os.path.join(directory_path,row['filename']+'.mp3')
        if not os.path.isfile(audio_path):
            print('File '+str(row['filename'])+".mp3 doesn't exist")
            continue

        y, sr=librosa.load(audio_path)
        # A fresh dict per recording; key order fixes the output column order.
        record={
            'filename':row['filename'],
            'country':row['country'],
            'rms':np.mean(librosa.feature.rms(y=y)),
            'chroma_stft':np.mean(librosa.feature.chroma_stft(y=y, sr=sr)),
            'spec_cent':np.mean(librosa.feature.spectral_centroid(y=y,sr=sr)),
            'spec_bw':np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
            'rolloff':np.mean(librosa.feature.spectral_rolloff(y=y,sr=sr)),
            'zcr':np.mean(librosa.feature.zero_crossing_rate(y)),
        }
        # Each row of the MFCC matrix is one coefficient over time.
        for i, coefficient in enumerate(librosa.feature.mfcc(y=y, sr=sr)):
            record['mfcc'+str(i)]=np.mean(coefficient)
        records.append(record)

        # Light progress report instead of one line per file.
        if len(records)%100==0:
            print('Processed '+str(len(records))+' files')

    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(records)
    

In [None]:
# Extract audio features for every row of the metadata frame. Each recording
# is loaded from disk inside feature_engineering, so this is presumably the
# slow step of the notebook; the following cells save the result to CSV.
df=feature_engineering(directory_path,data)

**Saving the processed metadata in order to avoid recomputing the features later**

In [None]:
# Persist the extracted features. The frame's index is written as well, so it
# comes back as an 'Unnamed: 0' column when the file is re-read; that column is
# dropped again before the train/test split.
df.to_csv('processed_metadata.csv')

**Reading the processed metadata**

In [None]:
# Reload the saved feature table from disk.
data_to_fit=pd.read_csv('processed_metadata.csv')

In [None]:
# Preview the first rows of the reloaded feature table.
data_to_fit.head()

In [None]:
# Count recordings per country to inspect the class balance.
data_to_fit['country'].value_counts()

Note: From the above table we can infer that the data is imbalanced.

**Sampling Dataset**

In [None]:
# Keep only countries with at least 35 recordings.
freq=data_to_fit['country'].value_counts()
frequent_values=freq[freq>=35].index
data_to_fit=data_to_fit[data_to_fit['country'].isin(frequent_values)]

# Down-sample the over-represented countries. Each value is the fraction of
# that country's rows to discard. A fixed random_state makes the selection
# (and therefore every downstream result) reproducible across runs.
drop_fractions={
    'usa':.90,
    'china':.45,
    'india':.4,
    'canada':.1,
    'south korea':.1,
    'uk':.4,
}
for country, frac in drop_fractions.items():
    rows_to_drop=data_to_fit[data_to_fit['country']==country].sample(frac=frac, random_state=42).index
    data_to_fit=data_to_fit.drop(rows_to_drop)
data_to_fit['country'].value_counts()

**Label Encoding**

In [None]:
# Encode country names as integer class ids. The encoder is kept in `le` so
# predictions can be mapped back to country names later.
# Note: this overwrites the 'country' column, so re-running the cell would
# refit the encoder on the already-encoded values.
le = preprocessing.LabelEncoder()
le.fit(data_to_fit['country'].astype(str))
data_to_fit['country'] = le.transform(data_to_fit['country'].astype(str))

In [None]:
# Show the distinct encoded class ids.
data_to_fit['country'].unique()

**Splitting the data into training and test sets**

In [None]:
# Hold out 20% of the rows for evaluation. The features are every column
# except the label, the file name and the index column written by to_csv.
# stratify keeps the class proportions equal in both splits (the classes are
# imbalanced), and random_state makes the split reproducible.
x_train, x_test, y_train, y_test=train_test_split(
    data_to_fit.drop(columns=['country','filename','Unnamed: 0']),
    data_to_fit['country'],
    test_size=0.2,
    stratify=data_to_fit['country'],
    random_state=42,
)

**Model Creation**

In [None]:
# One softmax output unit per encoded country. Derive the count from the
# fitted label encoder instead of hard-coding it, so the network stays valid
# if the frequency filter keeps a different number of countries.
n_classes=len(le.classes_)

# Fully connected classifier over the per-recording feature vector.
model=Sequential()
model.add(layers.Dense(100, activation='relu',input_shape=(x_train.shape[1],)))
model.add(layers.Dense(200, activation='relu'))
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dense(n_classes, activation='softmax'))
# Labels are integer class ids, hence the sparse categorical loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])
# model.fit returns the training History object.
classifier=model.fit(x_train, y_train, epochs=1300)

**Creating Testing Dataset as per Assignment**

In [None]:
# Testing as per assignment: build a one-row metadata frame for the supplied
# recording and run it through the same feature extraction as the training data.
df_test = pd.DataFrame([{'filename': 'shivam_sharma', 'country': 'india'}])
metadata_test = feature_engineering('../input/test-audio/', df_test)

In [None]:
# Keep only the numeric feature columns for prediction.
metadata_test = metadata_test.drop(columns=['filename', 'country'])

In [None]:
# Sequential.predict_classes was removed from Keras; take the argmax of the
# softmax probabilities instead, then map the class id back to a country name.
le.inverse_transform(np.argmax(model.predict(metadata_test), axis=-1))

In [None]:
# Predicted class id = argmax of the softmax output (predict_classes was
# removed from Keras).
y_pred=np.argmax(model.predict(x_test), axis=-1)
# Take the display names from the fitted encoder so each row is labelled with
# the country that actually has that class id (LabelEncoder sorts its classes,
# so a hand-written list can easily be out of order). Passing labels explicitly
# keeps names and ids aligned even if a class is absent from y_test.
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
))