In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
SR=22050
N_MFCC=40
N_MELS=128
DURATION=1
SAMPLES_PER_TRACK=SR*DURATION
CLASSES=['AR','Sniper','nogun']

In [3]:
dataset = pd.read_csv('dataset1.csv')

In [4]:


# def extract_mfcc(audio_path, n_mfcc=13):
#     y, sr = librosa.load(audio_path)
#     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
#     return np.mean(mfcc, axis=1) 

def extract_features(file_path):
    y,sr=librosa.load(file_path,sr=SR,duration=DURATION)
    if len(y)<SAMPLES_PER_TRACK:
        y=np.pad(y,(0,SAMPLES_PER_TRACK-len(y)))
    else:
        y=y[:SAMPLES_PER_TRACK]
    mfcc=librosa.feature.mfcc(y=y,sr=sr,n_mfcc=N_MFCC)
    mfcc=librosa.util.fix_length(mfcc,size=174,axis=1)
    mfcc=mfcc[...,np.newaxis]

    mel_spec=librosa.feature.melspectrogram(y=y,sr=sr)
    mel_spec=librosa.power_to_db(mel_spec,ref=np.max)
    # plt.figure(figsize=(10,6))
    # librosa.display.specshow(mel_spec,sr=SR,x_axis='time',y_axis='mel')
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Mel-Spectogram')
    # plt.tight_layout()
    # plt.show()
    mel_spec=librosa.util.fix_length(mel_spec,size=174,axis=1)
    mel_spec=mel_spec[...,np.newaxis]
    return mfcc,mel_spec


# audio_features_melspec = []
# audio_features_mfcc=[]

# for audio_file in os.listdir(audio_dir):
#     if audio_file.endswith('.wav'):
#         audio_path = os.path.join(audio_dir, audio_file)
#         mfcc_features,melspec_features = extract_features(audio_path)
#         audio_features_mfcc.append([audio_file] + mfcc_features.tolist())
#         audio_features_melspec.append([audio_file]+melspec_features.tolist())



In [5]:
def load_dataset(dataset_path):
    x_mfcc,x_melspec,gun_type,direction,distance=[],[],[],[],[]
    suffixes=['_gain.wav','_pan.wav','_pitch.wav','_reverse.wav','_speed.wav','_noise.wav','_original.wav']
    for i,class_name in enumerate(CLASSES):
        class_dir=os.path.join(dataset_path,class_name)
        for file_name in os.listdir(class_dir):
            if file_name.endswith(".wav"):
                file_path=os.path.join(class_dir,file_name)
                mfcc,mel_spec=extract_features(file_path)
                x_mfcc.append(mfcc)
                x_melspec.append(mel_spec)
                if class_name=='Sniper':
                    for suffix in suffixes:
                        if file_name.endswith(suffix):
                            file_name=file_name[:-len(suffix)]+'.wav'
                gun_type.append(i)
                if class_name=='nogun':
                    direction.append('None')
                    distance.append('None')
                else:
                    row=dataset[dataset['name']==file_name]
                    direction.append(row['dire'].values[0])
                    distance.append(row['dist'].values[0])
    x_mfcc=np.array(x_mfcc)
    x_melspec=np.array(x_melspec)
    gun_encoder=LabelEncoder()
    gun_types_encoded=gun_encoder.fit_transform(gun_type)
    gun_types_categorical=to_categorical(gun_types_encoded)
    gun_types_categorical=np.array(gun_types_categorical)
    direction_encoder=LabelEncoder()
    direction_types_encoded=direction_encoder.fit_transform(direction)
    direction_types_categorical=to_categorical(direction_types_encoded)
    direction_types_categorical=np.array(direction_types_categorical)
    distance_encoder=LabelEncoder()
    distance_types_encoded=distance_encoder.fit_transform(distance)
    distance_types_categorical=to_categorical(distance_types_encoded)
    distance_types_categorical=np.array(distance_types_categorical)
    return x_mfcc,x_melspec,gun_types_categorical,direction_types_categorical,distance_types_categorical


In [6]:
X_mfcc,X_melspec,y_gun,y_direction,y_distance=load_dataset('gun_sound_v9')

In [7]:
X_melspec_shape=X_melspec.shape
X_melspec_shape

(2807, 128, 174, 1)

In [8]:
X_mfcc_shape=X_mfcc.shape
X_mfcc_shape

(2807, 40, 174, 1)

In [9]:
y_gun.shape

(2807, 3)

In [10]:
y_direction.shape

(2807, 6)

In [11]:
X_mfcc_train, X_mfcc_test, X_melspec_train, X_melspec_test, y_gun_train, y_gun_test,y_distance_train,y_distance_test,y_direction_train,y_direction_test = train_test_split(
    X_mfcc, X_melspec, y_gun,y_distance,y_direction,test_size=0.3, random_state=42
)

# Then, split the training+validation set into training and validation sets
# X_mfcc_train, X_mfcc_val, X_melspec_train, X_melspec_val, y_gun_train, y_gun_val,y_distance_train,y_distance_val,y_direction_train,y_direction_val = train_test_split(
#     X_mfcc_trainval, X_melspec_trainval, y_gun_trainval,y_distance_trainval,y_direction_trainval, test_size=0.3, random_state=42
# )

In [12]:
y_gun_train=np.array(y_gun_train)
y_gun_train.shape

(1964, 3)

In [13]:
X_mfcc_test.shape

(843, 40, 174, 1)

In [14]:
import tensorflow as tf
from tensorflow.keras.metrics import AUC
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Dropout,GlobalAveragePooling2D,Dense,Input
from tensorflow.keras.models import Model
def create_model(input_shape_mfcc,input_shape_melspec,num_classes=3,num_directions=6,num_distances=7):
    input_mfcc=Input(shape=input_shape_mfcc,name='mfcc_input')
    x_mfcc=Conv2D(16,kernel_size=(3,3),activation='relu')(input_mfcc)
    x_mfcc=MaxPooling2D(pool_size=(2,2))(x_mfcc)
    x_mfcc=Dropout(0.3)(x_mfcc)

    x_mfcc=Conv2D(32,kernel_size=(3,3),activation='relu')(x_mfcc)
    x_mfcc=MaxPooling2D(pool_size=(2,2))(x_mfcc)
    x_mfcc=Dropout(0.3)(x_mfcc)

    x_mfcc=Conv2D(64,kernel_size=(3,3),activation='relu')(x_mfcc)
    x_mfcc=MaxPooling2D(pool_size=(1,2))(x_mfcc)
    x_mfcc=Dropout(0.3)(x_mfcc)
    
    x_mfcc=Conv2D(128,kernel_size=(3,3),activation='relu')(x_mfcc)
    x_mfcc=MaxPooling2D(pool_size=(2,2))(x_mfcc)
    x_mfcc=Dropout(0.3)(x_mfcc)
    
    x_mfcc=GlobalAveragePooling2D()(x_mfcc)

    input_melspec=Input(shape=input_shape_melspec,name='melspec_input')
    x_melspec=Conv2D(16,kernel_size=(3,3),activation='relu')(input_melspec)
    x_melspec=MaxPooling2D(pool_size=(2,2))(x_melspec)
    x_melspec=Dropout(0.3)(x_melspec)

    x_melspec=Conv2D(32,kernel_size=(3,3),activation='relu')(x_melspec)
    x_melspec=MaxPooling2D(pool_size=(2,2))(x_melspec)
    x_melspec=Dropout(0.3)(x_melspec)

    x_melspec=Conv2D(64,kernel_size=(3,3),activation='relu')(x_melspec)
    x_melspec=MaxPooling2D(pool_size=(2,2))(x_melspec)
    x_melspec=Dropout(0.3)(x_melspec)

    x_melspec=Conv2D(128,kernel_size=(3,3),activation='relu')(x_melspec)
    x_melspec=MaxPooling2D(pool_size=(2,2))(x_melspec)
    x_melspec=Dropout(0.3)(x_melspec)

    x_melspec = GlobalAveragePooling2D()(x_melspec)

    concatenated=tf.keras.layers.concatenate([x_mfcc,x_melspec])
    common_dense=Dense(128,activation='relu',name='concatenated')(concatenated)
    gunshot_output=Dense(num_classes,activation='softmax',name='gunshot_output')(common_dense)
    direction_output=Dense(num_directions,activation='softmax',name='direction_output')(common_dense)
    distance_output=Dense(num_distances,activation='softmax',name='distance_output')(common_dense)

    model=Model(inputs=[input_mfcc,input_melspec],outputs=[gunshot_output,direction_output,distance_output])
    
    return model

In [15]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
import numpy as np

def create_feature_extractor(input_shape_mfcc, input_shape_melspec):
    model = create_model(input_shape_mfcc, input_shape_melspec)
    feature_extractor = Model(inputs=model.inputs, outputs=model.get_layer('concatenated').output)
    return model,feature_extractor

In [16]:
input_shape_mfcc=(40,174,1)
input_shape_melspec=(128,174,1)
model,feature_extractor=create_feature_extractor(input_shape_mfcc,input_shape_melspec)

In [17]:
model.compile(optimizer='adam',
              loss={
                    'gunshot_output':'categorical_crossentropy',
                    'direction_output':'categorical_crossentropy',
                    'distance_output':'categorical_crossentropy'},
                metrics={
                    'gunshot_output': ['accuracy', 'Precision', 'Recall', AUC()],
                    'distance_output': ['accuracy', 'Precision', 'Recall', AUC()],
                    'direction_output': ['accuracy', 'Precision', 'Recall', AUC()]
                    })
model.summary()

In [18]:
from keras.callbacks import ModelCheckpoint
batch_size=32
valid_batch_size=32
callbacks=[ModelCheckpoint("best_model.keras",monitor='val_loss')]
history=model.fit([X_mfcc_train,X_melspec_train],
                  {
                      'gunshot_output':y_gun_train,
                      'direction_output':y_direction_train,
                      'distance_output':y_distance_train
                  },
                  validation_data=([X_mfcc_test,X_melspec_test],
                                    {
                                        'gunshot_output':y_gun_test,
                                        'direction_output':y_direction_test,
                                        'distance_output':y_distance_test
                                    }),
                    epochs=64,batch_size=32,callbacks=[callbacks])


Epoch 1/64
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 323ms/step - direction_output_Precision: 0.4830 - direction_output_Recall: 0.0460 - direction_output_accuracy: 0.2534 - direction_output_auc_2: 0.6108 - distance_output_Precision: 0.4049 - distance_output_Recall: 0.0478 - distance_output_accuracy: 0.2196 - distance_output_auc_1: 0.5914 - gunshot_output_Precision: 0.5335 - gunshot_output_Recall: 0.2866 - gunshot_output_accuracy: 0.5161 - gunshot_output_auc: 0.6811 - loss: 5.0943 - val_direction_output_Precision: 0.9273 - val_direction_output_Recall: 0.0605 - val_direction_output_accuracy: 0.3725 - val_direction_output_auc_2: 0.7394 - val_distance_output_Precision: 1.0000 - val_distance_output_Recall: 0.0486 - val_distance_output_accuracy: 0.3286 - val_distance_output_auc_1: 0.7627 - val_gunshot_output_Precision: 0.9620 - val_gunshot_output_Recall: 0.2100 - val_gunshot_output_accuracy: 0.5563 - val_gunshot_output_auc: 0.7613 - val_loss: 4.0982
Epoch 2/64
[1m62/6

In [19]:
features=feature_extractor.predict([X_mfcc,X_melspec])

[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 43ms/step


In [20]:
features

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [21]:
features

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [22]:
# layer_name='concatenated'
# intermediate_layer_model=Model(inputs=model.input,outputs=model.get_layer(layer_name).output)
# intermediate_layer_model.summary()

In [23]:
features.shape

(2807, 128)

In [24]:
y_gun.shape

(2807, 3)

In [25]:
y_combined = np.column_stack((y_gun, y_direction, y_distance))
X_train,X_test,y_train_combined,y_test_combined=train_test_split(features,y_combined,test_size=0.3)
y_gun_train = y_train_combined[:, 0]
y_dir_train = y_train_combined[:, 1]
y_dist_train = y_train_combined[:, 2]

y_gun_test = y_test_combined[:, 0]
y_dir_test = y_test_combined[:, 1]
y_dist_test = y_test_combined[:, 2]

In [26]:
import xgboost as xgb
xgb_model_gun=xgb.XGBClassifier()
xgb_model_gun.fit(X_train,y_gun_train)
xgb_model_direction = xgb.XGBClassifier()
xgb_model_direction.fit(X_train, y_dir_train)
xgb_model_distance = xgb.XGBClassifier()
xgb_model_distance.fit(X_train, y_dist_train)

In [27]:
gun_type_pred=xgb_model_gun.predict(X_test)
direction_type_pred = xgb_model_direction.predict(X_test)
distance_type_pred = xgb_model_distance.predict(X_test)

In [28]:
from sklearn.metrics import accuracy_score

# Evaluate accuracy for gun type
gun_accuracy = accuracy_score(y_gun_test, gun_type_pred)
print(f"Accuracy for gun type classification: {gun_accuracy * 100:.2f}%")

# Evaluate accuracy for direction type
direction_accuracy = accuracy_score(y_dir_test, direction_type_pred)
print(f"Accuracy for direction type classification: {direction_accuracy * 100:.2f}%")

# Evaluate accuracy for distance type
distance_accuracy = accuracy_score(y_dist_test, distance_type_pred)
print(f"Accuracy for distance type classification: {distance_accuracy * 100:.2f}%")


Accuracy for gun type classification: 98.70%
Accuracy for direction type classification: 98.93%
Accuracy for distance type classification: 99.05%


In [29]:
from sklearn.metrics import classification_report

# Gun type classification report
print("Gun Type Classification Report")
print(classification_report(y_gun_test, gun_type_pred))

# Direction type classification report
print("Direction Type Classification Report")
print(classification_report(y_dir_test, direction_type_pred))

# Distance type classification report
print("Distance Type Classification Report")
print(classification_report(y_dist_test, distance_type_pred))


Gun Type Classification Report
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99       464
         1.0       0.99      0.98      0.99       379

    accuracy                           0.99       843
   macro avg       0.99      0.99      0.99       843
weighted avg       0.99      0.99      0.99       843

Direction Type Classification Report
              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99       530
         1.0       0.98      0.99      0.99       313

    accuracy                           0.99       843
   macro avg       0.99      0.99      0.99       843
weighted avg       0.99      0.99      0.99       843

Distance Type Classification Report
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99       692
         1.0       0.99      0.95      0.97       151

    accuracy                           0.99       843
   macro avg       0.99 

In [31]:
import plotly.graph_objects as go
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['gunshot_output_accuracy'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_gunshot_output_accuracy'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for gunshot feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 640x480 with 0 Axes>

In [32]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['direction_output_accuracy'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_direction_output_accuracy'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for direction feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 640x480 with 0 Axes>

In [33]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['distance_output_accuracy'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_distance_output_accuracy'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for distance feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

<Figure size 640x480 with 0 Axes>

In [34]:
# checkpoint=tf.keras.callbacks.ModelCheckpoint('best_model.keras',monitor='val_gunshot_output_accuracy',save_best_only=True,mode='max')
# history=model.fit([X_mfcc_train,X_melspec_train],
#                   {
#                       'gunshot_output':y_gun_train,
#                       'direction_output':y_direction_train,
#                       'distance_output':y_distance_train
#                   },
#                   validation_data=([X_mfcc_val,X_melspec_val],
#                                     {
#                                         'gunshot_output':y_gun_val,
#                                         'direction_output':y_direction_val,
#                                         'distance_output':y_distance_val
#                                     }),
#                     epochs=30,batch_size=32,callbacks=[checkpoint])
# model = create_model(input_shape_mfcc, input_shape_melspec)
# model=tf.keras.models.load_model('best_model.keras')
# model.load_weights('best_model.keras')
# results=model.evaluate([X_mfcc_test,X_melspec_test],{
#                                         'gunshot_output':y_gun_test,
#                                         'direction_output':y_direction_test,
#                                         'distance_output':y_distance_test
#                                     })
# test_loss=results[0]
# test_gun_accuracy=results[1]
# test_direction_accuracy=results[2]
# test_distance_accuracy=results[3]

# # print(f'Test Accuracy:{test_accuracy*100:.2f}%')
# # print(f'Test Loss:{test_loss*100:.2f}%')