In [123]:
import librosa
import os
import pandas as pd
import numpy as np

audio_dir = 'data/gun_sound_v2'

def extract_mfcc(audio_path, n_mfcc=13):
    y, sr = librosa.load(audio_path)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfcc, axis=1) 

audio_features = []

for audio_file in os.listdir(audio_dir):
    if audio_file.endswith('.mp3'):
        audio_path = os.path.join(audio_dir, audio_file)
        mfcc_features = extract_mfcc(audio_path)
        audio_features.append([audio_file] + mfcc_features.tolist())

mfcc_df = pd.DataFrame(audio_features, columns=["name"] + [f"mfcc_{i}" for i in range(13)])
mfcc_df.to_csv('mfcc_features.csv', index=False)

In [125]:
#audio_features

In [126]:
train1 = pd.read_csv('data/v3_exp1_train.csv')
train2 = pd.read_csv('data/v3_exp2_train.csv')
train3 = pd.read_csv('data/v3_exp3_train.csv')

combined_train = pd.concat([train1, train2, train3], ignore_index=True)

test1 = pd.read_csv('data/v3_exp1_test.csv')
test2 = pd.read_csv('data/v3_exp2_test.csv')
test3 = pd.read_csv('data/v3_exp3_test.csv')

combined_test = pd.concat([test1, test2, test3], ignore_index=True)

print(combined_train.shape)
print(combined_test.shape)

(3537, 4)
(1015, 4)


In [127]:
mfcc_df = pd.read_csv('mfcc_features.csv')


In [128]:
combined_train.dropna(subset=['dist'], inplace=True)
combined_test.dropna(subset=['dist'], inplace=True)

In [129]:
print(combined_train['dist'].unique())

['0m' '100m' '200m' '400m' '50m' '600m']


In [130]:
def preprocess_distance(distance_str):
    return float(distance_str[:-1]) 


In [131]:
combined_train['dist'] = combined_train['dist'].apply(preprocess_distance)
combined_test['dist'] = combined_test['dist'].apply(preprocess_distance)

In [132]:
train_with_mfcc = pd.merge(combined_train, mfcc_df, on='name')
test_with_mfcc = pd.merge(combined_test, mfcc_df, on='name')

print(train_with_mfcc.shape)
print(test_with_mfcc.shape)

(3418, 17)
(945, 17)


In [133]:
train_with_mfcc.head()

Unnamed: 0,name,cate,dist,dire,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12
0,ak_0m_center_0001.mp3,ak,0.0,center,-351.239105,53.565125,11.088269,8.196982,1.936979,7.175904,3.30731,0.957629,2.787974,5.012168,0.532845,4.628334,-0.23891
1,ak_0m_center_0002.mp3,ak,0.0,center,-321.528625,49.5075,7.170056,13.769941,1.18518,10.565263,2.595757,1.628103,1.459195,6.466399,-0.606562,3.867482,-1.246346
2,ak_0m_center_0003.mp3,ak,0.0,center,-416.266937,62.881073,13.108184,3.899353,-0.033276,6.804298,2.947388,-1.413968,0.030184,2.951718,-0.949337,2.316116,-0.80664
3,ak_0m_center_0004.mp3,ak,0.0,center,-278.62265,56.087826,11.520677,21.156794,5.208237,15.305903,8.001134,7.542254,8.594604,10.176992,4.099832,8.510305,2.827379
4,ak_0m_center_0006.mp3,ak,0.0,center,-369.432556,60.8074,17.566315,21.224545,5.947494,14.679273,7.059857,5.183119,5.629483,11.344815,5.441309,7.889458,3.987313


In [134]:
X_train = train_with_mfcc.drop(columns=['name', 'cate', 'dist', 'dire']).values
y_cate_train = train_with_mfcc['cate'].values
y_dist_train = train_with_mfcc['dist'].values
y_direction_train = train_with_mfcc['dire'].values   


In [135]:
X_test = test_with_mfcc.drop(columns=['name', 'cate', 'dist', 'dire']).values
y_cate_test = test_with_mfcc['cate'].values
y_dist_test = test_with_mfcc['dist'].values
y_direction_test = test_with_mfcc['dire'].values

In [136]:
print(X.shape,y_direction.shape,y_cate.shape,y_dist.shape)

(3418, 13) (3418,) (3418,) (3418,)


In [137]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np

le_cate = LabelEncoder()
y_cate_train_encoded = le_cate.fit_transform(y_cate_train)

le_direction = LabelEncoder()
y_direction_train_encoded = le_direction.fit_transform(y_direction_train)

y_dist_train_encoded = LabelEncoder().fit_transform(y_dist_train)

y_cate_test_encoded = le_cate.transform(y_cate_test)
y_direction_test_encoded = le_direction.transform(y_direction_test)
y_dist_test_encoded = LabelEncoder().fit_transform(y_dist_test)

In [138]:
import tensorflow as tf
from tensorflow.keras import layers, models

input_shape = (X.shape[1],) 

inputs = layers.Input(shape=input_shape)

x = layers.Dense(128, activation='relu')(inputs)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.3)(x)

category_output = layers.Dense(len(np.unique(y_cate_train_encoded)), activation='softmax', name='category_output')(x)
direction_output = layers.Dense(len(np.unique(y_direction_train_encoded)), activation='softmax', name='direction_output')(x)
distance_output = layers.Dense(len(np.unique(y_dist_train_encoded)), activation='softmax', name='distance_output')(x)

model = models.Model(inputs=inputs, outputs=[category_output, direction_output, distance_output])


In [139]:
model.compile(optimizer='adam',
              loss={'category_output': 'sparse_categorical_crossentropy', 
                    'direction_output': 'sparse_categorical_crossentropy', 
                    'distance_output': 'sparse_categorical_crossentropy'},
              metrics={'category_output': 'accuracy', 
                       'direction_output': 'accuracy', 
                       'distance_output': 'accuracy'})

In [140]:
model.summary()

In [141]:
history = model.fit(X_train,
                    {'category_output': y_cate_train_encoded, 
                     'direction_output': y_direction_train_encoded, 
                     'distance_output': y_dist_train_encoded},
                    epochs=100,
                    batch_size=32,  
                    validation_split=0.2, 
                    verbose=1)


Epoch 1/100


[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 15ms/step - category_output_accuracy: 0.0759 - direction_output_accuracy: 0.2670 - distance_output_accuracy: 0.1910 - loss: 85.2096 - val_category_output_accuracy: 0.0599 - val_direction_output_accuracy: 0.5395 - val_distance_output_accuracy: 0.5395 - val_loss: 6.9224
Epoch 2/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - category_output_accuracy: 0.2228 - direction_output_accuracy: 0.3757 - distance_output_accuracy: 0.3748 - loss: 7.6309 - val_category_output_accuracy: 0.0599 - val_direction_output_accuracy: 0.5395 - val_distance_output_accuracy: 0.5395 - val_loss: 6.7998
Epoch 3/100
[1m86/86[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - category_output_accuracy: 0.2486 - direction_output_accuracy: 0.4050 - distance_output_accuracy: 0.4082 - loss: 6.9412 - val_category_output_accuracy: 0.0599 - val_direction_output_accuracy: 0.5395 - val_distance_output_accuracy: 0.5395 

In [142]:
predictions = model.predict(X_test)

pred_category = predictions[0]
pred_direction = predictions[1]
pred_distance = predictions[2]

category_accuracy = np.mean(np.argmax(pred_category, axis=1) == y_cate_test_encoded)
direction_accuracy = np.mean(np.argmax(pred_direction, axis=1) == y_direction_test_encoded)
distance_accuracy = np.mean(np.argmax(pred_distance, axis=1) == y_dist_test_encoded)

print(f"Category Accuracy: {category_accuracy:.4f}")
print(f"Direction Accuracy: {direction_accuracy:.4f}")
print(f"Distance Accuracy: {distance_accuracy:.4f}")

[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Category Accuracy: 0.7164
Direction Accuracy: 0.8529
Distance Accuracy: 0.8931
