In [59]:
siren_sounds = ['siren_sound_1.wav','siren_sound_2.wav','siren_sound_3.wav','traffic_sound_1.wav']
traffic_sounds = ['traffic_sound_1.wav', 'traffic_sound_2.wav']

In [60]:
from random import choice
import librosa
import cv2
import time
import numpy as np
import pathlib
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.metrics import accuracy_score, classification_report
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense 
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import wave

In [61]:
data_train = pd.read_csv("Emergency_Vehicles/train.csv")
data_train.head()

Unnamed: 0,image_names,emergency_or_not
0,1503.jpg,0
1,1420.jpg,0
2,1764.jpg,0
3,1356.jpg,0
4,1117.jpg,0


In [62]:
#Preprocessing images  - Image to array
def preprocessing_img(file_path):
    img = load_img(f"Emergency_vehicles/train/{file_path}",target_size=(224,224))
    
    img_array = img_to_array(img)
    img_array /= 255.0
    return img_array

data_train["img_array"] = data_train["image_names"].apply(preprocessing_img)
data_train.head()

Unnamed: 0,image_names,emergency_or_not,img_array
0,1503.jpg,0,"[[[0.36862746, 0.3764706, 0.2901961], [0.38431..."
1,1420.jpg,0,"[[[0.84705883, 0.9137255, 0.9764706], [0.85098..."
2,1764.jpg,0,"[[[0.24313726, 0.2509804, 0.2], [0.22352941, 0..."
3,1356.jpg,0,"[[[0.7647059, 0.84705883, 0.7176471], [0.62352..."
4,1117.jpg,0,"[[[0.0, 0.03137255, 0.0], [0.1254902, 0.160784..."


In [63]:
def preprocessing_sound(file_path,type):
    if (type == 'siren'):
        audio,sr = librosa.load(f'SirenSounds/{file_path}',sr = 44100)
    else:
        audio,sr = librosa.load(f'TrafficSounds/{file_path}',sr = 44100)

    # Generate spectrogram
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db

In [64]:
new_column=[]

for index,row in data_train.iterrows():
    if (row['emergency_or_not'] == 1):
        file_path = choice(siren_sounds)
        if (file_path[0] == 't'):
            new_column.append(preprocessing_sound(file_path=file_path,type='traffic'))
        else:
            new_column.append(preprocessing_sound(file_path=file_path,type='siren'))
    else:
        file_path = choice(traffic_sounds)
        
        new_column.append(preprocessing_sound(file_path=file_path,type = 'traffic'))

data_train['sound'] = new_column

In [65]:
data_train.head()

Unnamed: 0,image_names,emergency_or_not,img_array,sound
0,1503.jpg,0,"[[[0.36862746, 0.3764706, 0.2901961], [0.38431...","[[-28.686245, -27.850946, -19.184986, -15.9539..."
1,1420.jpg,0,"[[[0.84705883, 0.9137255, 0.9764706], [0.85098...","[[-28.686245, -27.850946, -19.184986, -15.9539..."
2,1764.jpg,0,"[[[0.24313726, 0.2509804, 0.2], [0.22352941, 0...","[[-14.50043, -11.244556, -11.512085, -10.55725..."
3,1356.jpg,0,"[[[0.7647059, 0.84705883, 0.7176471], [0.62352...","[[-28.686245, -27.850946, -19.184986, -15.9539..."
4,1117.jpg,0,"[[[0.0, 0.03137255, 0.0], [0.1254902, 0.160784...","[[-28.686245, -27.850946, -19.184986, -15.9539..."


In [67]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))





In [70]:
model.compile(optimizer = "adam",loss="binary_crossentropy",metrics=['accuracy'])
# Ensure proper data types
data_train['img_array'] = data_train['img_array'].astype(float)
data_train['sound'] = data_train['sound'].astype(float)

data_train['emergency_or_not'] = data_train['emergency_or_not'].astype(int)

# model.fit(x=[data_train['img_array'], data_train['sound']],y = data_train['emergency_or_not'],epochs=10, batch_size=32)

ValueError: setting an array element with a sequence.

In [None]:
data_test = pd.read_csv('Emergency_Vehicles/test.csv')
data_test['img_array']  = data_test['image_names'].apply(preprocessing_img)

new_column=[]

for index,row in data_test.iterrows():
    if (row['emergency_or_not'] == 1):
        file_path = choice(siren_sounds)
        if (file_path[0] == 't'):
            new_column.append(preprocessing_sound(file_path=file_path,type='traffic'))
        else:
            new_column.append(preprocessing_sound(file_path=file_path,type='siren'))
    else:
        file_path = choice(traffic_sounds)
        
        new_column.append(preprocessing_sound(file_path=file_path,type = 'traffic'))

data_test['sound'] = new_column

In [None]:
x_test = np.array(data_test['img_array','sound'].tolist())

y_predict = model.predict(x_test)
y_predict_binary = (y_predict > 0.5).astype(int)
print(y_predict_binary)