In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
from tensorflow.keras.utils import to_categorical 

In [2]:
DATADIR = "D:/THESIS/Spectrogram/"

In [3]:
categories = os.listdir(DATADIR)

In [4]:
categories

['air_conditioner',
 'car_horn',
 'children_playing',
 'dog_bark',
 'drilling',
 'engine_idling',
 'gun_shot',
 'jackhammer',
 'siren',
 'street_music']

In [5]:
data = []

def create_dataset():
    for category in categories:  # do dogs and cats

        path = os.path.join(DATADIR,category)  # create path to dogs and cats
        class_num = categories.index(category)  # get the classification  (0 or a 1). 0=dog 1=cat

        for img in tqdm(os.listdir(path)):  # iterate over each image per dogs and cats
            try:
                img_array = cv2.imread(os.path.join(path,img)) ##,cv2.IMREAD_GRAYSCALE)  # convert to array
                #rgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
                new_array =img_array[34:252, 53:389]
                #print(new_array.shape)
                rsz=cv2.resize(new_array, (54, 84))
                data.append([rsz, class_num])  # add this to our training_data
                #plt.axis('off')
                #plt.imshow(rsz)
                #plt.savefig("ragresize.png")
                #break
            except Exception as e:  # in the interest in keeping the output clean...
                pass
            #except OSError as e:
            #    print("OSErrroBad img most likely", e, os.path.join(path,img))
            #except Exception as e:
            #    print("general exception", e, os.path.join(path,img))
        #break
    return data



In [6]:
data = create_dataset()

print(len(data))

100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 142.12it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 429/429 [00:02<00:00, 154.51it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 117.40it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 135.04it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 137.37it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 127.00it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 374/374 [00:02<00:00, 154.96it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 138.82it/s]
100%|███████████████████████████████████

8732





In [7]:
X = []
Y = []

for features,label in data:
    X.append(features)
    Y.append(label)

In [8]:
X = np.array(X)
Y = np.array(Y)

In [9]:
import pickle

In [10]:
X.shape

(8732, 84, 54, 3)

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
X_data, X_test, Y_data, Y_test = train_test_split(X, Y,test_size=0.10, random_state = 50,stratify=Y)

In [13]:
unique1, y_data_n = np.unique(Y_data, return_counts=True)
unique2, y_test_n = np.unique(Y_test, return_counts=True)
print(np.asarray((unique1, y_data_n, y_test_n)).T)

[[  0 900 100]
 [  1 386  43]
 [  2 900 100]
 [  3 900 100]
 [  4 900 100]
 [  5 900 100]
 [  6 336  38]
 [  7 900 100]
 [  8 836  93]
 [  9 900 100]]


In [14]:
X_train,X_val, Y_train, Y_val = train_test_split(X_data, Y_data,test_size=0.20, random_state = 50, stratify=  Y_data)

In [15]:
unique1, y_data_n = np.unique(Y_train, return_counts=True)
unique2, y_test_n = np.unique(Y_val, return_counts=True)
#print(np.asarray((unique1, y_data_n, y_test_n)).T)

In [16]:
def normalizaze_data(X):
    X_data = []
    for data in X:
        data = data / 255
        X_data.append(data)
    return np.array(X_data)

In [17]:
%%time
x_train = normalizaze_data(X_train)
x_val = normalizaze_data(X_val)
x_test = normalizaze_data(X_test)

Wall time: 3.27 s


In [18]:
x_train = x_train.reshape(-1, 54, 84, 3)
x_val = x_val.reshape(-1, 54, 84, 3)
x_test = x_test.reshape(-1, 54, 84, 3)

In [19]:
%%time
y_train =to_categorical(Y_train)
y_val = to_categorical(Y_val)
y_test =to_categorical(Y_test)

Wall time: 1.99 ms


In [20]:
print(f"x_train.shape: {x_train.shape}")
print(f"y_train.shape: {y_train.shape}")
print(f"x_val.shape: {x_val.shape}")
print(f"y_val.shape: {y_val.shape}")
print(f"x_test.shape: {x_test.shape}")
print(f"y_test.shape: {y_test.shape}")

x_train.shape: (6286, 54, 84, 3)
y_train.shape: (6286, 10)
x_val.shape: (1572, 54, 84, 3)
y_val.shape: (1572, 10)
x_test.shape: (874, 54, 84, 3)
y_test.shape: (874, 10)


In [21]:
pickle_out = open("x_train_seed_50.pickle","wb")
pickle.dump(x_train,pickle_out)
pickle_out.close()

pickle_out = open("y_train_seed_50.pickle","wb")
pickle.dump(y_train,pickle_out)
pickle_out.close()

pickle_out = open("x_val_seed_50.pickle","wb")
pickle.dump(x_val,pickle_out)
pickle_out.close()

pickle_out = open("y_val_seed_50.pickle","wb")
pickle.dump(y_val,pickle_out)
pickle_out.close()

pickle_out = open("x_test_seed_50.pickle","wb")
pickle.dump(x_test,pickle_out)
pickle_out.close()

pickle_out = open("y_test_seed_50.pickle","wb")
pickle.dump(y_test,pickle_out)
pickle_out.close()