In [3]:
!pip install librosa

Collecting librosa
  Downloading librosa-0.9.2-py3-none-any.whl (214 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m214.3/214.3 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
Collecting soundfile>=0.10.2
  Downloading soundfile-0.11.0-py2.py3-none-any.whl (23 kB)
Collecting audioread>=2.1.9
  Downloading audioread-3.0.0.tar.gz (377 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m377.0/377.0 kB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting resampy>=0.2.2
  Downloading resampy-0.4.2-py3-none-any.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pooch>=1.0
  Downloading pooch-1.6.0-py3-none-any.whl (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
Collecting numba>=0.45.1
  Downloading numba-0.56.2-c

In [1]:
########################################################################
# import default libraries
########################################################################
import os
import sys
import gc
########################################################################


########################################################################
# import additional libraries
########################################################################
import numpy as np
import scipy.stats
# from import
from tqdm import tqdm
try:
    from sklearn.externals import joblib
except:
    import joblib
# original lib
import common as com
import keras_model
import pandas as pd
import keras
import librosa
import tensorflow as tf
import librosa.display
############

In [2]:
############################################################


########################################################################
# load parameter.yaml
########################################################################
param = com.yaml_load()
########################################################################
saved_weight = os.path.join(param["P_MODELSAVE"], 'dataweights.{epoch:02d}-{val_acc:.2f}.hdf5')

modelchk = keras.callbacks.ModelCheckpoint(saved_weight, 
                                      monitor='val_acc', 
                                      verbose=1,
                                      save_best_only=True, 
                                      save_weights_only=False,
                                      mode='auto',
                                      period=2)

tensorboard = keras.callbacks.TensorBoard(log_dir=param["P_LOGS"],
                                          histogram_freq=0,
                                          write_graph=True,
                                          write_images=True)

csv_logger = keras.callbacks.CSVLogger(f'{param["P_LOGS"]}/keras_log.csv',
                                       append=True)

# model_unet = keras_model.get_model(input_shape=(1,param["feature"]["n_mels"],param["feature"]["n_frames"]), lr = param["fit"]["lr"])

########################################################################
# visualizer
########################################################################
class visualizer(object):
    def __init__(self):
        import matplotlib.pyplot as plt
        self.plt = plt
        self.fig = self.plt.figure(figsize=(7, 5))
        self.plt.subplots_adjust(wspace=0.3, hspace=0.3)

    def loss_plot(self, loss, val_loss):
        """
        Plot loss curve.

        loss : list [ float ]
            training loss time series.
        val_loss : list [ float ]
            validation loss time series.

        return   : None
        """
        ax = self.fig.add_subplot(1, 1, 1)
        ax.cla()
        ax.plot(loss)
        ax.plot(val_loss)
        ax.set_title("Model loss")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.legend(["Train", "Validation"], loc="upper right")

    def save_figure(self, name):
        """
        Save figure.

        name : str
            save png file path.

        return : None
        """
        self.plt.savefig(name)


########################################################################


########################################################################
# get data from the list for file paths
########################################################################
def file_list_to_data(file_list,
                      msg="calc...",
                      n_mels=64,
                      n_frames=5,
                      n_hop_frames=1,
                      n_fft=1024,
                      hop_length=512,
                      power=2.0):
    """
    convert the file_list to a vector array.
    file_to_vector_array() is iterated, and the output vector array is concatenated.

    file_list : list [ str ]
        .wav filename list of dataset
    msg : str ( default = "calc..." )
        description for tqdm.
        this parameter will be input into "desc" param at tqdm.

    return : numpy.array( numpy.array( float ) )
        data for training (this function is not used for test.)
        * dataset.shape = (number of feature vectors, dimensions of feature vectors)
    """

    # iterate file_to_vector_array()
    for idx in tqdm(range(len(file_list)), desc=msg):
        vectors = com.file_to_vectors(file_list[idx],
                                                n_mels=n_mels,
                                                n_frames=n_frames,
                                                n_fft=n_fft,
                                                hop_length=hop_length,
                                                power=power)
        # vectors_masked = com.spec_augment(vectors)
        if idx == 0:
            data = np.zeros((len(file_list), vectors.shape[0], vectors.shape[1]), float)
            # data_masked = np.zeros((len(file_list), vectors_masked.shape[1], vectors_masked.shape[0]), float)
        data[idx, :, :] = vectors
        data[idx,:,:] = librosa.power_to_db(data[idx,:,:])
        # data_masked[idx, :, :] = vectors_masked.T
    return data

def masking(data):
      for idx in range(len(data)):
        
        vectors_masked = com.spec_augment(data[idx,:,:])
        # data[idx,:,:] = librosa.power_to_db(data[idx,:,:])
        # vectors_masked[idx,:,:] = librosa.power_to_db(vectors_masked[idx,:,:])
        if idx == 0:
            data_masked = np.zeros((len(data), vectors_masked.shape[0], vectors_masked.shape[1]), float)
        data_masked[idx, :, :] = vectors_masked
      return np.swapaxes(data_masked, 1, 2), np.swapaxes(data, 2, 1)


In [3]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)

# initialize the visualizer
visualizer = visualizer()

# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car3"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)




2022-10-11 23:19:46,471 - INFO - load_directory <- development



[1/8] dev_data/car3


<Figure size 504x360 with 0 Axes>

In [4]:
# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1222, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1222, data.shape[1], data.shape[2], 1)



2022-10-11 23:19:46,494 - INFO - target_dir : dev_data/car3_*
2022-10-11 23:19:46,502 - INFO - #files : 611





generate train_dataset: 100%|██████████| 611/611 [00:14<00:00, 43.05it/s]


In [5]:
# j = param["feature"]["n_mels"]
# y = param["feature"]["n_frames"]
# data = np.load(f"data__{j}_{y}.npy")

In [6]:
print(data_masked.shape)

(1222, 512, 128, 1)


In [7]:
print(data.shape)
print(data_masked.shape)

(1222, 512, 128, 1)
(1222, 512, 128, 1)


In [8]:
print(data.shape)

(1222, 512, 128, 1)


In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=2)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()


In [5]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)

# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car4"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1222, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1222, data.shape[1], data.shape[2], 1)




2022-10-11 23:43:16,381 - INFO - load_directory <- development
2022-10-11 23:43:16,389 - INFO - target_dir : dev_data/car4_*
2022-10-11 23:43:16,395 - INFO - #files : 611



[1/8] dev_data/car4



generate train_dataset: 100%|██████████| 611/611 [00:16<00:00, 37.20it/s]


In [6]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=1)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 512, 128, 1  0           []                               
                                )]                                                                
                                                                                                  
 block_1_conv (Conv2D)          (None, 512, 128, 16  160         ['input_2[0][0]']                
                                )                                                                 
                                                                                                  
 block_1_conv_bn (BatchNormaliz  (None, 512, 128, 16  64         ['block_1_conv[0][0]']           
 ation)                         )                                                           

2022-10-12 00:15:03,529 - INFO - save_model -> /notebooks/model/model_car4.hdf5


TypeError: loss_plot() missing 1 required positional argument: 'val_loss'

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car5"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1222, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1222, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4.5)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car6"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1224, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1224, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4.5)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))


print("============== END TRAINING ==============")

del data
del model
gc.collect()

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car7"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1222, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1222, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4.5)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car8"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                   
                         n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1216, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1216, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4.5)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
# visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
# visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car1"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1210, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1210, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4.5)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
# visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
# visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()

In [None]:
mode = True

# make output directory
os.makedirs(param["model_directory"], exist_ok=True)


# load base_directory list
dirs = com.select_dirs(param=param, mode=mode)
idx = 0
# target_dir = "/content/drive/MyDrive/dev_data/car8"
target_dir = "dev_data/car2"
# loop of the base directory
print("\n===========================")
print("[{idx}/{total}] {target_dir}".format(target_dir=target_dir, idx=idx+1, total=len(dirs)))

# set path
machine_type = os.path.split(target_dir)[1]
model_file_path = "{model}/model_{machine_type}.hdf5".format(model=param["model_directory"],
                                                                machine_type=machine_type)

history_img = "{model}/history_{machine_type}.png".format(model=param["model_directory"],
                                                            machine_type=machine_type)
# pickle file for storing anomaly score distribution
score_distr_file_path = "{model}/score_distr_{machine_type}.pkl".format(model=param["model_directory"],
                                                                        machine_type=machine_type)

# generate dataset
print("============== DATASET_GENERATOR ==============")

# get file list for all sections
# all values of y_true are zero in training
files, y_true = com.file_list_generator(target_dir=target_dir,
                                        section_name="*",
                                        dir_name="train",
                                        mode=mode)



data = file_list_to_data(files,
                            msg="generate train_dataset",
                            n_mels=param["feature"]["n_mels"],
                            n_frames=1,
                            n_hop_frames=param["feature"]["n_mels"],
                            n_fft=param["feature"]["n_fft"],
                            hop_length=param["feature"]["hop_length"],
                            power=param["feature"]["power"])
data = np.concatenate((data, data), axis=0)
j = param["feature"]["n_mels"]
y = param["feature"]["n_frames"]
n = machine_type
data_masked, data = masking(data)
data = data.reshape(1222, data.shape[1], data.shape[2], 1)
data_masked =  data_masked.reshape(1222, data.shape[1], data.shape[2], 1)




In [None]:
# number of vectors for each wave file
# train model
print("============== MODEL TRAINING ==============")
model = keras_model.get_model_3((param["feature"]["n_frames"],param["feature"]["n_mels"], 1))
model_opt = tf.keras.optimizers.Adam(learning_rate=4)

model.compile(optimizer=model_opt, loss='mse', metrics=['accuracy'])

model.summary()

history = model.fit(x=data_masked,
                    y=data,
                    epochs=150,
                    batch_size=32,
                    shuffle=param["fit"]["shuffle"],
                    validation_split=0.1,
                    verbose=param["fit"]["verbose"])


pd.DataFrame.from_dict(model.history.history).to_csv(f'history_{machine_type}.csv',index=False)


# # calculate y_pred for fitting anomaly score distribution
# y_pred = []
# start_idx = 0
# for file_idx in range(len(files)):
#         y_pred.append(np.mean(np.square(data[file_idx,: ,  :] 
#                                 - model.predict(data[file_idx,: , :]))))
#         start_idx += n_vectors_ea_file

# # fit anomaly score distribution
# shape_hat, loc_hat, scale_hat = scipy.stats.gamma.fit(y_pred)
# gamma_params = [shape_hat, loc_hat, scale_hat]
# joblib.dump(gamma_params, score_distr_file_path)

model.save(model_file_path)
com.logger.info("save_model -> {}".format(model_file_path))
# visualizer.loss_plot(history.history["loss"], history.history["val_loss"])
# visualizer.save_figure(history_img)

print("============== END TRAINING ==============")

del data
del model
gc.collect()