In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from tqdm import tqdm

# managing files
from glob import glob
from shutil import copyfile

# tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D, Conv2D, MaxPool2D, ZeroPadding2D, BatchNormalization, Input, DepthwiseConv2D, Add, LeakyReLU, ReLU
from tensorflow.keras.optimizers import Adam, SGD

# Audio
import librosa
import librosa.display as dsp
import IPython.display as ipd

# Augmentation
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

In [2]:
train_csv = pd.read_csv('data/train_data.csv')
train_csv

Unnamed: 0,id,age,gender,respiratory_condition,fever_or_muscle_pain,covid19
0,1,24,female,0,1,0
1,2,51,male,0,0,0
2,3,22,male,0,0,0
3,4,29,female,1,0,0
4,5,23,male,0,0,0
...,...,...,...,...,...,...
3800,3801,53,male,0,0,0
3801,3802,25,male,0,0,0
3802,3803,26,female,0,0,0
3803,3804,27,female,0,0,0


In [3]:
# Define Augmentation
augment = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
])

In [4]:
train_path = glob('data\\train\\*.*')
len(train_path)

3805

In [5]:
# Audio augmentation
def path_to_mel(path, augmentation):
    audio, sr = librosa.load(path, sr=16000)

    if augmentation == True:
        audio = augment(samples=audio, sample_rate=16000)
        audio = np.hstack([audio, np.zeros(340000 - audio.shape[0])])

    elif augmentation == False:
        audio = np.hstack([audio, np.zeros(340000 - audio.shape[0])])
        
    mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)

    return np.array(mel, np.float64)

In [6]:
train_csv.loc[int(train_path[1].split('\\')[-1].split('.')[0])][5]

0

In [7]:
data = []
label = []

for i in tqdm(train_path):

    if train_csv.loc[int(i.split('\\')[-1].split('.')[0])][5] == 1:
        data.append(path_to_mel(i, False))
        label.append(1)

        for i in range(5):
            data.append(path_to_mel(i, True))
            label.append(1)

    else:
        data.append(path_to_mel(i, False))
        label.append(0)

  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0%|          | 5/3805 [00:01<15:51,  4.00it/s]  

In [21]:
BATCH_SIZE = 16
SHUFFLE_BUFFER_SIZE = 100
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.data.Dataset.from_tensor_slices((data, label))
train_ds = train_ds.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

In [25]:
def residual_block(x, filters_in, filters_out):
    shortcut = x
    x = BatchNormalization()(x)
    x = ReLU()(x)
    x = Conv2D(filters_in, kernel_size=(1, 1), strides=(1, 1), padding="same",kernel_initializer='he_normal')(x)

    x = BatchNormalization()(x)
    x = ReLU()(x)    
    x = Conv2D(filters_in, kernel_size=(3, 3), strides=(1, 1), padding="same",kernel_initializer='he_normal')(x)

    x = BatchNormalization()(x)
    x = ReLU()(x)  
    x = Conv2D(filters_out, kernel_size=(1, 1), strides=(1, 1), padding="same",kernel_initializer='he_normal')(x)

    shortcut_channel = x.shape.as_list()[0]
    
    if shortcut_channel != filters_out:
        shortcut = Conv2D(filters_out, kernel_size=(1, 1), strides=(1, 1), padding="same",kernel_initializer='he_normal')(shortcut)
        
    x = Add()([x, shortcut])
    return ReLU()(x)

def build_model():

  inputs = tf.keras.layers.Input(shape=(40, 665, 1))

  outputs = Conv2D(16,(3,3),activation=None,padding='same',kernel_initializer='he_normal')(inputs)
  outputs = BatchNormalization()(outputs)
  outputs = ReLU()(outputs)
  outputs = MaxPool2D((2,2))(outputs)

  outputs = residual_block(outputs, 16, 32)
  outputs = MaxPool2D((2,2))(outputs)
  outputs = residual_block(outputs, 32, 32)
  # outputs = residual_block(outputs, 32, 32)
  outputs = residual_block(outputs, 32, 64)
  outputs = MaxPool2D((2,2))(outputs)
  outputs = residual_block(outputs, 64, 64)
  # outputs = residual_block(outputs, 64, 64)
  # outputs = MaxPool2D((2,2))(outputs)

  outputs = GlobalAveragePooling2D()(outputs)
  # outputs = Flatten()(outputs)

  outputs = Dense(32,activation=None,kernel_initializer='he_normal')(outputs)
  outputs = BatchNormalization()(outputs)
  outputs = ReLU()(outputs)
  outputs = Dropout(0.5)(outputs)

  outputs = Dense(2,activation='softmax')(outputs)
  model = Model(inputs=inputs, outputs=outputs)
  model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
  
  return model

model = build_model()
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 40, 665, 1)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_17 (Conv2D)             (None, 40, 665, 16)  160         ['input_2[0][0]']                
                                                                                                  
 batch_normalization_14 (BatchN  (None, 40, 665, 16)  64         ['conv2d_17[0][0]']              
 ormalization)                                                                                    
                                                                                            

In [26]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [27]:
history = model.fit(train_ds, epochs=2)

Epoch 1/2


  return dispatch_target(*args, **kwargs)


Epoch 2/2


In [29]:
inf_path = glob('data\\test\\*.*')

inf_data = []
for i in tqdm(inf_path):
    inf_data.append(path_to_mel(i, False))

  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.melspectrogram(audio, sr=sr, n_mels=40)
  0.00000000e+00  0.00000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = librosa.feature.m

In [31]:
inf_data_reshaped = []

for i in tqdm(inf_data):
    inf_data_reshaped.append(i.reshape(-1, 40, 665, 1))

100%|██████████| 5732/5732 [00:00<00:00, 1146264.45it/s]


In [None]:
# Model 추론
inf_result = []

for i in tqdm(inf_data_reshaped):
    preds = model.predict(i)
    score = tf.nn.softmax(preds[0])
    class_names = [0,1]

    inf_result.append(class_names[np.argmax(score)])

In [35]:
np.max(inf_result)

0

In [38]:
submission = pd.read_csv('data/sample_submission.csv')
submission = submission.drop('covid19', 1)
submission['covid19'] = inf_result
submission.to_csv('resnet_block_limited_data_mel.csv', index=False)

  submission = submission.drop('covid19', 1)
