In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import os
import json
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [3]:
import torch
print(torch.__version__)

1.13.1+cu116


In [None]:
import os
import torch

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "4"  # Set the GPU 5 to use

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

torch.manual_seed(777)
if device =='cuda':
    torch.cuda.manual_seed_all(777)

In [4]:
print('cuda index:', torch.cuda.current_device())

print('gpu 개수:', torch.cuda.device_count())

print('graphic name:', torch.cuda.get_device_name())

cuda = torch.device('cuda')

print(cuda)

cuda index: 0
gpu 개수: 1
graphic name: NVIDIA GeForce RTX 3080
cuda


In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') #GPU 할당

In [6]:
print(device)

cuda


In [7]:
import random

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(929)

In [8]:
label_dict = {
    "강제추행(성범죄)": 1,
    "강도범죄": 2,
    "절도범죄": 3,
    "폭력범죄": 4,
    "도움요청": 5,
}

In [9]:
def train_dataset():
    folder = "D:\\위험상황데이터셋\\dataset\\train"
    dataset = []
    class_label = 0
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000,mono=True)
            
            json_path = abs_file_path.replace("_label.wav",".json")
            with open(json_path,encoding='utf-8') as json_file:
                json_data = json.load(json_file)
                categories = json_data["annotations"][0]["categories"]
                category = categories["category_02"]
                class_label = int(label_dict[category])
            dataset.append([data,class_label])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data','label'])

In [10]:
def test_dataset():
    folder = "D:\\위험상황데이터셋\\dataset\\test"
    dataset = []
    for file in tqdm(os.listdir(folder),colour='green'):
        if 'wav' in file:
            abs_file_path = os.path.join(folder,file)
            data, sr = librosa.load(abs_file_path, sr = 16000,mono=True)
            
            dataset.append([data, file])
    
    print("Dataset 생성 완료")
    return pd.DataFrame(dataset,columns=['data', 'file_name'])

In [11]:
train_wav = train_dataset()
test_wav = test_dataset()

100%|[32m█████████████████████████████████████████████████████████[0m| 22000/22000 [00:52<00:00, 417.95it/s][0m


Dataset 생성 완료


100%|[32m███████████████████████████████████████████████████████████[0m| 5000/5000 [00:12<00:00, 386.15it/s][0m

Dataset 생성 완료





In [12]:
train_wav.head()

Unnamed: 0,data,label
0,"[-9.62628e-05, -0.00020544285, -0.00011758789,...",1
1,"[1.169485e-05, 2.5358913e-05, 3.8124912e-05, 4...",1
2,"[0.00013293176, 0.0001595899, 1.5221034e-05, -...",1
3,"[-4.965741e-05, -4.61519e-05, 2.123904e-05, 5....",1
4,"[-0.00029593214, -0.0006093591, -0.0005188418,...",1


In [13]:
counts = train_wav['label'].value_counts()

# 결과 출력
print(counts)

2    2207
5    2200
3    2200
4    2200
1    2193
Name: label, dtype: int64


In [14]:
train_x = np.array(train_wav.data)
test_x = np.array(test_wav.data)
data_type = type(train_x)
print(data_type)

<class 'numpy.ndarray'>


In [15]:
train_length=[]
test_length=[]

for i in train_x:
    train_length.append(len(i))
    
for i in test_x:
    test_length.append(len(i))
    

bins =15
hist, bin_edges = np.histogram(train_length, bins=bins)
print('train_x length Histogram')
print(bin_edges)
print(hist)

hist, bin_edges = np.histogram(test_length, bins=bins)
print('test_x length Histogram')
print(bin_edges)
print(hist)


train_x length Histogram
[  10240.    92864.2  175488.4  258112.6  340736.8  423361.   505985.2
  588609.4  671233.6  753857.8  836482.   919106.2 1001730.4 1084354.6
 1166978.8 1249603. ]
[9417 1079  385  102    0    0    1    0    7    3    0    0    0    0
    6]
test_x length Histogram
[   7680.           90453.53333333  173227.06666667  256000.6
  338774.13333333  421547.66666667  504321.2         587094.73333333
  669868.26666667  752641.8         835415.33333333  918188.86666667
 1000962.4        1083735.93333333 1166509.46666667 1249283.        ]
[2022  267  150   51    0    0    1    0    4    2    0    0    0    0
    3]


In [16]:
def remove_top_n_percent(data, n=15):
    # 각 데이터의 길이 계산
    lengths = [len(d) for d in data]
    
    # 길이 기준으로 내림차순 정렬
    sorted_lengths = sorted(lengths, reverse=True)
    
    # 상위 n% 길이 계산
    top_n_percent_length = sorted_lengths[int(len(sorted_lengths) * n / 100)]
    
    # 길이가 상위 n%에 해당하는 데이터 제거
    filtered_data = [d for d in data if len(d) <= top_n_percent_length]
    
    return np.array(filtered_data)

In [17]:
data_type = type(train_x)
print(data_type)
train_x = remove_top_n_percent(train_x)
data_type = type(train_x)
print(data_type)
train_lengths = [len(d) for d in train_x]
test_x = remove_top_n_percent(test_x)
test_lengths = [len(d) for d in test_x]

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


  return np.array(filtered_data)


In [18]:
print('Filtered Train:')
print(max(train_lengths))
print(len(train_lengths))
print(len(train_lengths)/11000*100,'%')
print('\nFiltered Test:')
print(max(test_lengths))
print(len(test_lengths))
print(len(test_lengths)/2500*100,'%')

Filtered Train:
89424
9352
85.01818181818182 %

Filtered Test:
111136
2135
85.39999999999999 %


In [19]:
def get_avr_length(data):
    total_length = 0
    data_count=0
    for i in data:
        total_length+=len(i)
        data_count+=1
    return int(total_length/data_count)
    

def set_length(data, avr_length):
    result = []
    for i in tqdm(data,colour='green'):
        if len(i) <=avr_length:
            padded_data = np.pad(i, (0, avr_length-len(i)), 'wrap')
            tmp = padded_data.tolist()
            result.append(tmp)   
        else:
            result.append(i[:avr_length])
            
    result = np.array(result)
    print('데이터 세팅 완료~!')
    return result

In [20]:
data_type = type(train_x)
print(data_type)

<class 'numpy.ndarray'>


In [21]:
avr_length=get_avr_length(train_x)
train_x = set_length(train_x,avr_length)
test_x = set_length(test_x,avr_length)

100%|[32m██████████████████████████████████████████████████████████[0m| 9352/9352 [00:06<00:00, 1544.32it/s][0m


데이터 세팅 완료~!


100%|[32m██████████████████████████████████████████████████████████[0m| 2135/2135 [00:01<00:00, 1094.81it/s][0m


데이터 세팅 완료~!


In [22]:
print('train :', train_x.shape)
print('test :', test_x.shape)

train : (9352, 50667)
test : (2135, 50667)


In [23]:
extracted_features = librosa.feature.mfcc(y=train_x[0], sr=16000, n_mfcc=40)
print(extracted_features.shape)
extracted_features = librosa.feature.mfcc(y=test_x[0], sr=16000, n_mfcc=40)
print(extracted_features.shape)

(40, 99)
(40, 99)


In [24]:
def preprocess_dataset(data):
    mfccs = []
    for i in tqdm(data,colour='green'):
        extracted_features = librosa.feature.mfcc(y=i,
                                              sr=16000,
                                              n_mfcc=40)
        mfccs.append(extracted_features)
            
    return mfccs

In [25]:
train_mfccs = preprocess_dataset(train_x)
train_mfccs = np.array(train_mfccs)
train_mfccs = train_mfccs.reshape(-1, train_mfccs.shape[1], train_mfccs.shape[2], 1)
#test_x = test_x.reshape(-1, test_x.shape[1], test_x.shape[2], 1)

100%|[32m████████████████████████████████████████████████████████████[0m| 9352/9352 [01:54<00:00, 81.51it/s][0m


In [26]:
np.array(train_mfccs).shape

(9352, 40, 99, 1)

Custom DataSet

In [27]:
import torchvision.datasets as datasets # 데이터셋 집합체
import torchvision.transforms as transforms # 변환 툴

from torch.utils.data import DataLoader # 학습 및 배치로 모델에 넣어주기 위한 툴
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    def __init__(self, X, y, train_mode=True, transforms=None): #필요한 변수들을 선언
        self.X = X
        self.y = y
        self.train_mode = train_mode
        self.transforms = transforms

    def __getitem__(self, index): #index번째 data를 return
        X = self.X[index]
        
        if self.transforms is not None:
            X = self.transforms(X)

        if self.train_mode:
            y = self.y[index]
            return X, y
        else:
            return X
    
    def __len__(self): #길이 return
        return len(self.X)

train_X = train_mfccs[:8000]
vali_X = train_mfccs[8000:]

train_y = train_wav.label[:8000]
vali_y = train_wav.label[8000:].reset_index(drop = True)

# 에포크 설정
num_epochs = 100

# 배치 사이즈 설정
batch_size = 10

#만든 train dataset를 DataLoader에 넣어 batch 만들기
train_dataset = CustomDataset(X=train_X, y=train_y)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)

vali_dataset = CustomDataset(X=vali_X, y=vali_y)
vali_loader = DataLoader(vali_dataset, batch_size = batch_size, shuffle=False)

In [28]:
import pickle

# train_loader와 vali_loader를 dictionary 형태로 저장
data = {'train_loader': train_loader, 'vali_loader': vali_loader}

# 파일로 저장
with open('data.pickle', 'wb') as f:
    pickle.dump(data, f)

In [29]:
print(train_dataset)
x,y = train_dataset.__getitem__(5)
print(x)
x.shape

<__main__.CustomDataset object at 0x000001E9E79517F0>
[[[-460.104925  ]
  [-423.75440959]
  [-407.94510396]
  ...
  [-548.88746759]
  [-548.88746759]
  [-548.88746759]]

 [[  47.59595444]
  [  47.56444795]
  [  31.10083833]
  ...
  [   0.        ]
  [   0.        ]
  [   0.        ]]

 [[ -49.82005715]
  [ -67.93859642]
  [ -71.60807103]
  ...
  [   0.        ]
  [   0.        ]
  [   0.        ]]

 ...

 [[  -5.39343936]
  [  -4.77270195]
  [  -7.23844319]
  ...
  [   0.        ]
  [   0.        ]
  [   0.        ]]

 [[  -0.78771144]
  [  -2.07211224]
  [  -3.82161642]
  ...
  [   0.        ]
  [   0.        ]
  [   0.        ]]

 [[   3.50300228]
  [   7.21237114]
  [   3.36427638]
  ...
  [   0.        ]
  [   0.        ]
  [   0.        ]]]


(40, 99, 1)

In [30]:
train_batches = len(train_loader)
vali_batches = len(vali_loader)

print('/ total train batches :', train_batches)
print('/ total valid batches :', vali_batches)

/ total train batches : 800
/ total valid batches : 136


In [31]:
from tqdm.auto import tqdm
import torch.nn as nn # 신경망들이 포함됨

class CNNclassification(torch.nn.Module):
    def __init__(self):
        super(CNNclassification, self).__init__()
        self.layer1 = torch.nn.Sequential(
            nn.Conv2d(40, 10, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer2 = torch.nn.Sequential(
            nn.Conv2d(10, 100, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer3 = torch.nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer4 = torch.nn.Sequential(
            nn.Conv2d(200, 300, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.layer5 = torch.nn.Sequential(
            nn.Conv2d(300, 400, kernel_size=2, stride=1, padding=1), #cnn layer
            nn.ReLU(), #activation function
            nn.MaxPool2d(kernel_size=2, stride=2)) #pooling layer
        
        self.fc_layer = nn.Sequential( 
            nn.Linear(1600, 5) #fully connected layer(ouput layer)
        )    
        
    def forward(self, x):
        x = x.float() # 입력 데이터를 float32 타입으로 변환
        x = self.layer1(x) #1층
        
        x = self.layer2(x) #2층
         
        x = self.layer3(x) #3층
        
        x = self.layer4(x) #4층
        
        x = self.layer5(x) #5층

        x = torch.flatten(x, start_dim=1) # N차원 배열 -> 1차원 배열
        
        out = self.fc_layer(x)
        return out

In [32]:
import torch.optim as optim # 최적화 알고리즘들이 포함힘

model = CNNclassification().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(params = model.parameters(), lr = 1e-3 )
scheduler = None

In [33]:
model(torch.rand(10,40,99,1).to(device))

tensor([[-0.0132, -0.0369, -0.0095,  0.0285, -0.0454],
        [-0.0135, -0.0366, -0.0094,  0.0292, -0.0455],
        [-0.0132, -0.0369, -0.0096,  0.0293, -0.0458],
        [-0.0130, -0.0365, -0.0091,  0.0284, -0.0449],
        [-0.0132, -0.0372, -0.0092,  0.0287, -0.0452],
        [-0.0131, -0.0366, -0.0094,  0.0284, -0.0451],
        [-0.0129, -0.0369, -0.0092,  0.0290, -0.0455],
        [-0.0138, -0.0371, -0.0093,  0.0281, -0.0460],
        [-0.0136, -0.0367, -0.0095,  0.0281, -0.0457],
        [-0.0134, -0.0369, -0.0092,  0.0283, -0.0459]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [34]:
from tqdm.auto import tqdm

def train(model, optimizer, train_loader, scheduler, device): 
    model.to(device)
    n = len(train_loader)
    best_acc = 0
    
    for epoch in range(1,num_epochs): #에포크 설정
        model.train() #모델 학습
        running_loss = 0.0
        
        for wav, label in tqdm(iter(train_loader)):
            
            wav, label = wav.to(device), label.to(device) #배치 데이터
            optimizer.zero_grad() #배치마다 optimizer 초기화
        
            # Data -> Model -> Output
            logit = model(wav) #예측값 산출
            loss = criterion(logit, label) #손실함수 계산
            
            # 역전파
            loss.backward() #손실함수 기준 역전파 
            optimizer.step() #가중치 최적화
            running_loss += loss.item()
             
        print('[%d] Train loss: %.10f' %(epoch, running_loss / len(train_loader)))
        
        if scheduler is not None:
            scheduler.step()
            
            
        #Validation set 평가
        model.eval() #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
        vali_loss = 0.0
        correct = 0
       
        with torch.no_grad(): #파라미터 업데이트 안하기 때문에 no_grad 사용
            for wav, label in tqdm(iter(vali_loader)):
                
                wav, label = wav.to(device), label.to(device)
                logit = model(wav)
                vali_loss += criterion(logit, label)
                pred = logit.argmax(dim=1, keepdim=True)  #10개의 class중 가장 값이 높은 것을 예측 label로 추출
                correct += pred.eq(label.view_as(pred)).sum().item() #예측값과 실제값이 맞으면 1 아니면 0으로 합산
        vali_acc = 100 * correct / len(vali_loader.dataset)
        print('Vail set: Loss: {:.4f}, Accuracy: {}/{} ( {:.0f}%)\n'.format(vali_loss / len(vali_loader), correct, len(vali_loader.dataset), 100 * correct / len(vali_loader.dataset)))
        
        #베스트 모델 저장
        if best_acc < vali_acc:
            best_acc = vali_acc
            torch.save(model.state_dict(), 'data/saved/best_model2.pth') #이 디렉토리에 best_model.pth을 저장
            print('Model Saved.')

In [36]:
os.environ['TORCH_USE_CUDA_DSA'] = '1'
with torch.autograd.detect_anomaly():
    train(model, optimizer, train_loader, scheduler, device)

  with torch.autograd.detect_anomaly():


  0%|          | 0/800 [00:00<?, ?it/s]

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
