# In [None]:
import torch
import pandas as pd
import numpy as np
import os

# load data
from torch.utils.data import DataLoader
import multiprocessing as mp
import time

# train
from torch import nn
from torch.nn import functional as F
import torch.nn.init

# visualization
import matplotlib.pyplot as plt

# Hyperparameter table: columns 1..144 each describe one configuration and
# rows 0..10 hold the individual settings, in the order of the lists below.
test_df = pd.read_csv('test_tables/test_table(20*1000).csv', encoding='UTF-8', engine = 'python')

# Result table keyed by its 'index' column; missing cells are replaced by 0.
result_table = (
    pd.read_csv('./result_tables/result_table.csv', engine='python', encoding='utf-8')
    .set_index('index')
    .replace(np.nan, 0)
)

batch_size, training_epochs = [], []
layer1_ks, layer1_MP = [], []
layer2_ks, layer2_MP = [], []
layer3_ks, layer3_MP = [], []
fc1_to, fc1_bias, fc2_bias = [], [], []

num_workers = 40

# Position r in this tuple receives the values found in row r of test_df.
_settings_by_row = (
    batch_size,
    training_epochs,
    layer1_ks,
    layer1_MP,
    layer2_ks,
    layer2_MP,
    layer3_ks,
    layer3_MP,
    fc1_to,
    fc1_bias,
    fc2_bias,
)

for i in range(1, 145):
    for _row, _values in enumerate(_settings_by_row):
        _values.append(test_df.iloc[_row, i])

def read_csv(file_name):
    """Read ``file_name`` as a header-less CSV and return it as a DataFrame."""
    return pd.read_csv(file_name, header=None)


class MFCCDataset(torch.utils.data.Dataset):
    """In-memory dataset assembled from header-less CSV files.

    Every CSV row is one sample: column 0 is stored as the label and the
    columns from 1 onward are reshaped into a 20 x 1000 feature matrix.

    Attributes set by ``__init__``:
        raw:   concatenated DataFrame of all files.
        label: ``torch.IntTensor`` of shape (n_samples, 1).
        data:  ``torch.Tensor`` of shape (n_samples, 20, 1000).
        len:   number of samples.
    """

    def __init__(self, file_list):
        """Read all files of ``file_list`` in parallel and build the tensors.

        Args:
            file_list: paths of the CSV files, each parsed by ``read_csv``.

        Raises:
            ValueError: if ``file_list`` is empty.
        """
        file_list = list(file_list)
        if not file_list:
            # Fail before spawning any worker; pd.concat would raise the same
            # exception type later, but with a less helpful message.
            raise ValueError("file_list must contain at least one CSV file")

        # More workers than files would only sit idle; 100 stays the ceiling.
        n_procs = min(len(file_list), 100)

        start = time.time()
        # The context manager releases the worker processes even when a
        # reader raises.
        with mp.Pool(processes=n_procs) as pool:
            frames = pool.map(read_csv, file_list)
        self.raw = pd.concat(frames)
        print("time :", time.time() - start)

        labels = self.raw[0].values
        self.label = torch.IntTensor(np.array(labels).reshape(len(labels), 1))
        self.len = len(self.label)
        self.data = torch.Tensor(
            np.array(self.raw.loc[:, 1:]).reshape(self.len, 20, 1000)
        )

    def __len__(self):
        """Return the number of samples."""
        return self.len

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.data[idx], self.label[idx]

class CNN(torch.nn.Module):
    """Three conv/ReLU/max-pool stages followed by two linear layers.

    The hyperparameters are read from the module-level lists ``layer1_ks``,
    ``layer1_MP``, ``layer2_ks``, ``layer2_MP``, ``layer3_ks``, ``layer3_MP``,
    ``fc1_to``, ``fc1_bias`` and ``fc2_bias`` at one shared position.
    The network emits 3 output values per sample.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, pool_size):
        """Build one Conv2d -> ReLU -> MaxPool2d stage."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                            stride=1, padding=int(kernel_size / 2)),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=pool_size, stride=pool_size))

    @staticmethod
    def _flattened_features(kernel_sizes, pool_sizes, input_hw=(20, 1000), channels=128):
        """Return the number of values left after the conv stages are flattened.

        The spatial size is tracked stage by stage, because a pooling stage
        floors its output and an even kernel grows the map by one; a single
        closed-form division misses both cases.

        Args:
            kernel_sizes: convolution kernel size of every stage.
            pool_sizes: max-pool kernel (= stride) of every stage.
            input_hw: (height, width) of the input feature map.
            channels: output channels of the last convolution.
        """
        height, width = input_hw
        for ks, pool in zip(kernel_sizes, pool_sizes):
            ks, pool = int(ks), int(pool)
            pad = int(ks / 2)
            # Stride-1 convolution: out = in + 2 * padding - kernel + 1.
            height = height + 2 * pad - ks + 1
            width = width + 2 * pad - ks + 1
            # Max-pool with kernel == stride and no padding: out = in // pool.
            height //= pool
            width //= pool
        return channels * height * width

    def __init__(self, config_index=None):
        """Create the layers for one hyperparameter configuration.

        Args:
            config_index: position in the module-level hyperparameter lists.
                When omitted, the module-level variable ``i`` is used, so a
                bare ``CNN()`` call keeps working inside the sweep loop.
        """
        super().__init__()
        idx = i if config_index is None else config_index

        kernel_sizes = [int(layer1_ks[idx]), int(layer2_ks[idx]), int(layer3_ks[idx])]
        pool_sizes = [int(layer1_MP[idx]), int(layer2_MP[idx]), int(layer3_MP[idx])]
        hidden = int(fc1_to[idx])

        # Layers are created in a fixed order so that seeded runs initialise
        # their parameters in the same sequence.
        self.layer1 = self._conv_stage(1, 32, kernel_sizes[0], pool_sizes[0])
        self.layer2 = self._conv_stage(32, 64, kernel_sizes[1], pool_sizes[1])
        self.layer3 = self._conv_stage(64, 128, kernel_sizes[2], pool_sizes[2])

        flat = self._flattened_features(kernel_sizes, pool_sizes)

        self.fc1 = torch.nn.Linear(flat, hidden, bias=bool(fc1_bias[idx]))
        self.fc2 = torch.nn.Linear(hidden, 3, bias=bool(fc2_bias[idx]))

        # Only fc1 is re-initialised; fc2 keeps the default Linear init.
        torch.nn.init.xavier_uniform_(self.fc1.weight)

    def forward(self, x):
        """Run ``x`` through the conv stages and both linear layers."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)   # flatten for the fully connected layers
        out = self.fc1(out)
        out = self.fc2(out)
        return out

import random
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")
print("다음 기기로 학습합니다:", device)

# Fixed seeds so that repeated runs start from the same random state.
random.seed(777)
torch.manual_seed(777)
# Test the device *type*: comparing a torch.device object with the plain
# string 'cuda' is not a reliable equality check.
if device.type == 'cuda':
    torch.cuda.manual_seed_all(777)

# Training data is split over train1.csv .. train26.csv.
train_list = [
    './separated data4(20*1000)/train' + str(n) + '.csv' for n in range(1, 27)
]

# Test data is split over test1.csv .. test7.csv.
test_list = [
    './separated data4(20*1000)/test' + str(n) + '.csv' for n in range(1, 8)
]



def _train_model(model, dataloader, criterion, optimizer, epochs):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    The mean per-batch loss is printed after every epoch.
    """
    total_batch = len(dataloader)
    for epoch in range(epochs):
        # Plain floats are accumulated so that no tensor carrying autograd
        # history is kept between batches.
        avg_cost = 0.0

        for X, Y in dataloader:
            # Batches arrive as (batch, 20, 1000) features and (batch, 1)
            # labels: add the channel axis and flatten the labels.
            X = X.unsqueeze(1).to(device)
            Y = Y[:, 0].to(device, dtype=torch.int64)

            optimizer.zero_grad()
            hypothesis = model(X)
            cost = criterion(hypothesis, Y)
            cost.backward()
            optimizer.step()

            avg_cost += cost.item() / total_batch

        print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))


def _evaluate_model(model, testset):
    """Return ``(accuracy, elapsed_seconds)`` for one pass over all of ``testset``.

    The elapsed time covers moving the test tensors to ``device``, the forward
    pass and reading the accuracy back as a Python float.
    """
    model.eval()
    start = time.time()
    with torch.no_grad():
        X_test = testset.data.view(len(testset), 1, 20, 1000).float().to(device)
        Y_test = testset.label.to(device, dtype=torch.int64)[:, 0]

        prediction = model(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        accuracy = correct_prediction.float().mean().item()
    return accuracy, time.time() - start


n_repeats = 5  # independent trainings per configuration
learning_rate = 0.001

# The CSV contents do not depend on the configuration, so they are loaded
# once instead of once per training run.
print("Loading data...")
dataset = MFCCDataset(train_list)
print("data is ready!")

print("Loading testset...")
testset = MFCCDataset(test_list)
print("testset is ready!")

# The loop variable must stay named `i`: a bare CNN() call selects its
# hyperparameters through the module-level `i`.
for i in range(len(batch_size)):
    sum_test_accuracy = 0
    sum_test_inference_time = 0
    max_test_accuracy = -1

    # NOTE(review): training batches are not shuffled -- confirm this is intended.
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=int(batch_size[i]), num_workers=num_workers, drop_last=True
    )
    total_batch = len(dataloader)

    for j in range(n_repeats):
        # Define the CNN model on whichever device was selected.
        model = torch.nn.DataParallel(CNN()).to(device)
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        print('총 배치의 수 : {}'.format(total_batch))

        _train_model(model, dataloader, criterion, optimizer, int(training_epochs[i]))

        accuracy, inference_time = _evaluate_model(model, testset)
        print('Accuracy:', accuracy)
        max_test_accuracy = max(max_test_accuracy, accuracy)

        sum_test_accuracy += accuracy
        sum_test_inference_time += inference_time
        # A single .iloc[row, col] call writes into the DataFrame itself;
        # chained .iloc[row][col] may only modify a temporary copy.
        result_table.iloc[i, 2 * j] = accuracy
        result_table.iloc[i, 2 * j + 1] = inference_time

        print("time :", inference_time, "\n\n")

    # Average over the number of runs actually performed.
    avg_accuracy = sum_test_accuracy / n_repeats
    avg_inference_time = sum_test_inference_time / n_repeats
    print(str(i+1)+'번째 테스트' +str(n_repeats) + '회 학습 avg_accuracy : '+str(avg_accuracy)+' avg_inference_time : '+str(avg_inference_time)+"\n\n")
    result_table.iloc[i, 2 * n_repeats] = avg_accuracy
    result_table.iloc[i, 2 * n_repeats + 1] = avg_inference_time