## [Dacon] 블럭 장난감 제조 공정 최적화 경진대회
## 팀명 : 춘
## 2020년 7월 3일 (제출날짜)

## 1. 라이브러리 및 데이터 (Library & Data)
### 유전알고리즘 실행 속도를 개선한 genome_quick.py 사용

In [3]:
import time
import pandas as pd
import numpy as np
import multiprocessing
import warnings
from copy import deepcopy
from module.genome_quick import Genome, genome_score
from module.Data import Data
warnings.filterwarnings(action='ignore')
# np.random.seed(777)

## 2. 변수 선택 및 모델 구축 (Feature Engineering & Initial Modeling)

### 유전알고리즘에서 데이터프레임 실행 부분이 오래 걸리기 때문에 input 데이터를 미리 한 번만 만듦


In [4]:
CPU_CORE = multiprocessing.cpu_count() # number of worker processes used for multiprocessing
N_POPULATION = 50                      # genomes created per generation
N_BEST = 5                             # number of top-scoring genomes kept each generation
N_CHILDREN = 5                         # number of crossover children created each generation
PROB_MUTATION = 0.5                    # probability that a given weight matrix is mutated
REVERSE = True                         # score sort order (False: ascending order, True: descending order)

score_ini = 0                          # initial score given to every new genome
input_length = 125                     # input vector length (31 order rows x 4 BLK columns + hour of day)
output_length_1 = 5                   # event output size; NOTE(review): the old comment listed CHECK_1~4, PROCESS, CHANGE, STOP (7 items) - 5 presumably means CHECK_1~4 + PROCESS, confirm against Genome
output_length_2 = 12                   # MOL output size (0~5.5, step: 0.5)
h1 = 50                                # number of nodes in hidden layer 1
h2 = 50                                # number of nodes in hidden layer 2
h3 = 50                                # number of nodes in hidden layer 3
EPOCHS = 10                          # number of generations to run


# To speed up the genetic algorithm, every network input vector is built once
# here and handed to each Genome through its constructor.
order = pd.read_csv('./data/order.csv')

# Append 30 all-zero rows (labels 91..120) so the 31-row window below exists
# for every time step up to the last one (2183 // 24 + 30 == 120).
# NOTE(review): the hard-coded 91 presumably equals the row count of order.csv - confirm.
for pad_row in range(30):
    order.loc[91 + pad_row, :] = ['0000-00-00', 0, 0, 0, 0]


def _build_input(step):
    """Return the input vector for hourly time step ``step``.

    The vector is the BLK_1..BLK_4 values of the order rows labelled
    ``day .. day + 30`` (label slicing is inclusive, so 31 rows) flattened
    row by row, followed by the hour of day (``step % 24``).
    """
    day, hour = divmod(step, 24)
    window = order.loc[day:(day + 30), 'BLK_1':'BLK_4']
    return np.append(np.array(window).reshape(-1), hour)


# One precomputed input per time step, keyed by the step index 0..2183.
input_dic = {step: _build_input(step) for step in range(2184)}

# Initialise the working population and the list of best genomes.
genomes = [
    Genome(score_ini, input_length, output_length_1, output_length_2, input_dic, h1, h2, h3)
    for _ in range(N_POPULATION)
]
try:
    # If best_genomes already exists (it is only defined further down, so
    # presumably from an earlier run in the same session), seed the first
    # N_BEST slots of the population with it.
    for i in range(N_BEST):
        genomes[i] = best_genomes[i]
except (NameError, IndexError, TypeError):
    # best_genomes is undefined, too short, or not indexable: start from
    # fresh genomes. Only these expected failures are handled so that
    # unrelated errors (and KeyboardInterrupt) are no longer swallowed.
    best_genomes = [
        Genome(score_ini, input_length, output_length_1, output_length_2, input_dic, h1, h2, h3)
        for _ in range(N_BEST)
    ]

## 3. 모델 학습 및 검증 (Model Tuning & Evaluation)
1. PRT는 고정값 사용.
2. Event A와 Event B (MOL_A, MOL_B)를 다른 값으로 설정.
3. Event는 CHECK와 PROCESS 만 사용함.
4. 목적 함수로 score 평가식을 사용함.
5. Event와 MOL에 대한 인공신경망 모델을 유전 알고리즘으로 학습

In [5]:
# Genetic-algorithm training loop.
#
# Each generation: score the population in parallel, keep the N_BEST genomes,
# create N_CHILDREN crossover children, then rebuild the population from
# mutated copies of the best genomes and children.
#
# The helpers below draw random numbers in exactly the same order as the
# hand-unrolled code they replace, so a seeded run is unaffected.

_MUTATION_MEAN = 0
_MUTATION_STDDEV = 0.2

_EVENT_NETS = ('m1_event_net', 'm2_event_net')
_AMOUNT_NETS = ('m1_amount_net', 'm2_amount_net')

# (weight attribute, number of rows) for the matrices recombined by crossover.
_CROSSOVER_ROWS = (('w1', input_length), ('w2', h1), ('w3', h2), ('w4', h3))

# (weight attribute, shape) of the first three layers, shared by both net kinds.
_HIDDEN_SHAPES = (('w1', (input_length, h1)), ('w2', (h1, h2)), ('w3', (h2, h3)))

# Mutation order: all event-net layers first, then all amount-net layers;
# within one layer the m1 net is handled before the m2 net.
_MUTATION_PLAN = (
    (_EVENT_NETS, _HIDDEN_SHAPES + (('w4', (h3, output_length_1)),)),
    (_AMOUNT_NETS, _HIDDEN_SHAPES + (('w4', (h3, output_length_2)),)),
)


def _new_genome():
    """Return a freshly constructed Genome using the notebook's hyperparameters."""
    return Genome(score_ini, input_length, output_length_1, output_length_2, input_dic, h1, h2, h3)


def _crossover(child, parent_a, parent_b):
    """Overwrite the event-net weights of ``child`` row by row, in place.

    For every row a random cut column is drawn; columns before the cut come
    from ``parent_a`` and the remaining columns from ``parent_b``.

    NOTE(review): only the event nets are recombined; the child's amount
    nets stay as they were in the genome it was copied from. Confirm this is
    intended.
    """
    for layer, n_rows in _CROSSOVER_ROWS:
        for row in range(n_rows):
            for net_name in _EVENT_NETS:
                target = getattr(getattr(child, net_name), layer)
                from_a = getattr(getattr(parent_a, net_name), layer)
                from_b = getattr(getattr(parent_b, net_name), layer)
                cut = np.random.randint(target.shape[1])
                target[row, :cut] = from_a[row, :cut]
                target[row, cut:] = from_b[row, cut:]


def _mutate(genome):
    """Randomly perturb the weight matrices of ``genome`` in place.

    Each matrix is mutated independently with probability PROB_MUTATION.
    A mutated matrix gets ``w += w * noise * mask`` where ``noise`` is normal
    with mean 0 and standard deviation 0.2 and ``mask`` is a random 0/1
    matrix, so only the masked-in entries change.
    """
    for net_names, layer_shapes in _MUTATION_PLAN:
        for layer, shape in layer_shapes:
            for net_name in net_names:
                if np.random.uniform(0, 1) < PROB_MUTATION:
                    net = getattr(genome, net_name)
                    weights = getattr(net, layer)
                    # Draw the noise before the mask to keep the RNG sequence unchanged.
                    noise = np.random.normal(_MUTATION_MEAN, _MUTATION_STDDEV, size=shape)
                    mask = np.random.randint(0, 2, shape)
                    weights += weights * noise * mask
                    setattr(net, layer, weights)


n_gen = 1
score_history = []        # [generation, best score including carried-over best genomes]
high_score_history = []   # [generation, best score of this generation's population]
mean_score_history = []   # [generation, mean score of this generation's top N_BEST]
while n_gen <= EPOCHS:
    start_time = time.time()

    # Pad the population with fresh genomes up to a multiple of CPU_CORE so it
    # splits into equal batches of CPU_CORE genomes.
    genomes = list(genomes)
    while len(genomes) % CPU_CORE != 0:
        genomes.append(_new_genome())

    # Score the population one batch at a time, each batch in its own pool.
    for start in range(0, len(genomes), CPU_CORE):
        if __name__ == '__main__':
            pool = multiprocessing.Pool(processes=CPU_CORE)
            try:
                genomes[start:start + CPU_CORE] = pool.map(genome_score, genomes[start:start + CPU_CORE])
            finally:
                # Always release the workers, even if scoring raised.
                pool.close()
                pool.join()

    # Sort by score.
    genomes.sort(key=lambda x: x.score, reverse=REVERSE)

    # Mean score of this generation's top N_BEST genomes.
    s = sum(g.score for g in genomes[:N_BEST]) / N_BEST

    # Best score of this generation.
    bs = genomes[0].score

    # Add the best genomes carried over from earlier generations.
    if best_genomes is not None:
        genomes.extend(best_genomes)

    # Sort again so the overall best genome comes first.
    genomes.sort(key=lambda x: x.score, reverse=REVERSE)

    score_history.append([n_gen, genomes[0].score])
    high_score_history.append([n_gen, bs])
    mean_score_history.append([n_gen, s])

    solve_time = time.time() - start_time

    # Report progress.
    print(f'EPOCH #{n_gen}\tHistory Best Score: {genomes[0].score}\tBest Score: {bs}\tMean Score: {s}\tSolve Time: {solve_time}')

    # Keep the overall top N_BEST genomes.
    best_genomes = deepcopy(genomes[:N_BEST])

    # Create children. Each child is appended to best_genomes right away, so
    # later children may pick earlier children as parents.
    for _ in range(N_CHILDREN):
        new_genome = deepcopy(best_genomes[0])
        a_genome = np.random.choice(best_genomes)
        b_genome = np.random.choice(best_genomes)
        _crossover(new_genome, a_genome, b_genome)
        best_genomes.append(new_genome)

    # Rebuild the population from mutated copies of the best genomes and children.
    genomes = []
    for _ in range(N_POPULATION // len(best_genomes)):
        for bg in best_genomes:
            new_genome = deepcopy(bg)
            _mutate(new_genome)
            genomes.append(new_genome)

    # If this generation's best score is worse than the initial score,
    # replace the second half of the population with fresh genomes.
    worse_than_initial = bs < score_ini if REVERSE else bs > score_ini
    if worse_than_initial:
        genomes[len(genomes)//2:] = [_new_genome() for _ in range(N_POPULATION//2)]

    n_gen += 1

EPOCH #1	History Best Score: 87.27203330913528	Best Score: 87.27203330913528	Mean Score: 82.97966712369546	Solve Time: 30.26993203163147
EPOCH #2	History Best Score: 89.11653097632227	Best Score: 89.11653097632227	Mean Score: 88.74804273808073	Solve Time: 35.23268413543701
EPOCH #3	History Best Score: 89.795798755362	Best Score: 89.795798755362	Mean Score: 88.96638635635405	Solve Time: 31.762394905090332
EPOCH #4	History Best Score: 89.85314230082612	Best Score: 89.85314230082612	Mean Score: 87.9647894078277	Solve Time: 29.396706104278564
EPOCH #5	History Best Score: 90.22620897838857	Best Score: 90.22620897838857	Mean Score: 88.61647613901089	Solve Time: 52.32519507408142
EPOCH #6	History Best Score: 90.22620897838857	Best Score: 89.66683276586188	Mean Score: 88.96569365512339	Solve Time: 38.36196303367615
EPOCH #7	History Best Score: 90.22620897838857	Best Score: 89.02976053145188	Mean Score: 88.58962840578874	Solve Time: 35.136098861694336
EPOCH #8	History Best Score: 90.22620897838

## 4. 결과 및 결언 (Conclusion & Discussion)

### score 계산

### 시뮬레이터 속도를 개선한 sim_block을 사용함



In [6]:
from module.sim_block import Simulator

# Predict a schedule with the top-ranked genome and score it with the simulator.
simulator, data = Simulator(), Data()
submission = best_genomes[0].predict(data)
# get_score returns the score together with a stock table that is used to
# fill in the PRT columns afterwards.
score, df_stock = simulator.get_score(submission)
print(f'score : {score}')

score : 90.62471187370645


### Submission 파일 만들기

In [7]:
# Compute the PRT quantities from the simulated stock table.
# Take the row-to-row drop in each PRT stock column, discard the first 24*23
# rows of those differences, and round up after adding a 10% margin.
# NOTE(review): 24*23 presumably shifts the values 23 days earlier - confirm.
PRTs = df_stock[['PRT_1', 'PRT_2', 'PRT_3', 'PRT_4']].values
PRTs = np.ceil((PRTs[:-1] - PRTs[1:])[24*23:] * 1.1)

# Pad the tail with zero rows: 24*23 for the discarded rows plus 1 for the
# row lost when differencing.
PAD = np.zeros((24*23 + 1, 4))
PRTs = np.concatenate((PRTs, PAD), axis=0).astype(int)

# Write the PRT values into the submission and save it, named after the score.
submission.loc[:, 'PRT_1':'PRT_4'] = PRTs
submission.to_csv(f'submission_{score}.csv', index=False)