# Setting

In [2]:
!pip install numpy pandas tqdm matplotlib tensorflow

Collecting numpy
  Downloading numpy-1.21.6-cp37-cp37m-win_amd64.whl.metadata (2.2 kB)
Collecting pandas
  Downloading pandas-1.3.5-cp37-cp37m-win_amd64.whl.metadata (12 kB)
Collecting tqdm
  Downloading tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
     ---------------------------------------- 57.6/57.6 kB ? eta 0:00:00
Collecting matplotlib
  Downloading matplotlib-3.5.3-cp37-cp37m-win_amd64.whl.metadata (6.7 kB)
Collecting tensorflow
  Downloading tensorflow-2.11.0-cp37-cp37m-win_amd64.whl.metadata (2.5 kB)
Collecting pytz>=2017.3 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.11.0-py3-none-any.whl.metadata (785 bytes)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.38.0-py3-none-any.whl.metadata (138 kB)
     -------------------------------------- 138.5/138.5 kB 8.0 MB/s eta 0:00:00
Collecting kiwisolver>=1.0.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp3

In [3]:
import numpy as np
import random 
import os
import sys
import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter

In [4]:
import tensorflow as tf 

from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError

from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization, Concatenate,AveragePooling1D
from keras.models import Model, load_model

from keras.optimizers import Adam, RMSprop

In [5]:
def set_seed(seed=42):    
    '''랜덤시드 고정. Hyperparm tuning 제외 모든 학습환경에서 
    같은 성능이 나오게 합니다.'''
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
set_seed(42)

In [6]:
print("Tensorflow GPU availability: ", tf.config.list_physical_devices('GPU'))

Tensorflow GPU availability:  []


# Import Data

In [9]:
DATA_PATH = './data/'
train_np = np.load(DATA_PATH+ 'data_train.npy', mmap_mode='r')
train_fgs_np = np.load(DATA_PATH + 'data_train_FGS.npy', mmap_mode='r')

print(f'Train data shape: {train_np}')
print(f'Train FGS data shape: {train_fgs_np.shape}')

train_sample = []
train_fgs_sample = []
for i in tqdm(range(len(train_np)), desc='loading train data'):
    train_sample.append(train_np[i])
    
for i in tqdm(range(len(train_fgs_np)), desc='loading train FGS data'):
    train_fgs_sample.append(train_fgs_np[i])
    
train = np.array(train_sample)
train_fgs = np.array(train_fgs_sample)

Train data shape: [[[[ 1.88955519e+02  1.08366985e+02  5.90633542e+01 ...  1.91694517e+02
     2.54063843e+02  1.66011867e+02]
   [ 3.67620699e+01  1.57874156e+02  1.72848628e+02 ...  1.55217230e+02
     2.28993275e+02  7.92664335e+01]
   [ 6.79881939e+01  1.36239765e+02  2.77543206e+02 ...  1.48340189e+02
    -3.77345830e+01 -1.79199181e+00]
   ...
   [ 8.23059115e+02  7.79071176e+02  9.24898338e+02 ...  9.79443347e+02
     9.54523156e+02  1.10677499e+03]
   [ 8.67730008e+02  1.09155820e+03  8.79908470e+02 ...  8.11119008e+02
     9.34101513e+02  9.26174721e+02]
   [ 1.04584193e+03  7.94109785e+02  8.34430277e+02 ...  8.15952201e+02
     8.30136751e+02  9.05976194e+02]]

  [[ 1.95389974e+02  2.02666014e+02  1.88385170e+02 ...  1.89581272e+02
     2.65692507e+02  2.51403598e+02]
   [ 1.06958642e+02  1.57874156e+02  1.34025461e+02 ...  1.62756794e+02
     3.65977702e+01  9.95518931e+01]
   [ 9.10335121e+00 -4.07578814e+00  1.12026359e+02 ... -6.66193811e+01
     2.30929035e+02  2.722251

loading train data: 100%|██████████| 673/673 [00:00<00:00, 313501.40it/s]


loading train FGS data: 100%|██████████| 673/673 [00:00<00:00, 171983.59it/s]


KeyboardInterrupt: 

## From here:

In [None]:
'''실험 기본 셋업'''
''' 기본 Approach - 1D CNN으로 mean 값 뽑아낸 후 2D CNN으로 구체적인 내용 뽑기'''
TUNING_MODE = 'OFF' # ON/OFF

METRIC1 = 'MeanAbsoluteError'
METRIC2 = 'MeanAbsoluteError'

OPTIMIZER = 'adam' # adam, rmsprop
LR = 0.001   #adam과 rmsprop의 defaut값
BATCH_SIZE = 64
EPOCHS = 15

#-------아직 구현 안 된 부분-------

DECAY_RATE=0.2
DECAY_STEP=200

In [None]:
train = train_np
train_fgs = train_fgs_np

# Model

In [None]:
# 차후 learning rate decay 하는 scheduler도 train함수에 도입할 것. 현재는 없음.
def scheduler(epoch, lr, decay_rate, decay_step):
    if epoch%decay_step == 0 and epoch:
        return lr * decay_rate
    return lr

In [None]:
def build_1d_cnn(trial=None):
    model = tf.keras.Sequential()
    model.add(Conv1D(32, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=(2,2), padding='same'))
    model.add(BatchNormalization())
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=(2,2), padding='same'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D(pool_size=(2,2), padding='same'))
    model.add(Conv1D(256, 3, activation='relu'))
    
    model.add(Flatten())

    model.add(Dense(500, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.1))
    
    if OPTIMIZER == 'adam':
        optimizer = Adam(learning_rate=LR)
    elif OPTIMIZER == 'rmsprop':
        optimizer = RMSprop(learning_rate=LR)
    
    model.compile(optimizer=optimizer, loss=LOSS_FUNCTION, metrics=[METRIC1])
    return model

# 1D CNN

In [None]:
model = build_1d_cnn(trial=None)
history = model.fit(X_train, y_train, batch_size = BATCH_SIZE, epochs=EPOCHS, validation_split=0.2)

## Learning Curve

In [None]:
plt.figure(figsize=(15,6))

plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='Train loss', color='#8502d1')
plt.plot(history.history['val_loss'], label='Validation loss', color='darkorange')
plt.lengend()
plt.title("Loss Evolution")
plt.xlabel('Epochs')
plt.ylabel('Loss')

plt.subplot(1,2,1)
plt.plot(history.history['accuracy'], label='Train Acc.', color='#8502d1')
plt.plot(history.history['val_accuracy'], label='Validation Acc.', color='darkorange')
plt.lengend()
plt.title("Accuracy Evolution")
plt.xlabel('Epochs')
plt.ylabel('Accuracy')

plt.show()