# 기학습 모형 시뮬레이션 데이터 적합

- 기학습 모형에 시뮬레이션 데이터를 적합하기 위해, 정해진 파라미터 분포 안에서 시계열 데이터를 랜덤으로 생성하고 윈도우 단위로 전처리함

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [386]:
########################################################################################################
# window function
# Build the sliding-window sequences (one per valid timestep) of a single time series.
def window(df, timestep):
    """Return lagged sliding windows built from the first column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds the series. It is not modified.
    timestep : int
        Number of lag columns (``shift_0`` .. ``shift_{timestep-1}``) to build.

    Returns
    -------
    numpy.ndarray
        One row per index position that has no missing value after shifting.
        For a single-column frame, the row for position ``t`` is
        ``[x[t-timestep+1], ..., x[t]]`` (oldest value first).
    """
    # Work on a copy: adding the shift_* columns to the caller's frame would
    # leave stale lag columns behind, so a later call on the same frame with a
    # smaller timestep would return too many columns and too few rows.
    lagged = df.copy()
    for lag in range(timestep):
        lagged['shift_{}'.format(lag)] = lagged.iloc[:, 0].shift(lag)

    complete = lagged.dropna(axis=0)  # drop rows that contain a missing value
    complete = complete.iloc[:, ::-1]  # reverse the column order: largest lag first

    # After the reversal the original first column sits last; leave it out
    # (it duplicates shift_0).
    return complete.iloc[:, :-1].values

##########################################################################################################
# Simulation data generators
def generate_cosine_series(t, period, amplitude, trend_slope, trend_intercept=0, noise_level=0.1):
    """Return a cosine wave plus a linear trend plus Gaussian noise, evaluated at ``t``.

    ``period`` and ``amplitude`` shape the cosine term, ``trend_slope`` and
    ``trend_intercept`` define the straight line added to it, and
    ``noise_level`` scales one standard-normal draw per element of ``t``
    (taken from NumPy's global random state).
    """
    angular_freq = 2 * np.pi / period
    seasonal = amplitude * np.cos(angular_freq * t)
    linear = trend_slope * t
    noise = noise_level * np.random.randn(len(t))
    return seasonal + linear + trend_intercept + noise

def generate_sine_series(t, period, amplitude, trend_slope, trend_intercept=0, noise_level=0.1):
    """Return a sine wave plus a linear trend plus Gaussian noise, evaluated at ``t``.

    ``period`` and ``amplitude`` shape the sine term, ``trend_slope`` and
    ``trend_intercept`` define the straight line added to it, and
    ``noise_level`` scales one standard-normal draw per element of ``t``
    (taken from NumPy's global random state).
    """
    angular_freq = 2 * np.pi / period
    seasonal = amplitude * np.sin(angular_freq * t)
    linear = trend_slope * t
    noise = noise_level * np.random.randn(len(t))
    return seasonal + linear + trend_intercept + noise

def random_parameters():
    """Draw one random ``(period, amplitude, trend_slope, noise_level)`` tuple.

    All draws come from NumPy's global random state, in the order listed.
    """
    return (
        np.random.randint(1, 49),    # period: integer in 1..48
        np.random.normal(0, 0.5),    # amplitude: normal, mean 0, std 0.5
        np.random.normal(0, 0.03),   # trend slope: normal, mean 0, std 0.03
        np.random.uniform(0, 1),     # noise level: uniform on [0, 1)
    )

def generate_random_time_series(t):
    """Return a ``(2, len(t))`` array: a cosine series and a sine series over ``t``.

    Both series share one randomly drawn parameter set (period, amplitude,
    trend slope, noise level) but get independent noise draws. Values are
    rounded to 3 decimals. The cosine series is row 0, the sine series row 1.
    """
    period, amplitude, trend_slope, noise_level = random_parameters()
    # Cosine is generated before sine so the random draws happen in a fixed order.
    rounded_pair = [
        make_series(t, period, amplitude, trend_slope, noise_level=noise_level).round(3)
        for make_series in (generate_cosine_series, generate_sine_series)
    ]
    return np.array(rounded_pair)

## 시계열 데이터 랜덤 생성
- 주기 : 1~48
- 사인 및 코사인 진폭 : 평균 0, 표준편차 0.5인 정규분포에서 추출 (음수 가능)
- 추세 기울기 : 평균 0, 표준편차 0.03인 정규분포에서 추출 (음수 가능)
- 잔차 레벨 : 0~1

In [444]:
np.random.seed(101)
t = np.arange(0, 180, 1)

# 500 simulated series; each entry holds the (cosine, sine) pair over t.
simul500 = np.array([generate_random_time_series(t) for _ in range(500)])

# Train windows: drop the last 36 points of every series, then slide a 36-step window.
window_cosin_train = np.array([window(pd.DataFrame(pair[0][:-36]), 36) for pair in simul500])
window_sin_train = np.array([window(pd.DataFrame(pair[1][:-36]), 36) for pair in simul500])

# Test windows: only the last 36 points of every series, i.e. a single 36-step window.
window_cosin_test = np.array([window(pd.DataFrame(pair[0][-36:]), 36) for pair in simul500])
window_sin_test = np.array([window(pd.DataFrame(pair[1][-36:]), 36) for pair in simul500])

window_cosin_train.shape, window_sin_train.shape, window_cosin_test.shape, window_sin_test.shape
### train
# 500 time series
# 109 sequences per series
# each sequence has length 36
### test
# 500 time series
# 1 sequence per series
# same sequence length
((500, 109, 36), (500, 109, 36), (500, 1, 36), (500, 1, 36))

In [498]:
# Split every window into the model input X and the target y: the last `num`
# steps are the target, everything before them is the input.
num = 12


def _split_windows(windows, horizon):
    """Split a (series, sequence, step) array into flat X and y DataFrames.

    The last ``horizon`` steps of each sequence become one row of y and the
    preceding steps one row of X. Rows keep the series-major, sequence-minor
    order of ``windows`` and get a default 0..n-1 index.
    """
    n_steps = windows.shape[-1]
    # One reshape replaces building and concatenating a one-row DataFrame per sequence.
    features = windows[:, :, :-horizon].reshape(-1, n_steps - horizon)
    targets = windows[:, :, -horizon:].reshape(-1, horizon)
    return pd.DataFrame(features), pd.DataFrame(targets)


# cosine
# Nested [X, y] pairs per series and sequence; kept in case other cells use them.
cosin_X_y_split = [[[seq[:-num], seq[-num:]] for seq in series] for series in window_cosin_train]
# All inputs / all targets as one DataFrame each
cosin_X_train, cosin_y_train = _split_windows(window_cosin_train, num)


# sine
sin_X_y_split = [[[seq[:-num], seq[-num:]] for seq in series] for series in window_sin_train]
# All inputs / all targets as one DataFrame each
sin_X_train, sin_y_train = _split_windows(window_sin_train, num)

In [505]:
# Stack the cosine rows on top of the sine rows; inputs and targets use the same order.
X_train = pd.concat((cosin_X_train, sin_X_train), axis=0)
y_train = pd.concat((cosin_y_train, sin_y_train), axis=0)

X_train.shape, y_train.shape

((109000, 24), (109000, 12))

In [510]:
from sklearn.preprocessing import MinMaxScaler

In [515]:
# Fit one min-max scaler on inputs and targets placed side by side, then
# wrap the scaled array in a DataFrame again.
minmax = MinMaxScaler()
scale_df = pd.DataFrame(
    minmax.fit_transform(
        pd.concat((X_train, y_train), axis=1)
    )
)

In [521]:
# Split the scaled frame back into inputs and targets. The boundary is taken
# from X_train's column count so it follows any change of window or horizon
# instead of relying on a hard-coded 24.
# NOTE(review): "sclae" looks like a typo for "scale"; the names are kept
# unchanged because other cells may reference them.
sclae_X_train = scale_df.iloc[:, :X_train.shape[1]]
sclae_y_train = scale_df.iloc[:, X_train.shape[1]:]