In [None]:
## 라이브러리
import glob
import os
import datetime
import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

In [None]:
################# Load data
# glob.glob() gives no ordering guarantee, so sort the paths: this keeps the
# column order built by make_df() and the file picked by index below the same
# on every run and on every machine.
wal_resource = sorted(glob.glob('wal_data/*'))
rf_resource = sorted(glob.glob('rf_data/*'))

# NOTE(review): `a` and `wal_point` are not read anywhere in the visible cells,
# and `wal_point` is derived from rf_resource despite its name — confirm intent.
a = os.listdir('wal_data')
wal_point = os.path.basename(f'{rf_resource[1]}')

# Use the '시간' (time) column of one file as the shared time axis.
# presumably every file carries the same timestamps — TODO confirm
date = pd.read_csv(f'{wal_resource[1]}')['시간']
df_col = pd.to_datetime(date)


def make_df(resource, column=None):
    """Append one column per CSV file in `resource` to the global `df_col`.

    Each file contributes a single column, named after the file (base name
    without extension).

    Args:
        resource: iterable of CSV file paths.
        column: name of the CSV column to read from every file. When omitted,
            '해발표고' is read if `resource` equals the module-level
            `wal_resource`, otherwise '강수량' (the original behaviour).

    Side effects:
        Rebinds the module-level `df_col` to the widened frame. Nothing is
        changed when `resource` is empty.
    """
    global df_col

    paths = list(resource)
    if not paths:
        return

    if column is None:
        column = '해발표고' if resource == wal_resource else '강수량'

    # Collect all new columns first and concatenate once; concatenating inside
    # the loop would copy the growing frame again for every file.
    new_columns = [
        pd.read_csv(f'{path}')[column].to_frame(
            name=os.path.splitext(os.path.basename(f'{path}'))[0])
        for path in paths
    ]
    df_col = pd.concat([df_col, *new_columns], axis=1)

# Append the '강수량' columns of the rf_data files first, then the '해발표고'
# columns of the wal_data files; the call order fixes the column order of df_col.
make_df(rf_resource)
make_df(wal_resource)

In [None]:
########################################### Hourly data
# Keep every 6th row, starting at position 5.
# presumably the raw files are sampled every 10 minutes — TODO confirm
# Explicit copy so the in-place pop() below never depends on pandas'
# view-vs-copy behaviour for slices of df_col.
df = df_col[5::6].copy()

# NOTE(review): '시간' was already passed through pd.to_datetime when df_col was
# built, so `format` is presumably ignored here — confirm.
date_time = pd.to_datetime(df.pop('시간'), format='%y.%m.%d %H:%M')

# Only the last expression of a cell is rendered automatically, so show the
# summary statistics explicitly (computed before interpolation, as before).
IPython.display.display(df.describe().transpose())

# Fill gaps by interpolating against the index values.
df = df.interpolate(method='values')

column_indices = {name: i for i, name in enumerate(df.columns)}

In [None]:
############################################ Train / validation / test split
# Contiguous 70% / 20% / 10% split in row order (no shuffling).
n = len(df)
train_df = df.iloc[:int(n*0.7)]
val_df = df.iloc[int(n*0.7):int(n*0.9)]
test_df = df.iloc[int(n*0.9):]

num_features = len(df.columns)

# Statistics come from the training rows only and are reused for every split.
train_mean, train_std = train_df.mean(), train_df.std()

train_df = train_df.sub(train_mean).div(train_std)
val_df = val_df.sub(train_mean).div(train_std)
test_df = test_df.sub(train_mean).div(train_std)

In [None]:
###################################### Standardization

# Scale the full frame with the training mean / std, then reshape it to long
# form (one row per value) so each column gets its own violin.
df_std = df.sub(train_mean).div(train_std)
df_std = df_std.melt(var_name='Column', value_name='Normalized')
plt.figure(figsize=(12, 6))
ax = sns.violinplot(data=df_std, x='Column', y='Normalized')
_ = ax.set_xticklabels(df.columns, rotation=90)

In [None]:
#################### Window class for single-/multi-output and single-/multi-time-step prediction:
#################### handles indices and offsets, splits windows of features into (features, labels) pairs, plots window contents,
#################### and uses tf.data.Dataset to build batched windows from the training, evaluation and test data.


class WindowGenerator():
  """Describes a fixed-size window over the rows of the data frames.

  A window covers `input_width + shift` consecutive rows. The first
  `input_width` rows are the inputs; the last `label_width` rows are the
  labels.

  Args:
    input_width: number of rows at the start of the window used as inputs.
    label_width: number of rows at the end of the window used as labels.
    shift: number of rows added after the inputs to reach the window end.
    train_df, val_df, test_df: data frames to window. The defaults are the
      module-level frames as they exist when this class statement runs.
    label_columns: optional list of column names to keep as labels; when
      None, no `label_columns_indices` attribute is created.
  """

  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
    # Keep references to the three data splits.
    self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

    # Column-name -> position lookups (labels first, then all columns).
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {
          col: pos for pos, col in enumerate(label_columns)}
    self.column_indices = {
        col: pos for pos, col in enumerate(train_df.columns)}

    # Window geometry.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift
    self.total_window_size = input_width + shift

    # Inputs occupy the first `input_width` positions of the window.
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    # Labels occupy the last `label_width` positions of the window.
    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    summary_lines = [
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}',
    ]
    return '\n'.join(summary_lines)


def split_window(self, features):
  """Split a batch of windows into an (inputs, labels) pair.

  Args:
    features: rank-3 batch indexed as (batch, time, features).

  Returns:
    Tuple `(inputs, labels)`: `inputs` is the `self.input_slice` part of the
    time axis, `labels` the `self.labels_slice` part. When
    `self.label_columns` is set, `labels` keeps only those columns, in the
    listed order.
  """
  inputs = features[:, self.input_slice, :]
  labels = features[:, self.labels_slice, :]

  wanted_columns = self.label_columns
  if wanted_columns is not None:
    # Pick each requested column and stack them back as the last axis.
    selected = []
    for column_name in wanted_columns:
      selected.append(labels[:, :, self.column_indices[column_name]])
    labels = tf.stack(selected, axis=-1)

  # Slicing drops the static shape information; restore the known time
  # dimension so the resulting `tf.data.Datasets` are easier to inspect.
  inputs.set_shape([None, self.input_width, None])
  labels.set_shape([None, self.label_width, None])

  return inputs, labels

# Register split_window() as a WindowGenerator method.
WindowGenerator.split_window = split_window

w2 = WindowGenerator(input_width=6, label_width=1, shift=1,
                     label_columns=['andongdaegyo'])
# A bare `w2` in the middle of a cell is evaluated but never rendered (only a
# cell's last expression is), so print the summary explicitly.
print(w2)

# Stack three slices, the length of the total window:
example_window = tf.stack([np.array(train_df[:w2.total_window_size]),
                           np.array(train_df[100:100+w2.total_window_size]),
                           np.array(train_df[200:200+w2.total_window_size])])


example_inputs, example_labels = w2.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'labels shape: {example_labels.shape}')

In [None]:
##################################### Plot

# Store the example batch on this instance; plot() reads it via self.example.
# NOTE(review): a later cell installs a read-only `example` property on
# WindowGenerator. Re-running this line after that cell has run would
# presumably raise AttributeError — confirm.
w2.example = example_inputs, example_labels

def plot(self, model=None, plot_col='andongdaegyo', max_subplots=3):
  """Plot inputs, labels and optional predictions for the example batch.

  One subplot is drawn per window, for at most `max_subplots` windows of
  `self.example`.

  Args:
    model: optional callable; when given, `model(inputs)` is evaluated once
      and its values for the plotted column are drawn as predictions.
    plot_col: name of the column to plot; must be a key of
      `self.column_indices`.
    max_subplots: upper bound on the number of windows (subplot rows) drawn.

  Raises:
    KeyError: if `plot_col` is not in `self.column_indices`.
  """
  inputs, labels = self.example
  plt.figure(figsize=(12, 8))
  plot_col_index = self.column_indices[plot_col]
  max_n = min(max_subplots, len(inputs))

  # The label column position does not depend on the window, so resolve it
  # once. None means plot_col is not one of the label columns.
  if self.label_columns:
    label_col_index = self.label_columns_indices.get(plot_col, None)
  else:
    label_col_index = plot_col_index

  # Run the model once for the whole batch instead of once per subplot, and
  # only when its output will actually be drawn.
  show_predictions = (model is not None and label_col_index is not None
                      and max_n > 0)
  predictions = model(inputs) if show_predictions else None

  for n in range(max_n):
    # Size the grid from max_n: a fixed 3-row grid cannot hold more than
    # three subplots when max_subplots is larger.
    plt.subplot(max_n, 1, n+1)
    plt.ylabel(f'{plot_col} [normed]')
    plt.plot(self.input_indices, inputs[n, :, plot_col_index],
             label='Inputs', marker='.', zorder=-10)

    if label_col_index is None:
      # plot_col is not a label column: only the input line is drawn.
      continue

    plt.scatter(self.label_indices, labels[n, :, label_col_index],
                edgecolors='k', label='Labels', c='#2ca02c', s=64)
    if show_predictions:
      plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                  marker='X', edgecolors='k', label='Predictions',
                  c='#ff7f0e', s=64)

    if n == 0:
      plt.legend()

  plt.xlabel('Time [h]')

# Register plot() as a WindowGenerator method.
WindowGenerator.plot = plot

# The label column (plot()'s default plot_col), spelled out explicitly.
w2.plot(plot_col='andongdaegyo')

# A column that is not among w2's label_columns: only the inputs are drawn.
w2.plot(plot_col='dochen')

In [None]:
########################### tf.data.Dataset

def make_dataset(self, data):
  """Build a dataset of (inputs, labels) window batches from `data`.

  Args:
    data: array-like of rows; converted to a float32 numpy array.

  Returns:
    The result of mapping `self.split_window` over the sliding-window
    dataset (window length `self.total_window_size`, stride 1, shuffled,
    batches of 32).
  """
  values = np.array(data, dtype=np.float32)
  windows = tf.keras.preprocessing.timeseries_dataset_from_array(
      data=values,
      targets=None,
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=True,
      batch_size=32,
  )
  return windows.map(self.split_window)

# Attach make_dataset() to the class as a method; the dataset properties
# defined below call it as self.make_dataset(...).
WindowGenerator.make_dataset = make_dataset


@property
def train(self):
  """Dataset of windows over `self.train_df`; rebuilt on every access."""
  frame = self.train_df
  return self.make_dataset(frame)

@property
def val(self):
  """Dataset of windows over `self.val_df`; rebuilt on every access."""
  frame = self.val_df
  return self.make_dataset(frame)

@property
def test(self):
  """Dataset of windows over `self.test_df`; rebuilt on every access."""
  frame = self.test_df
  return self.make_dataset(frame)

@property
def example(self):
  """Return a batch of inputs and labels to plot, caching it on first use."""
  cached = getattr(self, '_example', None)
  if cached is not None:
    return cached

  # No batch stored yet: take the first one from .train ...
  first_batch = next(iter(self.train))
  # ... and remember it for the next call.
  self._example = first_batch
  return first_batch

# Register the dataset properties on the class.
WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

# Inspect the element spec. Printed explicitly: an expression in the middle of
# a cell is evaluated but not rendered.
print(w2.train.element_spec)

# Draw one batch and report its shapes.
for example_inputs, example_labels in w2.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')



In [None]:
########## Single step, one hour at a time
# One input row and one label row with shift=1, so each window spans two
# consecutive rows; the label is the 'andongdaegyo' column.
single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=1,
    label_columns=['andongdaegyo'])
single_step_window


In [None]:
########## Iterate over data batches
# take(1) limits the loop to a single batch. The loop variables overwrite the
# notebook-level example_inputs / example_labels assigned in earlier cells.
for example_inputs, example_labels in single_step_window.train.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')

In [None]:
################# RNN-LSTM

# Upper bound on training epochs; early stopping may end training sooner.
MAX_EPOCHS = 20


def compile_and_fit(model, window, patience=2):
  """Compile `model` and train it on the datasets of `window`.

  The model is compiled with mean-squared-error loss, the Adam optimizer and
  a mean-absolute-error metric, then fitted on `window.train` for at most
  MAX_EPOCHS epochs while validating on `window.val`.

  Args:
    model: object exposing `compile(...)` and `fit(...)`.
    window: object exposing `train` and `val` datasets.
    patience: `patience` value passed to the EarlyStopping callback, which
      monitors 'val_loss' in 'min' mode.

  Returns:
    Whatever `model.fit` returns.
  """
  stop_on_plateau = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss', patience=patience, mode='min')

  model.compile(
      loss=tf.losses.MeanSquaredError(),
      optimizer=tf.optimizers.Adam(),
      metrics=[tf.metrics.MeanAbsoluteError()],
  )

  return model.fit(
      window.train,
      epochs=MAX_EPOCHS,
      validation_data=window.val,
      callbacks=[stop_on_plateau],
  )


# 24 input rows and 24 label rows with shift=1: the label rows are the input
# rows moved forward by one step.
wide_window = WindowGenerator(
    input_width=24, label_width=24, shift=1,
    label_columns=['andongdaegyo'])

# Printed explicitly: a bare `wide_window` in the middle of a cell is evaluated
# but never rendered.
print(wide_window)


lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(32, return_sequences=True),
    # Shape => [batch, time, features]
    tf.keras.layers.Dense(units=1)
])


print('Input shape:', wide_window.example[0].shape)
print('Output shape:', lstm_model(wide_window.example[0]).shape)


history = compile_and_fit(lstm_model, wide_window)
wide_window.plot(lstm_model)

In [None]:
# Diagnostic cell: report the installed numpy version.
# NOTE(review): numpy is already imported as `np` in the first cell; this
# second import only binds the additional name `numpy`.
import numpy
print(numpy.__version__)

In [None]:
# NOTE(review): this is not Python; it only runs through IPython's handling of
# `pip` as a magic command. It removes numpy, which the import cell at the top
# requires, so a later Restart & Run All would fail at `import numpy as np`
# unless numpy is reinstalled first. pip presumably asks for confirmation here
# because `-y` is not passed — confirm. Environment changes like this belong
# outside the analysis notebook.
pip uninstall numpy