# In [4]:
"""Time-series Generative Adversarial Networks (TimeGAN) Codebase.
Reference: Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, 
"Time-series Generative Adversarial Networks," 
Neural Information Processing Systems (NeurIPS), 2019.
Paper link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks
Last updated Date: April 24th 2020
Code author: Jinsung Yoon (jsyoon0823@gmail.com)
-----------------------------
data_loading.py
(0) MinMaxScaler: Min Max normalizer
(1) sine_data_generation: Generate sine dataset
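(2) real_data_loading: Load and preprocess real data
  - onephase / phase2phase: ../ElecCarFault/onephase.xlsx, ../ElecCarFault/phase2phase.xlsx
  - stock_data: https://finance.yahoo.com/quote/GOOG/history?p=GOOG
  - energy_data (upstream TimeGAN dataset, not accepted by real_data_loading): http://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction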
"""

## Necessary Packages
import numpy as np
import pandas as pd

def MinMaxScaler(data):
  """Rescale every column of `data` to the [0, 1] range.

  Each column is shifted by its minimum and divided by its range
  (max - min), both taken along axis 0.

  Args:
    - data: original data (reduced along axis 0)

  Returns:
    - norm_data: normalized data
  """
  col_min = np.min(data, 0)
  col_range = np.max(data, 0) - col_min
  # The tiny offset keeps the division defined when a column is constant
  # (range == 0); such a column normalizes to all zeros.
  return (data - col_min) / (col_range + 1e-27)


def real_data_loading(data_name, seq_len, part):
  """Load a dataset, normalize it and cut it into shuffled windows.

  The feature columns are min-max normalized over the whole recording
  (i.e. before the normal/fault split), the 'Time' column is used only to
  select the requested part and is not part of the output, and the selected
  rows are cut into overlapping windows of `seq_len` consecutive rows
  (stride 1) that are returned in random order.

  Args:
    - data_name: 'onephase', 'phase2phase' or 'stock'
    - seq_len: sequence length (number of rows per window)
    - part: 'normal' keeps rows with Time < 0.5, 'fault' keeps rows with
      Time >= 0.5; any other value keeps every row

  Returns:
    - data: list of (seq_len, n_features) numpy arrays in shuffled order
    - idx: the permutation used for shuffling; data[k] is the window that
      starts at row idx[k] of the selected part

  Raises:
    - ValueError: if data_name is not supported, or if part is 'normal' or
      'fault' but the loaded data has no 'Time' column to split on
  """
  signal_columns = ['Time', 'Va', 'Vb', 'Vc', 'rpm', 'RPM_Ref', 'Ide',
                    'Idref', 'Iqe', 'Iqref', 'Ia', 'Ib', 'Ic']

  # Explicit exception instead of `assert`, which is removed under `python -O`.
  if data_name not in ('onephase', 'phase2phase', 'stock'):
    raise ValueError(
        "data_name must be 'onephase', 'phase2phase' or 'stock', got %r"
        % (data_name,))

  if data_name == 'onephase':
    ori_data = pd.read_excel('../ElecCarFault/onephase.xlsx', engine='openpyxl')
    # The sheet starts with five unnamed columns that are not signals.
    ori_data = ori_data.drop(
        ['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'],
        axis=1)
    ori_data.columns = signal_columns
    # The last row of this sheet is discarded.
    ori_data = ori_data[:-1]
  elif data_name == 'phase2phase':
    ori_data = pd.read_excel('../ElecCarFault/phase2phase.xlsx', engine='openpyxl')
    ori_data.columns = signal_columns
  else:  # 'stock'
    ori_data = pd.read_csv('../ElecCarFault/stock_data.csv')

  # NOTE(review): only the xlsx datasets are given a 'Time' column above; the
  # stock CSV presumably has none (TODO confirm), so tolerate its absence
  # instead of failing with a KeyError.
  has_time = 'Time' in ori_data.columns
  features = ori_data.drop(['Time'], axis=1) if has_time else ori_data

  # Normalize on the full recording so both parts share the same scale.
  norm_data = MinMaxScaler(features)

  if part in ('normal', 'fault'):
    if not has_time:
      raise ValueError(
          "part=%r needs a 'Time' column, which dataset %r does not have"
          % (part, data_name))
    if part == 'normal':
      # Up to (but excluding) 0.5 s.
      norm_data = norm_data.loc[ori_data['Time'] < 0.5]
    else:
      # From 0.5 s onward.
      norm_data = norm_data.loc[ori_data['Time'] >= 0.5]

  values = norm_data.to_numpy()

  # Cut data by sequence length.
  # NOTE: the last full window (start = len(values) - seq_len) is not
  # produced; kept as-is so the number of samples matches existing results.
  windows = [values[start:start + seq_len]
             for start in range(len(values) - seq_len)]

  # Mix the datasets (to make it similar to i.i.d)
  idx = np.random.permutation(len(windows))
  data = [windows[j] for j in idx]

  return data, idx