In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
class PreProc:
    """Read one sheet of Excel workbooks and cut 1-D data into sliding-window batches.

    Attributes:
        input_dir: Directory the workbook file names are joined onto.
        pred_energy: Passed to ``pd.read_excel`` as ``sheet_name``.
        train_file: File name stored by ``set_train_test_files``.
        test_file: File name stored by ``set_train_test_files``.
    """

    def __init__(self, input_dir, pred_energy):
        """Store the workbook directory and the sheet selector."""
        self.input_dir = input_dir
        self.pred_energy = pred_energy

    def set_train_test_files(self, train_file, test_file):
        """Remember the file names of the training and test workbooks."""
        self.train_file = train_file
        self.test_file = test_file

    def get_df(self, file_name):
        """Read sheet ``self.pred_energy`` of ``input_dir/file_name``.

        Returns:
            Whatever ``pd.read_excel`` returns for that sheet selector.
        """
        return pd.read_excel(os.path.join(self.input_dir, file_name),
                             sheet_name=self.pred_energy)

    def get_target(self, file_name, target_name):
        """Read the workbook and return its ``target_name`` column."""
        return self.get_df(file_name)[target_name]

    def strip(self, obj):
        """Return the underlying NumPy array of a pandas Series/DataFrame.

        Any other object is returned unchanged.
        """
        if isinstance(obj, (pd.Series, pd.DataFrame)):
            return obj.values
        return obj

    def generator_1d(self, data, interval, batch_size=32):
        """Endlessly yield sliding-window batches cut from 1-D data.

        Sample ``i`` of a batch that starts at offset ``begin`` is
        ``X = data[begin + i : begin + i + interval]`` and
        ``y = data[begin + i + interval]``. The offset advances by one after
        every batch (so consecutive batches overlap) and wraps back to 0 once
        a full batch no longer fits.

        Args:
            data: Sequence supporting ``len``, integer indexing and slicing.
            interval: Number of consecutive values in each ``X`` window.
            batch_size: Number of samples per yielded batch.

        Yields:
            Tuple ``(X, y)`` of arrays with shapes
            ``(batch_size, interval, 1)`` and ``(batch_size, 1)``.

        Raises:
            ValueError: On the first ``next()`` call, if ``data`` has fewer
                than ``interval + batch_size`` elements. Without this check
                the loop would spin forever without yielding anything.
        """
        required = interval + batch_size
        if len(data) < required:
            raise ValueError(
                "data has {} element(s) but at least {} are needed "
                "(interval + batch_size)".format(len(data), required))

        begin = 0
        while True:
            # The last index read by a batch is begin + interval + batch_size - 1;
            # restart from the beginning once that would run past the data.
            if begin + interval + batch_size - 1 >= len(data):
                begin = 0
            X = [data[begin + i: begin + i + interval] for i in range(batch_size)]
            y = [data[begin + i + interval] for i in range(batch_size)]
            yield (np.asarray(X).reshape(batch_size, interval, 1),
                   np.asarray(y).reshape(batch_size, 1))
            begin += 1

### Test

In [3]:
# Smoke test: build a batch generator from the training workbook's target column.
input_dir = '../data'
data = PreProc(input_dir, '蒸気')  # second argument is used as the sheet name
data.set_train_test_files('201801010800.xlsx', '201802010800.xlsx')
# get_target reads the workbook itself, so no separate get_df call is needed;
# strip converts the returned column to its underlying array.
y = data.strip(data.get_target(data.train_file, '需要蒸気'))
gen = data.generator_1d(y, 24, 2)  # windows of 24 values, 2 samples per batch

In [4]:
# Pull one (X, y) batch from the generator and show the shape of each part.
a = next(gen)
print(*(part.shape for part in a[:2]))

(2, 24, 1) (2, 1)
