In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import glob
import re

################################################################
# Variables
################################################################
## Glob pattern for the source CSV files ('**' descends into sub-folders
## when the pattern is expanded recursively)
sourceCsvFolder = '../datasets/**/*.csv'
## Result file name, stamped with the time this cell is executed
resultFileName = f'./MultiResults_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
## Query conditions, one entry per result series:
##   [label, fraction excluded from the top, fraction excluded from the bottom]
conditions = [
    ['avg90',    0.05, 0.05],  # daily average of the 90% left after excluding the top  5% and bottom  5%
    ['avg90H10', 0.05, 0.85],  # daily average of the 10% left after excluding the top  5% and bottom 85%
    ['avg90L10', 0.85, 0.05],  # daily average of the 10% left after excluding the top 85% and bottom  5%
]

################################################################
# 소스코드
################################################################
def ReadSingleCSV(filename, trim_conditions=None):
    """Load one counter CSV and return hourly, quantile-trimmed means.

    The first CSV column is parsed as the timestamp and renamed to ``Date``;
    cells containing a single space are read as missing values. Backslashes
    in the remaining headers are replaced by ``/`` and the leading
    ``//<machine>/`` prefix is stripped from each header. The stripped machine
    name is stored in a ``MachineName`` index level.

    For every condition, each column is masked to the values lying between
    its own ``bottom`` quantile and its ``1 - top`` quantile (computed over
    the whole file), resampled to hourly means and interpolated. The hourly
    frames of all conditions are concatenated row-wise.

    Parameters
    ----------
    filename : str or path-like
        CSV file to read.
    trim_conditions : iterable of [label, top, bottom], optional
        ``label`` names the result series; ``top`` and ``bottom`` are the
        fractions excluded from the upper and lower end of each column.
        Defaults to the notebook-level ``conditions`` list.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Date`` with three column levels
        (counter path, ``MachineName``, ``Type``). Each condition contributes
        its own block of rows, so columns of the other conditions are NaN
        in those rows.

    Raises
    ------
    ValueError
        If no header after the first column carries a ``//<machine>/`` prefix.
    """
    if trim_conditions is None:
        # Fall back to the notebook-level configuration.
        trim_conditions = conditions

    df = pd.read_csv(filename, parse_dates=[0], na_values=[' '])

    # Normalise path separators so headers read //<machine>/<counter path>.
    col = [name.replace('\\', '/') for name in df.columns]

    # Take the machine name from the first counter header that carries the
    # prefix instead of assuming one fixed column position exists and matches.
    machine_prefix = '^//([^/]*)/'
    machineName = None
    for name in col[1:]:
        found = re.match(machine_prefix, name)
        if found:
            machineName = found.group(1)
            break
    if machineName is None:
        raise ValueError(
            f'No "//<machine>/" prefix found in the column headers of {filename}'
        )

    # The first column holds the timestamps.
    col[0] = "Date"

    # Strip the machine prefix so headers are comparable across machines.
    df.columns = [re.sub(machine_prefix, '', name) for name in col]

    df["MachineName"] = machineName
    df = df.set_index(["Date", "MachineName"])

    ret_cond = []
    for cond in trim_conditions:
        label, top_cut, bottom_cut = cond[0], cond[1], cond[2]

        # Per-column thresholds; values outside them become NaN and are
        # therefore ignored by the hourly mean below.
        upper = df.quantile(q=(1 - top_cut))
        lower = df.quantile(q=bottom_cut)

        df90 = (
            df.where(df <= upper)
              .where(df >= lower)
              .unstack()            # MachineName moves into the columns
              .resample('1h')
              .mean()
              .interpolate()        # fill hours without surviving samples
        )

        # Tag the frame with its condition label and move it into the columns.
        df90["Type"] = label
        df90 = df90.set_index("Type", append=True)
        ret_cond.append(df90.unstack())

    # Stack the per-condition frames on top of each other.
    return pd.concat(ret_cond)







In [2]:
# Accumulator for the per-file frames produced by the loading loop.
result = pd.DataFrame()

# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the processing order and the progress output are reproducible.
files = sorted(glob.glob(sourceCsvFolder, recursive=True))

files

['../datasets/DataCollector01.csv']

In [3]:
# Parse every discovered CSV. A file that fails is reported and skipped so
# the remaining files are still processed.
frames = []
for idx, x in enumerate(files):
    try:
        df = ReadSingleCSV(x)
        frames.append(df)
    except Exception as ex:
        print (f'Runtime Error: {ex}')
    print (f'{idx+1}/{len(files)} : {x} ')

# Concatenate once instead of growing the frame inside the loop: this avoids
# repeated copying, avoids mixing an empty seed frame into the concat, and
# makes the cell safe to re-run. Reversing keeps the newest-first row order
# that prepending each file produced.
result = pd.concat(frames[::-1]) if frames else pd.DataFrame()

1/1 : ../datasets/DataCollector01.csv 


In [4]:
# Collapse the hourly rows into one mean per calendar day, grouping on index
# level 0. 'D' is the documented calendar-day alias; the lower-case spelling
# is not guaranteed to stay accepted by newer pandas releases.
lastret = result.resample(rule='1D', level=0).mean()

In [12]:
# Reshape for display: move column level 1 (MachineName) and then the leading
# column level (the counter path) into the row index, leaving one column per
# condition Type.
lastret.stack(level=1).stack(level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Type,avg90,avg90H10,avg90L10
Date,MachineName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-05,WIN-3OPFVMF4N3A,Memory/% Committed Bytes In Use,1.732069e+01,1.741042e+01,1.728683e+01
2020-02-05,WIN-3OPFVMF4N3A,Memory/Available MBytes,6.950058e+03,6.953695e+03,6.941401e+03
2020-02-05,WIN-3OPFVMF4N3A,Memory/Cache Bytes,9.009619e+07,9.131839e+07,8.955987e+07
2020-02-05,WIN-3OPFVMF4N3A,Network Interface(6TO4 Adapter)/Bytes Total/sec,0.000000e+00,0.000000e+00,0.000000e+00
2020-02-05,WIN-3OPFVMF4N3A,Network Interface(6TO4 Adapter)/Output Queue Length,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...,...
2020-02-06,WIN-3OPFVMF4N3A,Processor(0)/% User Time,1.710648e-02,1.710648e-02,0.000000e+00
2020-02-06,WIN-3OPFVMF4N3A,Processor(1)/% User Time,2.515314e-02,2.515314e-02,0.000000e+00
2020-02-06,WIN-3OPFVMF4N3A,Processor(2)/% User Time,6.227633e-02,3.128287e-01,0.000000e+00
2020-02-06,WIN-3OPFVMF4N3A,Processor(3)/% User Time,2.861972e-02,2.861972e-02,0.000000e+00
