In [9]:
import pandas as pd
import numpy as np

file_path = 'rtu_data_full.csv'
output_file = 'hourly_refined_data.csv'
chunk_size = 200000 

resampled_chunks = []

print("데이터 정제 및 1시간 단위 리샘플링 시작")

# 1. 효율적인 로드를 위한 데이터 타입 지정
dtypes = {
    'module(equipment)': 'category',
    'operation': 'float32',  # 비중 계산을 위해 float로 로드
    'activePower': 'float32',
    'voltageR': 'float32', 'currentR': 'float32' # 이상치 제거용
}

for i, chunk in enumerate(pd.read_csv(file_path, chunksize=chunk_size, dtype=dtypes)):
    # (1) 시간축 정렬
    chunk['localtime'] = pd.to_datetime(chunk['localtime'].astype(str), format='%Y%m%d%H%M%S')
    
    # (2) 물리적 이상치 제거
    # 가동상태(operation)가 0인데 전력(activePower)이 비정상적으로 높은 경우 처리
    chunk.loc[(chunk['operation'] == 0) & (chunk['activePower'] > 100), 'activePower'] = 0
    # 전압(voltage)이 0인데 전류(current)가 흐르는 모순 데이터 보정
    chunk.loc[(chunk['voltageR'] < 10) & (chunk['currentR'] > 0.5), 'currentR'] = 0
    
    chunk.set_index('localtime', inplace=True)
    
    # (3) 리샘플링 및 시간당 평균/비중 계산
    # activePower: 시간당 평균값(kW)은 1시간 동안의 사용량(kWh)과 같음
    # operation: 해당 시간 내 가동 시간 비중(0.0 ~ 1.0)으로 변환
    resampled = chunk.groupby('module(equipment)').resample('H').agg({
        'activePower': 'mean',
        'operation': 'mean', 
        'voltageR': 'mean', 'voltageS': 'mean', 'voltageT': 'mean',
        'voltageRS': 'mean', 'voltageST': 'mean', 'voltageTR': 'mean',
        'currentR': 'mean', 'currentS': 'mean', 'currentT': 'mean',
        'powerFactorR': 'mean', 'powerFactorS': 'mean', 'powerFactorT': 'mean',
        'reactivePowerLagging': 'mean',
        'accumActiveEnergy': 'max' # 누적값은 해당 시간의 마지막 값 유지
    }).reset_index()
    
    resampled_chunks.append(resampled)
    if i % 5 == 0: print(f"{i*chunk_size} 행 정제 중...")

# 2. 모든 데이터 통합 및 결측치 처리
df_hourly = pd.concat(resampled_chunks)
df_final = df_hourly.groupby(['localtime', 'module(equipment)']).mean().reset_index()

# (4) 결측치 처리: 선형 보간법(Linear Interpolation) 적용
df_final = df_final.sort_values(['module(equipment)', 'localtime'])
df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear')).reset_index(drop=True)

# 3. 컬럼명 정리 및 저장
df_final = df_final.rename(columns={'localtime': 'datetime'})
df_final.to_csv(output_file, index=False)
print(f"'{output_file}' 생성")

데이터 정제 및 1시간 단위 리샘플링 시작


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


0 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


1000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


2000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


3000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


4000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


5000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


6000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


7000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


8000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


9000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


10000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


11000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


12000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


13000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


14000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


15000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


16000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


17000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


18000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


19000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


20000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


21000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


22000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


23000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


24000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


25000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


26000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


27000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


28000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


29000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


30000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


31000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


32000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({


33000000 행 정제 중...


  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  resampled = chunk.groupby('module(equipment)').resample('H').agg({
  df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear')).reset_index(drop=True)
  df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear')).reset_index(drop=True)
  df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear')).reset_index(drop=True)
  df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear')).reset_index(drop=True)
  df_final = df_final.groupby('module(equipment)').apply(lambda x: x.interpolate(method='linear'))

'hourly_refined_data.csv' 생성


In [10]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46813 entries, 0 to 46812
Data columns (total 18 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   datetime              46813 non-null  datetime64[ns]
 1   module(equipment)     46813 non-null  object        
 2   activePower           46813 non-null  float32       
 3   operation             46813 non-null  float32       
 4   voltageR              46813 non-null  float32       
 5   voltageS              46813 non-null  float64       
 6   voltageT              46813 non-null  float64       
 7   voltageRS             46813 non-null  float64       
 8   voltageST             46813 non-null  float64       
 9   voltageTR             46813 non-null  float64       
 10  currentR              46813 non-null  float32       
 11  currentS              46813 non-null  float64       
 12  currentT              46813 non-null  float64       
 13  powerFactorR    

In [1]:
import pandas as pd

"""
설비별 전력 소비와 물리 변수 간 상관관계 분석

목적:
- 각 설비(module)별로 유효전력(activePower)과 가장 높은 통계적 연관성을 보이는
  물리 변수를 탐색하여,
- 이후 예측 모델의 핵심 입력 피처 선정을 위한 근거로 활용한다.

※ 본 분석은 상관관계(correlation) 기반의 탐색적 분석으로,
  인과관계를 직접적으로 증명하지는 않는다.
"""

# 1. 전처리 완료된 1시간 단위 데이터 로드
df = pd.read_csv('hourly_refined_data.csv')

# 2. 분석 대상 물리 변수 정의
# (전압, 전류, 역률, 무효전력 등 전력 소비와 물리적으로 연관된 변수들)
physical_cols = [
    'voltageR', 'voltageS', 'voltageT',
    'currentR', 'currentS', 'currentT',
    'powerFactorR', 'powerFactorS', 'powerFactorT',
    'reactivePowerLagging'
]

# 3. 설비별 분석 결과를 저장할 리스트
results = []

# 4. 설비(module) 단위 반복 분석
for module in df['module(equipment)'].unique():
    
    # 4-1. 특정 설비의 데이터만 필터링
    df_mod = df[df['module(equipment)'] == module]
    
    # 4-2. 유효전력(activePower)과 물리 변수 간 상관계수 계산
    # → Pearson 상관계수 사용 (pandas 기본)
    corr = (
        df_mod[['activePower'] + physical_cols]
        .corr()['activePower']
        .drop('activePower')
    )
    
    # 4-3. 절댓값 기준으로 activePower와 가장 강한 상관을 보이는 변수 선정
    top_var = corr.abs().idxmax()
    top_score = corr[top_var]
    
    # 4-4. 결과 정리
    results.append({
        '설비명': module,
        '전력 변동 설명 변수': top_var,
        '상관계수': round(top_score, 4),
        '관계 방향': '양의 상관(비례)' if top_score > 0 else '음의 상관(반비례)'
    })

# 5. 결과를 DataFrame으로 변환
df_factors = pd.DataFrame(results)

# 6. 상관계수 절댓값 기준으로 정렬 (영향력 크기 중심 해석)
df_factors = df_factors.reindex(
    df_factors['상관계수'].abs().sort_values(ascending=False).index
)

print("\n--- 설비별 전력 소비 상관관계 분석 결과 ---")
df_factors



--- 설비별 전력 소비 상관관계 분석 결과 ---


Unnamed: 0,설비명,전력 변동 설명 변수,상관계수,관계 방향
0,1(PM-3),reactivePowerLagging,0.7917,양의 상관(비례)
7,17(6호기),reactivePowerLagging,0.7718,양의 상관(비례)
5,15(예비건조기),reactivePowerLagging,0.7559,양의 상관(비례)
10,3(분쇄기(2)),reactivePowerLagging,0.7557,양의 상관(비례)
4,14(2호기),reactivePowerLagging,0.7197,양의 상관(비례)
11,4(분쇄기(1)),currentT,0.7129,양의 상관(비례)
9,2(L-1전등),currentS,0.7029,양의 상관(비례)
3,13(3호기),reactivePowerLagging,0.6813,양의 상관(비례)
12,5(좌측분전반),reactivePowerLagging,0.6782,양의 상관(비례)
2,12(4호기),reactivePowerLagging,0.6745,양의 상관(비례)
