# 05. Pandas 고급 분석 - 실습 문제

## 실습 안내
- 총 10개 문제
- 시계열 분석, 이동평균, OEE, SPC 등 실무 분석
- 종합적인 제조 데이터 분석 능력 배양
- 실제 공장 모니터링 대시보드 구축

## 데이터 로드 및 전처리

In [111]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# 데이터 불러오기
production_df = pd.read_csv('../data/05_production.csv', encoding='utf-8-sig')
quality_df = pd.read_csv('../data/07_quality_inspection.csv', encoding='utf-8-sig', na_values=['\\N'])
sensor_df = pd.read_csv('../data/08_sensor_data.csv', encoding='utf-8-sig')
operation_df = pd.read_csv('../data/06_equipment_operation.csv', encoding='utf-8-sig')
equipment_df = pd.read_csv('../data/01_equipment.csv', encoding='utf-8-sig')
maintenance_df = pd.read_csv('../data/10_maintenance_history.csv', encoding='utf-8-sig')

# 날짜/시간 변환
production_df['production_date'] = pd.to_datetime(production_df['production_date'])
production_df['start_time'] = pd.to_datetime(production_df['start_time'])
production_df['end_time'] = pd.to_datetime(production_df['end_time'])
quality_df['inspection_time'] = pd.to_datetime(quality_df['inspection_time'])
sensor_df['measurement_time'] = pd.to_datetime(sensor_df['measurement_time'])
operation_df['start_time'] = pd.to_datetime(operation_df['start_time'])
operation_df['end_time'] = pd.to_datetime(operation_df['end_time'])

print("데이터 로드 완료!")

데이터 로드 완료!


---
## 문제 1: 일별 생산량 추이 분석

**시나리오**: 일별 생산량 추이를 분석하여 생산 패턴을 파악하세요.

**요구사항**:
1. 일별로 다음 집계:
   - 생산 건수
   - 총 생산량
   - 총 불량수
   - 불량률 (%)
2. 처음 30일 데이터 출력
3. 불량률이 가장 높았던 날 찾기

**힌트**: `groupby('production_date')`, 계산 컬럼, `idxmax()`

In [112]:
# 1. 일별로 다음 집계:
#    - 생산 건수
#    - 총 생산량
#    - 총 불량수
#    - 불량률 (%)
production_df['불량률'] = (production_df['defect_quantity']/production_df['actual_quantity']*100).round(4)
prod_summary = production_df.groupby('production_date').agg({'production_id':'count',
                                                             'actual_quantity':'sum',
                                                             'defect_quantity':'sum',
                                                             '불량률':'mean'}).round(2)
prod_summary.columns = ['생산 건수', '총 생산량', '총 불량수', '불량률']
prod_summary

Unnamed: 0_level_0,생산 건수,총 생산량,총 불량수,불량률
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-01,20,2019,114,5.59
2024-01-02,22,2380,128,5.41
2024-01-03,16,1848,95,5.29
2024-01-04,22,2358,128,5.42
2024-01-05,20,2330,123,5.25
...,...,...,...,...
2024-03-27,22,2496,386,15.63
2024-03-28,20,2107,310,14.94
2024-03-29,22,2551,391,15.38
2024-03-30,20,2050,301,14.93


In [113]:
# 2. 처음 30일 데이터 출력
prod_summary.head(30)

Unnamed: 0_level_0,생산 건수,총 생산량,총 불량수,불량률
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-01,20,2019,114,5.59
2024-01-02,22,2380,128,5.41
2024-01-03,16,1848,95,5.29
2024-01-04,22,2358,128,5.42
2024-01-05,20,2330,123,5.25
2024-01-06,22,2503,138,5.59
2024-01-07,24,2831,146,5.26
2024-01-08,22,2471,130,5.2
2024-01-09,20,2210,114,5.21
2024-01-10,22,2491,126,5.13


In [114]:
# 3. 불량률이 가장 높았던 날 찾기
prod_summary['불량률'].idxmax()

Timestamp('2024-03-27 00:00:00')

---
## 문제 2: 센서 데이터 시간별 리샘플링

**시나리오**: INJ-001 설비의 센서 데이터를 시간 단위로 집계하세요.

**요구사항**:
1. sensor_df에서 equipment_id='INJ-001'인 데이터 필터링
2. measurement_time을 인덱스로 설정
3. 4시간 단위로 리샘플링하여 다음 집계:
   - temperature: 평균
   - pressure: 평균
   - vibration: 최대값
4. 처음 20개 결과 출력

**힌트**: `set_index()`, `resample('4H')`, `agg()`

In [115]:
# 1. sensor_df에서 equipment_id='INJ-001'인 데이터 필터링
sensor_1 = sensor_df[sensor_df['equipment_id'] == 'INJ-001'].copy()
sensor_1

Unnamed: 0,sensor_id,equipment_id,measurement_time,temperature,pressure,vibration,current,voltage,rpm,created_at
0,1,INJ-001,2024-01-01 00:00:00,183.93,148.65,2.6838,48.05,218.83,1795.32,2026-01-30 00:45:52
5,6,INJ-001,2024-01-01 01:00:00,179.26,155.23,2.6619,41.47,221.62,1792.30,2026-01-30 00:45:52
10,11,INJ-001,2024-01-01 02:00:00,179.78,146.19,2.3795,42.07,221.48,1805.22,2026-01-30 00:45:52
15,16,INJ-001,2024-01-01 03:00:00,181.30,143.83,2.4134,47.61,211.96,1803.69,2026-01-30 00:45:52
20,21,INJ-001,2024-01-01 04:00:00,179.96,152.57,2.4701,44.85,215.77,1769.70,2026-01-30 00:45:52
...,...,...,...,...,...,...,...,...,...,...
10895,10896,INJ-001,2024-03-31 19:00:00,194.25,152.07,2.5405,45.38,215.45,1817.42,2026-01-30 00:45:52
10900,10901,INJ-001,2024-03-31 20:00:00,192.63,145.85,2.4024,46.84,222.41,1773.20,2026-01-30 00:45:52
10905,10906,INJ-001,2024-03-31 21:00:00,196.04,149.04,2.5558,41.28,227.03,1830.42,2026-01-30 00:45:52
10910,10911,INJ-001,2024-03-31 22:00:00,194.29,145.70,2.4921,43.75,216.40,1778.40,2026-01-30 00:45:52


In [116]:
# 2. measurement_time을 인덱스로 설정
sensor_1.set_index('measurement_time', inplace=True)
sensor_1

Unnamed: 0_level_0,sensor_id,equipment_id,temperature,pressure,vibration,current,voltage,rpm,created_at
measurement_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-01-01 00:00:00,1,INJ-001,183.93,148.65,2.6838,48.05,218.83,1795.32,2026-01-30 00:45:52
2024-01-01 01:00:00,6,INJ-001,179.26,155.23,2.6619,41.47,221.62,1792.30,2026-01-30 00:45:52
2024-01-01 02:00:00,11,INJ-001,179.78,146.19,2.3795,42.07,221.48,1805.22,2026-01-30 00:45:52
2024-01-01 03:00:00,16,INJ-001,181.30,143.83,2.4134,47.61,211.96,1803.69,2026-01-30 00:45:52
2024-01-01 04:00:00,21,INJ-001,179.96,152.57,2.4701,44.85,215.77,1769.70,2026-01-30 00:45:52
...,...,...,...,...,...,...,...,...,...
2024-03-31 19:00:00,10896,INJ-001,194.25,152.07,2.5405,45.38,215.45,1817.42,2026-01-30 00:45:52
2024-03-31 20:00:00,10901,INJ-001,192.63,145.85,2.4024,46.84,222.41,1773.20,2026-01-30 00:45:52
2024-03-31 21:00:00,10906,INJ-001,196.04,149.04,2.5558,41.28,227.03,1830.42,2026-01-30 00:45:52
2024-03-31 22:00:00,10911,INJ-001,194.29,145.70,2.4921,43.75,216.40,1778.40,2026-01-30 00:45:52


In [117]:
# 3. 4시간 단위로 리샘플링하여 다음 집계:
#    - temperature: 평균
#    - pressure: 평균
#    - vibration: 최대값
sensor_1_4h = sensor_1.resample('4h').agg({'temperature':'mean',
                                           'pressure':'mean',
                                           'vibration':'max'}).round(2)
sensor_1_4h

Unnamed: 0_level_0,temperature,pressure,vibration
measurement_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01 00:00:00,181.07,148.48,2.68
2024-01-01 04:00:00,179.98,152.66,2.59
2024-01-01 08:00:00,182.58,146.17,2.54
2024-01-01 12:00:00,179.02,148.13,2.80
2024-01-01 16:00:00,180.33,151.48,2.64
...,...,...,...
2024-03-31 04:00:00,195.04,150.07,2.66
2024-03-31 08:00:00,196.25,148.43,2.69
2024-03-31 12:00:00,196.20,149.14,2.77
2024-03-31 16:00:00,196.05,149.23,2.59


In [118]:
# 4. 처음 20개 결과 출력
sensor_1_4h.head(20)

Unnamed: 0_level_0,temperature,pressure,vibration
measurement_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01 00:00:00,181.07,148.48,2.68
2024-01-01 04:00:00,179.98,152.66,2.59
2024-01-01 08:00:00,182.58,146.17,2.54
2024-01-01 12:00:00,179.02,148.13,2.8
2024-01-01 16:00:00,180.33,151.48,2.64
2024-01-01 20:00:00,180.96,149.12,2.97
2024-01-02 00:00:00,179.9,146.0,2.74
2024-01-02 04:00:00,178.24,151.72,2.7
2024-01-02 08:00:00,180.26,149.71,2.81
2024-01-02 12:00:00,181.34,149.94,2.81


---
## 문제 3: 생산량 이동평균 계산

**시나리오**: 일별 생산량에 이동평균을 적용하여 추세를 파악하세요.

**요구사항**:
1. 일별 총 생산량 집계
2. 7일 이동평균 계산
3. 30일 이동평균 계산
4. 원본, 7일MA, 30일MA를 함께 출력 (처음 60일)
5. 이동평균이 실제 생산량보다 높은 날 (하락 추세) 찾기

**힌트**: `rolling(window=n).mean()`, 비교 연산

In [119]:
# 1. 일별 총 생산량 집계
daily_prod = production_df.groupby('production_date')['actual_quantity'].sum()
daily_prod

production_date
2024-01-01    2019
2024-01-02    2380
2024-01-03    1848
2024-01-04    2358
2024-01-05    2330
              ... 
2024-03-27    2496
2024-03-28    2107
2024-03-29    2551
2024-03-30    2050
2024-03-31    2133
Name: actual_quantity, Length: 91, dtype: int64

In [120]:
# 2. 7일 이동평균 계산
daily_prod_7MA = daily_prod.rolling(7).mean().round(2)
daily_prod_7MA

production_date
2024-01-01        NaN
2024-01-02        NaN
2024-01-03        NaN
2024-01-04        NaN
2024-01-05        NaN
               ...   
2024-03-27    2199.43
2024-03-28    2151.86
2024-03-29    2238.29
2024-03-30    2192.86
2024-03-31    2195.14
Name: actual_quantity, Length: 91, dtype: float64

In [121]:
# 3. 30일 이동평균 계산
daily_prod_30MA = daily_prod.rolling(30).mean().round(2)
daily_prod_30MA

production_date
2024-01-01        NaN
2024-01-02        NaN
2024-01-03        NaN
2024-01-04        NaN
2024-01-05        NaN
               ...   
2024-03-27    2283.93
2024-03-28    2280.20
2024-03-29    2278.27
2024-03-30    2274.40
2024-03-31    2275.13
Name: actual_quantity, Length: 91, dtype: float64

In [122]:
# 4. 원본, 7일MA, 30일MA를 함께 출력 (처음 60일)
# daily_prod_analysis = pd.concat([daily_prod, daily_prod_7MA, daily_prod_30MA], axis=1)
# daily_prod_analysis.columns = ['actual_quantity', 'actual_quantity_7MA', 'actual_quantity_30MA']
daily_prod_analysis = pd.DataFrame(data={'생산량':daily_prod, '7일MA':daily_prod_7MA, '30일MA':daily_prod_30MA})
daily_prod_analysis.head(60)

Unnamed: 0_level_0,생산량,7일MA,30일MA
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01,2019,,
2024-01-02,2380,,
2024-01-03,1848,,
2024-01-04,2358,,
2024-01-05,2330,,
2024-01-06,2503,,
2024-01-07,2831,2324.14,
2024-01-08,2471,2388.71,
2024-01-09,2210,2364.43,
2024-01-10,2491,2456.29,


In [123]:
# 5. 이동평균이 실제 생산량보다 높은 날 (하락 추세) 찾기
daily_prod_analysis[daily_prod_analysis['7일MA'] > daily_prod_analysis['생산량']]

Unnamed: 0_level_0,생산량,7일MA,30일MA
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-09,2210,2364.43,
2024-01-11,2254,2441.43,
2024-01-12,2376,2448.0,
2024-01-13,2325,2422.57,
2024-01-14,2168,2327.86,
2024-01-15,1960,2254.86,
2024-01-17,1813,2201.29,
2024-01-20,1722,2169.57,
2024-01-22,1996,2191.0,
2024-01-24,2135,2223.0,


---
## 문제 4: 전일 대비 생산량 변화 분석

**시나리오**: 일별 생산량의 전일 대비 변화를 분석하세요.

**요구사항**:
1. 일별 총 생산량 집계
2. 전일 생산량 추가 (`shift()`)
3. 전일 대비 증감량 계산 (`diff()`)
4. 전일 대비 변화율(%) 계산 (`pct_change()`)
5. 변화율이 가장 큰 날(증가) 상위 5개 출력
6. 변화율이 가장 작은 날(감소) 상위 5개 출력

**힌트**: shift, diff, pct_change, sort_values

In [124]:
# 1. 일별 총 생산량 집계
daily_prod = production_df.groupby('production_date')['actual_quantity'].sum()
daily_prod

production_date
2024-01-01    2019
2024-01-02    2380
2024-01-03    1848
2024-01-04    2358
2024-01-05    2330
              ... 
2024-03-27    2496
2024-03-28    2107
2024-03-29    2551
2024-03-30    2050
2024-03-31    2133
Name: actual_quantity, Length: 91, dtype: int64

In [125]:
# 2. 전일 생산량 추가 (`shift()`)
daily_prod.shift(1)

production_date
2024-01-01       NaN
2024-01-02    2019.0
2024-01-03    2380.0
2024-01-04    1848.0
2024-01-05    2358.0
               ...  
2024-03-27    2132.0
2024-03-28    2496.0
2024-03-29    2107.0
2024-03-30    2551.0
2024-03-31    2050.0
Name: actual_quantity, Length: 91, dtype: float64

In [126]:
# 3. 전일 대비 증감량 계산 (`diff()`)
daily_prod.diff()

production_date
2024-01-01      NaN
2024-01-02    361.0
2024-01-03   -532.0
2024-01-04    510.0
2024-01-05    -28.0
              ...  
2024-03-27    364.0
2024-03-28   -389.0
2024-03-29    444.0
2024-03-30   -501.0
2024-03-31     83.0
Name: actual_quantity, Length: 91, dtype: float64

In [127]:
# 4. 전일 대비 변화율(%) 계산 (`pct_change()`)
(daily_prod.pct_change()*100).round(2)

production_date
2024-01-01      NaN
2024-01-02    17.88
2024-01-03   -22.35
2024-01-04    27.60
2024-01-05    -1.19
              ...  
2024-03-27    17.07
2024-03-28   -15.58
2024-03-29    21.07
2024-03-30   -19.64
2024-03-31     4.05
Name: actual_quantity, Length: 91, dtype: float64

In [128]:
# 5. 변화율이 가장 큰 날(증가) 상위 5개 출력
prod_change = pd.DataFrame(data={'생산량':daily_prod,
                                 '전일생산량':daily_prod.shift(1),
                                 '증감량':daily_prod.diff(),
                                 '변화율':(daily_prod.pct_change()*100).round(2)})

# prod_change.sort_values('변화율', ascending=False).head(5)
prod_change.nlargest(5, '변화율')

Unnamed: 0_level_0,생산량,전일생산량,증감량,변화율
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-02-05,2684,1861.0,823.0,44.22
2024-01-18,2496,1813.0,683.0,37.67
2024-01-21,2282,1722.0,560.0,32.52
2024-03-02,2781,2111.0,670.0,31.74
2024-03-09,2433,1876.0,557.0,29.69


In [129]:
# 6. 변화율이 가장 작은 날(감소) 상위 5개 출력
# prod_change.sort_values('변화율').head(5)
prod_change.nsmallest(5, '변화율')

Unnamed: 0_level_0,생산량,전일생산량,증감량,변화율
production_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-20,1722,2515.0,-793.0,-31.53
2024-01-17,1813,2513.0,-700.0,-27.86
2024-02-04,1861,2443.0,-582.0,-23.82
2024-01-03,1848,2380.0,-532.0,-22.35
2024-03-22,1946,2440.0,-494.0,-20.25


---
## 문제 5: 센서 데이터 이상치 탐지 (3-Sigma)

**시나리오**: PRESS-001 설비의 압력 센서 이상치를 탐지하세요.

**요구사항**:
1. sensor_df에서 equipment_id='PRESS-001' 필터링
2. pressure 컬럼의 평균과 표준편차 계산
3. 3-Sigma 방식으로 상한/하한 계산:
   - 하한 = 평균 - 3×표준편차
   - 상한 = 평균 + 3×표준편차
4. 이상치 플래그 컬럼 생성
5. 이상치 건수 및 비율 출력
6. 이상치 데이터 처음 10개 출력

**힌트**: mean(), std(), 조건 연산

In [130]:
# 1. sensor_df에서 equipment_id='PRESS-001' 필터링
sensor_2 = sensor_df[sensor_df['equipment_id'] == 'PRESS-001'].copy()
sensor_2

Unnamed: 0,sensor_id,equipment_id,measurement_time,temperature,pressure,vibration,current,voltage,rpm,created_at
2,3,PRESS-001,2024-01-01 00:00:00,82.29,202.09,3.4924,120.14,372.88,489.11,2026-01-30 00:45:52
7,8,PRESS-001,2024-01-01 01:00:00,89.25,199.71,3.8572,120.72,376.77,507.23,2026-01-30 00:45:52
12,13,PRESS-001,2024-01-01 02:00:00,83.78,202.47,4.0678,119.62,381.51,499.31,2026-01-30 00:45:52
17,18,PRESS-001,2024-01-01 03:00:00,85.13,198.35,3.6726,118.05,383.94,523.17,2026-01-30 00:45:52
22,23,PRESS-001,2024-01-01 04:00:00,90.96,197.01,3.4260,121.03,382.57,510.30,2026-01-30 00:45:52
...,...,...,...,...,...,...,...,...,...,...
10897,10898,PRESS-001,2024-03-31 19:00:00,87.08,192.21,3.5833,118.62,372.98,476.41,2026-01-30 00:45:52
10902,10903,PRESS-001,2024-03-31 20:00:00,85.11,199.46,3.4071,120.77,379.07,512.11,2026-01-30 00:45:52
10907,10908,PRESS-001,2024-03-31 21:00:00,84.77,198.27,3.4478,120.93,379.83,523.54,2026-01-30 00:45:52
10912,10913,PRESS-001,2024-03-31 22:00:00,87.89,203.67,3.5983,121.21,378.43,491.99,2026-01-30 00:45:52


In [131]:
# 2. pressure 컬럼의 평균과 표준편차 계산
sensor_2_mean = sensor_2['pressure'].mean()
sensor_2_std = sensor_2['pressure'].std()
print(sensor_2_mean, sensor_2_std)

199.9600457875458 3.3883257307462116


In [132]:
# 3. 3-Sigma 방식으로 상한/하한 계산:
#    - 하한 = 평균 - 3×표준편차
#    - 상한 = 평균 + 3×표준편차
upper_bound = sensor_2_mean + 3*sensor_2_std
lower_bound = sensor_2_mean - 3*sensor_2_std
print(upper_bound, lower_bound)

210.12502297978443 189.79506859530716


In [133]:
# 4. 이상치 플래그 컬럼 생성
sensor_2['outlier'] = (sensor_2['pressure'] < lower_bound) | (sensor_2['pressure'] > upper_bound)
sensor_2

Unnamed: 0,sensor_id,equipment_id,measurement_time,temperature,pressure,vibration,current,voltage,rpm,created_at,outlier
2,3,PRESS-001,2024-01-01 00:00:00,82.29,202.09,3.4924,120.14,372.88,489.11,2026-01-30 00:45:52,False
7,8,PRESS-001,2024-01-01 01:00:00,89.25,199.71,3.8572,120.72,376.77,507.23,2026-01-30 00:45:52,False
12,13,PRESS-001,2024-01-01 02:00:00,83.78,202.47,4.0678,119.62,381.51,499.31,2026-01-30 00:45:52,False
17,18,PRESS-001,2024-01-01 03:00:00,85.13,198.35,3.6726,118.05,383.94,523.17,2026-01-30 00:45:52,False
22,23,PRESS-001,2024-01-01 04:00:00,90.96,197.01,3.4260,121.03,382.57,510.30,2026-01-30 00:45:52,False
...,...,...,...,...,...,...,...,...,...,...,...
10897,10898,PRESS-001,2024-03-31 19:00:00,87.08,192.21,3.5833,118.62,372.98,476.41,2026-01-30 00:45:52,False
10902,10903,PRESS-001,2024-03-31 20:00:00,85.11,199.46,3.4071,120.77,379.07,512.11,2026-01-30 00:45:52,False
10907,10908,PRESS-001,2024-03-31 21:00:00,84.77,198.27,3.4478,120.93,379.83,523.54,2026-01-30 00:45:52,False
10912,10913,PRESS-001,2024-03-31 22:00:00,87.89,203.67,3.5983,121.21,378.43,491.99,2026-01-30 00:45:52,False


In [134]:
# 5. 이상치 건수 및 비율 출력
print('outlier_cnt:', sum(sensor_2['outlier']))
print('outlier_pct:', round(sum(sensor_2['outlier'])/len(sensor_2)*100, 2), '%')

outlier_cnt: 6
outlier_pct: 0.27 %


In [135]:
# 6. 이상치 데이터 처음 10개 출력
sensor_2[sensor_2['outlier']].head(10)

Unnamed: 0,sensor_id,equipment_id,measurement_time,temperature,pressure,vibration,current,voltage,rpm,created_at,outlier
827,828,PRESS-001,2024-01-07 21:00:00,83.32,212.63,3.1379,117.29,383.67,458.46,2026-01-30 00:45:52,True
1467,1468,PRESS-001,2024-01-13 05:00:00,83.58,210.36,3.5575,119.84,377.42,490.4,2026-01-30 00:45:52,True
4427,4428,PRESS-001,2024-02-06 21:00:00,81.04,212.1,3.2676,123.47,383.09,503.62,2026-01-30 00:45:52,True
5402,5403,PRESS-001,2024-02-15 00:00:00,81.97,211.25,3.671,119.35,381.96,486.34,2026-01-30 00:45:52,True
5997,5998,PRESS-001,2024-02-19 23:00:00,83.74,188.69,3.842,118.22,377.65,521.25,2026-01-30 00:45:52,True
6862,6863,PRESS-001,2024-02-27 04:00:00,85.74,210.45,3.5355,124.14,388.72,495.46,2026-01-30 00:45:52,True


---
## 문제 6: OEE (Overall Equipment Effectiveness) 계산

**시나리오**: 생산 데이터에서 설비별 OEE를 계산하세요.

**요구사항**:
1. production_df에서 다음 계산:
   - 양품률 = good_quantity / actual_quantity
   - 실제 작업시간(분) = (end_time - start_time)
   - 성능률 = (actual_quantity × cycle_time) / 실제작업시간
   - 가동률 = 1.0 (간소화)
   - OEE = 가동률 × 성능률 × 양품률 × 100
2. 설비별 평균 OEE 계산
3. OEE 순위 출력 (높은 순)
4. OEE가 70% 미만인 설비 찾기

**힌트**: 시간 차이 계산, 복합 계산, groupby

In [136]:
# 1. production_df에서 다음 계산:
#    - 양품률 = good_quantity / actual_quantity
#    - 실제 작업시간(분) = (end_time - start_time)
#    - 성능률 = (actual_quantity × cycle_time) / 실제작업시간
#    - 가동률 = 1.0 (간소화)
#    - OEE = 가동률 × 성능률 × 양품률 × 100
production_df['양품률'] = (production_df['good_quantity']/production_df['actual_quantity']).round(4)
production_df['실제 작업시간(분)'] = ((production_df['end_time'] - production_df['start_time']).dt.total_seconds()/60).round(2)
production_df['성능률'] = ((production_df['actual_quantity']*production_df['cycle_time'])/(production_df['실제 작업시간(분)']*60)).clip(upper=1.0).round(4)
# production_df['성능률'] = (production_df['cycle_time']/production_df['실제 작업시간(분)']).clip(upper=1.0).round(4)
production_df['가동률'] = 1.0
production_df['OEE'] = (production_df['가동률']*production_df['성능률']*production_df['양품률']*100).round(2)
production_df

Unnamed: 0,production_id,equipment_id,product_code,production_date,start_time,end_time,target_quantity,actual_quantity,good_quantity,defect_quantity,...,operator_id,shift,created_at,updated_at,불량률,양품률,실제 작업시간(분),성능률,가동률,OEE
0,1,INJ-001,BUMPER-A,2024-01-01,2024-01-01 08:14:00,2024-01-01 09:53:32,97,81,77,4,...,OP003,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,4.9383,0.9506,99.53,1.0,1.0,95.06
1,2,INJ-001,BUMPER-A,2024-01-01,2024-01-01 21:02:00,2024-01-01 22:33:43,83,78,72,6,...,OP006,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,7.6923,0.9231,91.72,1.0,1.0,92.31
2,3,INJ-002,BUMPER-A,2024-01-01,2024-01-01 10:12:00,2024-01-01 13:16:28,149,135,132,3,...,OP001,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,2.2222,0.9778,184.47,1.0,1.0,97.78
3,4,INJ-002,DASH-C,2024-01-01,2024-01-01 12:48:00,2024-01-01 15:16:31,100,92,90,2,...,OP003,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,2.1739,0.9783,148.52,1.0,1.0,97.83
4,5,INJ-002,DOOR-B,2024-01-01,2024-01-01 20:48:00,2024-01-01 23:12:13,123,129,122,7,...,OP004,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,5.4264,0.9457,144.22,1.0,1.0,94.57
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,1868,PRESS-002,BUMPER-A,2024-03-31,2024-03-31 20:19:00,2024-03-31 23:25:19,150,144,119,25,...,OP006,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.3611,0.8264,186.32,1.0,1.0,82.64
1868,1869,PRESS-002,DASH-C,2024-03-31,2024-04-01 00:15:00,2024-04-01 02:59:58,136,130,109,21,...,OP004,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,16.1538,0.8385,164.97,1.0,1.0,83.85
1869,1870,PRESS-002,BUMPER-A,2024-03-31,2024-04-01 05:53:00,2024-04-01 07:26:15,84,80,66,14,...,OP004,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.5000,0.8250,93.25,1.0,1.0,82.50
1870,1871,ASM-001,BUMPER-A,2024-03-31,2024-03-31 10:24:00,2024-03-31 13:25:41,143,121,101,20,...,OP003,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,16.5289,0.8347,181.68,1.0,1.0,83.47


In [137]:
# 2. 설비별 평균 OEE 계산
eq_OEE = (production_df.groupby('equipment_id')['OEE'].mean()).round(2)
eq_OEE

equipment_id
ASM-001      87.88
INJ-001      89.24
INJ-002      91.30
PRESS-001    90.09
PRESS-002    89.32
Name: OEE, dtype: float64

In [138]:
# 3. OEE 순위 출력 (높은 순)
eq_OEE.sort_values(ascending=False)

equipment_id
INJ-002      91.30
PRESS-001    90.09
PRESS-002    89.32
INJ-001      89.24
ASM-001      87.88
Name: OEE, dtype: float64

In [139]:
# 4. OEE가 70% 미만인 설비 찾기
eq_OEE[eq_OEE < 70]

Series([], Name: OEE, dtype: float64)

---
## 문제 7: SPC 관리도 데이터 생성

**시나리오**: DASH-C 제품의 측정값에 대한 SPC X-bar 관리도를 생성하세요.

**요구사항**:
1. quality_df에서 product_code='DASH-C' 필터링
2. 일별로 measurement_value의 평균, 최소, 최대, 표준편차 계산
3. 전체 일별 평균의 중심선(CL) 계산
4. 관리상한선(UCL) = CL + 3×표준편차
5. 관리하한선(LCL) = CL - 3×표준편차
6. 관리 이탈 일자 찾기 (평균이 UCL 초과 또는 LCL 미만)
7. 처음 20일 데이터 출력

**힌트**: 날짜 추출, groupby, 통계 계산, 조건 필터링

In [140]:
# 1. quality_df에서 product_code='DASH-C' 필터링
qual_1 = quality_df[quality_df['product_code'] == 'DASH-C'].copy()
qual_1


Unnamed: 0,inspection_id,production_id,equipment_id,product_code,inspection_time,inspection_type,result,defect_code,measurement_value,measurement_unit,inspector_id,lot_no,sample_size,notes,created_at
37,38,4,INJ-002,DASH-C,2024-01-01 13:12:49,FINAL,PASS,,399.6176,mm,OP007,LOT2024010100202,1,,2026-01-30 01:24:59
38,39,4,INJ-002,DASH-C,2024-01-01 13:17:56,FINAL,PASS,,400.6489,mm,OP007,LOT2024010100202,1,,2026-01-30 01:24:59
39,40,4,INJ-002,DASH-C,2024-01-01 13:31:41,FINAL,PASS,,397.8915,mm,OP008,LOT2024010100202,1,,2026-01-30 01:24:59
40,41,4,INJ-002,DASH-C,2024-01-01 13:05:20,FINAL,PASS,,401.8200,mm,OP008,LOT2024010100202,1,,2026-01-30 01:24:59
41,42,4,INJ-002,DASH-C,2024-01-01 14:32:12,FINAL,PASS,,404.6305,mm,OP008,LOT2024010100202,1,,2026-01-30 01:24:59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,37340,1869,PRESS-002,DASH-C,2024-04-01 02:52:41,FINAL,FAIL,D003,388.9337,mm,OP007,LOT2024033100211,1,,2026-01-30 01:25:02
37340,37341,1869,PRESS-002,DASH-C,2024-04-01 00:37:26,FINAL,FAIL,D003,406.3608,mm,OP007,LOT2024033100211,1,,2026-01-30 01:25:02
37341,37342,1869,PRESS-002,DASH-C,2024-04-01 00:56:32,FINAL,FAIL,D003,410.8572,mm,OP007,LOT2024033100211,1,,2026-01-30 01:25:02
37342,37343,1869,PRESS-002,DASH-C,2024-04-01 00:34:54,FINAL,FAIL,D005,387.0993,mm,OP008,LOT2024033100211,1,,2026-01-30 01:25:02


In [141]:
# 2. 일별로 measurement_value의 평균, 최소, 최대, 표준편차 계산
qual_1_daily = qual_1.groupby(qual_1['inspection_time'].dt.date)['measurement_value'].agg(['mean', 'min', 'max', 'std']).round(2)
qual_1_daily

Unnamed: 0_level_0,mean,min,max,std
inspection_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01-01,400.53,385.78,414.00,6.14
2024-01-02,399.22,385.11,414.54,6.86
2024-01-03,399.32,385.11,414.22,7.11
2024-01-04,401.06,385.31,414.87,6.62
2024-01-05,400.29,385.30,414.45,6.55
...,...,...,...,...
2024-03-28,401.51,385.66,414.47,8.71
2024-03-29,399.34,385.08,414.68,9.06
2024-03-30,398.84,385.38,414.46,8.29
2024-03-31,401.13,385.17,414.69,8.32


In [142]:
# 3. 전체 일별 평균의 중심선(CL) 계산
qual_1_daily_CL = qual_1_daily['mean'].mean()
qual_1_daily_CL

np.float64(400.0377173913042)

In [143]:
# 4. 관리상한선(UCL) = CL + 3×표준편차qual_1_daily_UCL
qual_1_daily_UCL = qual_1_daily_CL + 3*qual_1_daily['mean'].std()
qual_1_daily_UCL

np.float64(402.30819390097884)

In [144]:
# 5. 관리하한선(LCL) = CL - 3×표준편차
qual_1_daily_LCL = qual_1_daily_CL - 3*qual_1_daily['mean'].std()
qual_1_daily_LCL

np.float64(397.7672408816296)

In [145]:
# 6. 관리 이탈 일자 찾기 (평균이 UCL 초과 또는 LCL 미만)
# qual_1_daily[(qual_1_daily['mean'] > qual_1_daily_UCL) | (qual_1_daily['mean'] < qual_1_daily_LCL)]
# 관리상태 컬럼을 만들되, 초기값은 OK로 셋팅하고, UCL과 LCL 밖의 데이터는 따로 표시
qual_1_daily['관리상태'] = 'OK'
qual_1_daily.loc[(qual_1_daily['mean'] > qual_1_daily_UCL), '관리상태'] = 'UCL초과'
qual_1_daily.loc[(qual_1_daily['mean'] < qual_1_daily_LCL), '관리상태'] = 'LCL미만'
qual_1_daily[qual_1_daily['관리상태'] != 'OK']

Unnamed: 0_level_0,mean,min,max,std,관리상태
inspection_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [146]:
# 7. 처음 20일 데이터 출력

---
## 문제 8: 월별 생산 트렌드 및 변화율 분석

**시나리오**: 월별 생산 추이와 전월 대비 변화율을 분석하세요.

**요구사항**:
1. production_date에서 년-월 추출
2. 월별로 다음 집계:
   - 생산 건수
   - 총 생산량
   - 평균 불량률
   - 설비 가동 수 (equipment_id unique count)
3. 전월 대비 생산량 변화율(%) 계산
4. 전월 대비 불량률 변화(차이) 계산
5. 처음 12개월 데이터 출력

**힌트**: `dt.to_period('M')`, pct_change, diff

In [147]:
# 1. production_date에서 년-월 추출
production_df['year-month'] = production_df['production_date'].dt.to_period('M')
production_df

Unnamed: 0,production_id,equipment_id,product_code,production_date,start_time,end_time,target_quantity,actual_quantity,good_quantity,defect_quantity,...,shift,created_at,updated_at,불량률,양품률,실제 작업시간(분),성능률,가동률,OEE,year-month
0,1,INJ-001,BUMPER-A,2024-01-01,2024-01-01 08:14:00,2024-01-01 09:53:32,97,81,77,4,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,4.9383,0.9506,99.53,1.0,1.0,95.06,2024-01
1,2,INJ-001,BUMPER-A,2024-01-01,2024-01-01 21:02:00,2024-01-01 22:33:43,83,78,72,6,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,7.6923,0.9231,91.72,1.0,1.0,92.31,2024-01
2,3,INJ-002,BUMPER-A,2024-01-01,2024-01-01 10:12:00,2024-01-01 13:16:28,149,135,132,3,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,2.2222,0.9778,184.47,1.0,1.0,97.78,2024-01
3,4,INJ-002,DASH-C,2024-01-01,2024-01-01 12:48:00,2024-01-01 15:16:31,100,92,90,2,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,2.1739,0.9783,148.52,1.0,1.0,97.83,2024-01
4,5,INJ-002,DOOR-B,2024-01-01,2024-01-01 20:48:00,2024-01-01 23:12:13,123,129,122,7,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,5.4264,0.9457,144.22,1.0,1.0,94.57,2024-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,1868,PRESS-002,BUMPER-A,2024-03-31,2024-03-31 20:19:00,2024-03-31 23:25:19,150,144,119,25,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.3611,0.8264,186.32,1.0,1.0,82.64,2024-03
1868,1869,PRESS-002,DASH-C,2024-03-31,2024-04-01 00:15:00,2024-04-01 02:59:58,136,130,109,21,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,16.1538,0.8385,164.97,1.0,1.0,83.85,2024-03
1869,1870,PRESS-002,BUMPER-A,2024-03-31,2024-04-01 05:53:00,2024-04-01 07:26:15,84,80,66,14,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.5000,0.8250,93.25,1.0,1.0,82.50,2024-03
1870,1871,ASM-001,BUMPER-A,2024-03-31,2024-03-31 10:24:00,2024-03-31 13:25:41,143,121,101,20,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,16.5289,0.8347,181.68,1.0,1.0,83.47,2024-03


In [148]:
# 2. 월별로 다음 집계:
#    - 생산 건수
#    - 총 생산량
#    - 평균 불량률
#    - 설비 가동 수 (equipment_id unique count)

monthly_prod = production_df.groupby('year-month').agg({'production_id':'count',
                                                        'actual_quantity':'sum',
                                                        '불량률':'mean',
                                                        'equipment_id':'nunique'}).round(2)
monthly_prod.columns = ['생산 건수', '총 생산량', '평균 불량률', '설비 가동 수']
monthly_prod

Unnamed: 0_level_0,생산 건수,총 생산량,평균 불량률,설비 가동 수
year-month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01,626,69849,5.3,5
2024-02,602,66390,10.36,5
2024-03,644,70365,14.87,5


In [149]:
# 3. 전월 대비 생산량 변화율(%) 계산
(monthly_prod['총 생산량'].pct_change()*100).round(2)

year-month
2024-01     NaN
2024-02   -4.95
2024-03    5.99
Freq: M, Name: 총 생산량, dtype: float64

In [150]:
# 4. 전월 대비 불량률 변화(차이) 계산
monthly_prod['평균 불량률'].diff()

year-month
2024-01     NaN
2024-02    5.06
2024-03    4.51
Freq: M, Name: 평균 불량률, dtype: float64

In [151]:
# 5. 처음 12개월 데이터 출력
monthly_prod.head(12)

Unnamed: 0_level_0,생산 건수,총 생산량,평균 불량률,설비 가동 수
year-month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-01,626,69849,5.3,5
2024-02,602,66390,10.36,5
2024-03,644,70365,14.87,5


---
## 문제 9: 설비 고장 예측을 위한 특성 생성

**시나리오**: 설비 정비 이력과 생산/센서 데이터를 결합하여 고장 예측 특성을 생성하세요.

**요구사항**:
1. 설비별 정비 이력 집계:
   - 총 정비 건수
   - 총 정지 시간
   - 고장 정비 건수 (maintenance_type='BREAKDOWN')
2. 설비별 생산 집계:
   - 평균 사이클 타임
   - 평균 불량률
3. 설비별 센서 집계 (최근 30일):
   - 평균 온도
   - 평균 진동
   - 온도 표준편차
4. equipment_df에 위 세 집계를 모두 결합
5. 위험도 점수 계산:
   - 위험도 = (고장건수 × 10) + (평균불량률 × 5) + (온도표준편차 × 2)
6. 위험도 상위 5개 설비 출력

**힌트**: 각각 집계 후 순차적 merge, 복합 계산

In [152]:
# 1. 설비별 정비 이력 집계:
#    - 총 정비 건수
#    - 총 정지 시간
#    - 고장 정비 건수 (maintenance_type='BREAKDOWN')
def breakdown_cnt(x):
    return (x == 'BREAKDOWN').sum()

main_summary = maintenance_df.groupby('equipment_id').agg({'start_time':'count',
                                                           'downtime_hours':'sum',
                                                           'maintenance_type':breakdown_cnt}).round(2)
main_summary.columns = ['총 정비 건수', '총 정지 시간', '고장 정비 건수']
main_summary

Unnamed: 0_level_0,총 정비 건수,총 정지 시간,고장 정비 건수
equipment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ASM-001,19,30.79,3
INJ-001,17,27.85,2
INJ-002,23,40.36,5
PRESS-001,18,29.69,2
PRESS-002,21,37.71,4


In [153]:
# 2. 설비별 생산 집계:
#    - 평균 사이클 타임
#    - 평균 불량률

prod_summary = production_df.groupby('equipment_id').agg({'cycle_time':'mean',
                                                           '불량률':'mean'}).round(2)
prod_summary.columns = ['평균 사이클 타임', '평균 불량률']
prod_summary

Unnamed: 0_level_0,평균 사이클 타임,평균 불량률
equipment_id,Unnamed: 1_level_1,Unnamed: 2_level_1
ASM-001,94.43,12.12
INJ-001,71.71,10.76
INJ-002,77.83,8.7
PRESS-001,73.57,9.91
PRESS-002,72.56,10.68


In [154]:
# 3. 설비별 센서 집계 (최근 30일):
#    - 평균 온도
#    - 평균 진동
#    - 온도 표준편차
recent_30 = sensor_df[sensor_df['measurement_time'] > sensor_df['measurement_time'].max() - pd.Timedelta(days=30)]
sensor_summary = recent_30.groupby('equipment_id').agg({'temperature':['mean', 'std'],
                                                        'vibration':'mean'}).round(4)
sensor_summary.columns = ['평균 온도', '온도 표준편차', '평균 진동']
sensor_summary

Unnamed: 0_level_0,평균 온도,온도 표준편차,평균 진동
equipment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ASM-001,25.0252,2.2377,1.1957
INJ-001,192.5466,2.5545,2.499
INJ-002,174.9354,2.2355,2.2871
PRESS-001,85.0486,2.1992,3.4934
PRESS-002,88.097,2.0991,4.0808


In [155]:
# 4. equipment_df에 위 세 집계를 모두 결합
eq_summary = pd.merge(equipment_df, main_summary, on='equipment_id', how='left')
eq_summary = pd.merge(eq_summary, prod_summary, on='equipment_id', how='left')
eq_summary = pd.merge(eq_summary, sensor_summary, on='equipment_id', how='left')
eq_summary

Unnamed: 0,equipment_id,equipment_name,equipment_type,location,rated_capacity,installation_date,status,created_at,updated_at,총 정비 건수,총 정지 시간,고장 정비 건수,평균 사이클 타임,평균 불량률,평균 온도,온도 표준편차,평균 진동
0,INJ-001,사출기 1호기,사출기,A동 1라인,150.0,2020-03-15,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,17,27.85,2,71.71,10.76,192.5466,2.5545,2.499
1,INJ-002,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,23,40.36,5,77.83,8.7,174.9354,2.2355,2.2871
2,PRESS-001,프레스 1호기,프레스,A동 2라인,200.0,2019-05-10,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,18,29.69,2,73.57,9.91,85.0486,2.1992,3.4934
3,PRESS-002,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,21,37.71,4,72.56,10.68,88.097,2.0991,4.0808
4,ASM-001,조립라인 1호기,조립라인,B동 1라인,100.0,2020-11-30,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,19,30.79,3,94.43,12.12,25.0252,2.2377,1.1957


In [156]:
# 5. 위험도 점수 계산:
#    - 위험도 = (고장건수 × 10) + (평균불량률 × 5) + (온도표준편차 × 2)
eq_summary['위험도'] = eq_summary['고장 정비 건수']*10 \
                      + eq_summary['평균 불량률']*5 \
                      + eq_summary['온도 표준편차']*2
eq_summary['위험도'] = eq_summary['위험도'].round(2)
eq_summary

Unnamed: 0,equipment_id,equipment_name,equipment_type,location,rated_capacity,installation_date,status,created_at,updated_at,총 정비 건수,총 정지 시간,고장 정비 건수,평균 사이클 타임,평균 불량률,평균 온도,온도 표준편차,평균 진동,위험도
0,INJ-001,사출기 1호기,사출기,A동 1라인,150.0,2020-03-15,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,17,27.85,2,71.71,10.76,192.5466,2.5545,2.499,78.91
1,INJ-002,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,23,40.36,5,77.83,8.7,174.9354,2.2355,2.2871,97.97
2,PRESS-001,프레스 1호기,프레스,A동 2라인,200.0,2019-05-10,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,18,29.69,2,73.57,9.91,85.0486,2.1992,3.4934,73.95
3,PRESS-002,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,21,37.71,4,72.56,10.68,88.097,2.0991,4.0808,97.6
4,ASM-001,조립라인 1호기,조립라인,B동 1라인,100.0,2020-11-30,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,19,30.79,3,94.43,12.12,25.0252,2.2377,1.1957,95.08


In [157]:
# 6. 위험도 상위 5개 설비 출력
eq_summary.nlargest(5, '위험도')

Unnamed: 0,equipment_id,equipment_name,equipment_type,location,rated_capacity,installation_date,status,created_at,updated_at,총 정비 건수,총 정지 시간,고장 정비 건수,평균 사이클 타임,평균 불량률,평균 온도,온도 표준편차,평균 진동,위험도
1,INJ-002,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,23,40.36,5,77.83,8.7,174.9354,2.2355,2.2871,97.97
3,PRESS-002,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,21,37.71,4,72.56,10.68,88.097,2.0991,4.0808,97.6
4,ASM-001,조립라인 1호기,조립라인,B동 1라인,100.0,2020-11-30,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,19,30.79,3,94.43,12.12,25.0252,2.2377,1.1957,95.08
0,INJ-001,사출기 1호기,사출기,A동 1라인,150.0,2020-03-15,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,17,27.85,2,71.71,10.76,192.5466,2.5545,2.499,78.91
2,PRESS-001,프레스 1호기,프레스,A동 2라인,200.0,2019-05-10,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00,18,29.69,2,73.57,9.91,85.0486,2.1992,3.4934,73.95


---
## 문제 10: 종합 제조 대시보드 생성

**시나리오**: 경영진을 위한 종합 제조 대시보드 데이터를 생성하세요.

**요구사항**:

### Part A: 전체 현황 (최근 30일)
1. 최근 30일 데이터 필터링
2. 다음 KPI 계산:
   - 총 생산량
   - 평균 일 생산량
   - 전체 불량률
   - 가동 설비 수
   - 평균 OEE

### Part B: 설비 타입별 분석
1. equipment_df와 production_df 결합
2. 설비 타입별로 집계:
   - 설비 수
   - 총 생산량
   - 평균 불량률
   - 평균 OEE

### Part C: Top/Bottom 설비
1. OEE 상위 3개 설비
2. 불량률 하위 3개 설비 (낮을수록 좋음)
3. 생산량 상위 3개 설비

### Part D: 일별 추이 (최근 30일)
1. 일별 생산량 및 7일 이동평균
2. 일별 불량률 및 7일 이동평균

모든 결과를 출력하세요.

**힌트**: 날짜 필터링, 복합 집계, merge, rolling, 종합적 데이터 처리

Part A: 전체 현황

In [158]:
# 1. 최근 30일 데이터 필터링
recent_30_prod = production_df[production_df['production_date'] > production_df['production_date'].max() - pd.Timedelta(days=30)]
recent_30_prod

Unnamed: 0,production_id,equipment_id,product_code,production_date,start_time,end_time,target_quantity,actual_quantity,good_quantity,defect_quantity,...,shift,created_at,updated_at,불량률,양품률,실제 작업시간(분),성능률,가동률,OEE,year-month
1248,1249,INJ-001,DASH-C,2024-03-02,2024-03-02 08:10:00,2024-03-02 10:33:21,111,105,90,15,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,14.2857,0.8571,143.35,1.0,1.0,85.71,2024-03
1249,1250,INJ-001,DASH-C,2024-03-02,2024-03-02 13:28:00,2024-03-02 16:09:45,141,133,116,17,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,12.7820,0.8722,161.75,1.0,1.0,87.22,2024-03
1250,1251,INJ-001,DOOR-B,2024-03-02,2024-03-02 22:37:00,2024-03-02 23:59:59,121,114,96,18,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,15.7895,0.8421,82.98,1.0,1.0,84.21,2024-03
1251,1252,INJ-001,BUMPER-A,2024-03-02,2024-03-03 01:11:00,2024-03-03 03:45:36,130,123,104,19,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,15.4472,0.8455,154.60,1.0,1.0,84.55,2024-03
1252,1253,INJ-002,BUMPER-A,2024-03-02,2024-03-02 08:29:00,2024-03-02 10:35:05,105,110,95,15,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,13.6364,0.8636,126.08,1.0,1.0,86.36,2024-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,1868,PRESS-002,BUMPER-A,2024-03-31,2024-03-31 20:19:00,2024-03-31 23:25:19,150,144,119,25,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.3611,0.8264,186.32,1.0,1.0,82.64,2024-03
1868,1869,PRESS-002,DASH-C,2024-03-31,2024-04-01 00:15:00,2024-04-01 02:59:58,136,130,109,21,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,16.1538,0.8385,164.97,1.0,1.0,83.85,2024-03
1869,1870,PRESS-002,BUMPER-A,2024-03-31,2024-04-01 05:53:00,2024-04-01 07:26:15,84,80,66,14,...,NIGHT,2026-01-30 00:42:48,2026-01-30 00:42:48,17.5000,0.8250,93.25,1.0,1.0,82.50,2024-03
1870,1871,ASM-001,BUMPER-A,2024-03-31,2024-03-31 10:24:00,2024-03-31 13:25:41,143,121,101,20,...,DAY,2026-01-30 00:42:48,2026-01-30 00:42:48,16.5289,0.8347,181.68,1.0,1.0,83.47,2024-03


In [159]:
# 2. 다음 KPI 계산:
#    - 총 생산량
#    - 평균 일 생산량
#    - 전체 불량률
#    - 가동 설비 수
#    - 평균 OEE

print(f"총 생산량: {recent_30_prod['actual_quantity'].sum()} 개")
print(f"평균 일 생산량: {(recent_30_prod.groupby(production_df['production_date'])['actual_quantity'].sum().mean()).round(2)} 개")
print(f"전체 불량률: {(production_df['불량률'].mean()).round(2)} %")
print(f"가동 설비 수: {production_df['equipment_id'].nunique()} 개")
print(f"평균 OEE: {(production_df['OEE'].mean()).round(2)}")

총 생산량: 68254 개
평균 일 생산량: 2275.13 개
전체 불량률: 10.22 %
가동 설비 수: 5 개
평균 OEE: 89.78


Part B: 설비 타입별 분석

In [160]:
# 1. equipment_df와 production_df 결합
eq_prod = pd.merge(production_df, equipment_df, on='equipment_id', how='left')
eq_prod

Unnamed: 0,production_id,equipment_id,product_code,production_date,start_time,end_time,target_quantity,actual_quantity,good_quantity,defect_quantity,...,OEE,year-month,equipment_name,equipment_type,location,rated_capacity,installation_date,status,created_at_y,updated_at_y
0,1,INJ-001,BUMPER-A,2024-01-01,2024-01-01 08:14:00,2024-01-01 09:53:32,97,81,77,4,...,95.06,2024-01,사출기 1호기,사출기,A동 1라인,150.0,2020-03-15,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
1,2,INJ-001,BUMPER-A,2024-01-01,2024-01-01 21:02:00,2024-01-01 22:33:43,83,78,72,6,...,92.31,2024-01,사출기 1호기,사출기,A동 1라인,150.0,2020-03-15,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
2,3,INJ-002,BUMPER-A,2024-01-01,2024-01-01 10:12:00,2024-01-01 13:16:28,149,135,132,3,...,97.78,2024-01,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
3,4,INJ-002,DASH-C,2024-01-01,2024-01-01 12:48:00,2024-01-01 15:16:31,100,92,90,2,...,97.83,2024-01,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
4,5,INJ-002,DOOR-B,2024-01-01,2024-01-01 20:48:00,2024-01-01 23:12:13,123,129,122,7,...,94.57,2024-01,사출기 2호기,사출기,A동 1라인,150.0,2021-06-20,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1867,1868,PRESS-002,BUMPER-A,2024-03-31,2024-03-31 20:19:00,2024-03-31 23:25:19,150,144,119,25,...,82.64,2024-03,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
1868,1869,PRESS-002,DASH-C,2024-03-31,2024-04-01 00:15:00,2024-04-01 02:59:58,136,130,109,21,...,83.85,2024-03,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
1869,1870,PRESS-002,BUMPER-A,2024-03-31,2024-04-01 05:53:00,2024-04-01 07:26:15,84,80,66,14,...,82.50,2024-03,프레스 2호기,프레스,A동 2라인,200.0,2022-08-25,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00
1870,1871,ASM-001,BUMPER-A,2024-03-31,2024-03-31 10:24:00,2024-03-31 13:25:41,143,121,101,20,...,83.47,2024-03,조립라인 1호기,조립라인,B동 1라인,100.0,2020-11-30,ACTIVE,2024-01-01 00:00:00,2024-01-01 00:00:00


In [161]:
# 2. 설비 타입별로 집계:
#    - 설비 수
#    - 총 생산량
#    - 평균 불량률
#    - 평균 OEE
eq_type_summary = eq_prod.groupby('equipment_type').agg({'equipment_id':'nunique',
                                                         'actual_quantity':'sum',
                                                         '불량률':'mean',
                                                         'OEE':'mean'}).round(2)
eq_type_summary.columns = ['설비 수', '총 생산량', '평균 불량률', '평균 OEE']
eq_type_summary

Unnamed: 0_level_0,설비 수,총 생산량,평균 불량률,평균 OEE
equipment_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
사출기,2,80121,9.48,90.52
조립라인,1,22485,12.12,87.88
프레스,2,103998,10.3,89.7


Part C: Top/Bottom 설비

In [162]:
# 1. OEE 상위 3개 설비
production_df.groupby('equipment_id')['OEE'].mean().round(2).nlargest(3)

equipment_id
INJ-002      91.30
PRESS-001    90.09
PRESS-002    89.32
Name: OEE, dtype: float64

In [163]:
# 2. 불량률 하위 3개 설비 (낮을수록 좋음)
production_df.groupby('equipment_id')['불량률'].mean().round(2).nsmallest(3)

equipment_id
INJ-002       8.70
PRESS-001     9.91
PRESS-002    10.68
Name: 불량률, dtype: float64

In [164]:
# 3. 생산량 상위 3개 설비
production_df.groupby('equipment_id')['actual_quantity'].sum().round(2).nlargest(3)

equipment_id
PRESS-001    52069
INJ-002      51958
PRESS-002    51929
Name: actual_quantity, dtype: int64

Part D: 일별 추이

In [165]:
# 1. 일별 생산량 및 7일 이동평균
daily_prod_7MA = production_df.groupby('production_date')['actual_quantity'].sum().rolling(7).mean().round(2)
daily_prod_7MA

production_date
2024-01-01        NaN
2024-01-02        NaN
2024-01-03        NaN
2024-01-04        NaN
2024-01-05        NaN
               ...   
2024-03-27    2199.43
2024-03-28    2151.86
2024-03-29    2238.29
2024-03-30    2192.86
2024-03-31    2195.14
Name: actual_quantity, Length: 91, dtype: float64

In [166]:
# 2. 일별 불량률 및 7일 이동평균
daily_dfrate_7MA = production_df.groupby('production_date')['불량률'].mean().rolling(7).mean().round(2)
daily_dfrate_7MA

production_date
2024-01-01      NaN
2024-01-02      NaN
2024-01-03      NaN
2024-01-04      NaN
2024-01-05      NaN
              ...  
2024-03-27    14.94
2024-03-28    14.98
2024-03-29    15.03
2024-03-30    15.04
2024-03-31    14.98
Name: 불량률, Length: 91, dtype: float64

---
## 수고하셨습니다!

### 학습 완료 체크리스트
- [ ] 시계열 데이터 집계 (resample)
- [ ] 이동평균 계산 (rolling)
- [ ] 변화율 및 차분 분석 (shift, diff, pct_change)
- [ ] 이상치 탐지 (3-Sigma, IQR)
- [ ] OEE 계산 및 설비 효율 분석
- [ ] SPC 관리도 데이터 생성
- [ ] 다중 테이블 결합 및 종합 분석
- [ ] 실무 대시보드 데이터 구축

