# Cycle Time 예측

## 라이브러리 import

In [129]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from glob import glob
import os, random, time, gc, warnings

from tqdm import tqdm_notebook

import lightgbm as lgbm
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error

from sklearn.feature_selection import RFECV


from sklearn.cluster import KMeans

from datetime import datetime

from math import sqrt
import io

# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Show up to 500 columns/rows when a DataFrame is displayed.
# The fully qualified option names are used on purpose: the bare patterns
# 'max_columns' / 'max_rows' also match 'styler.render.max_columns' /
# 'styler.render.max_rows' on pandas versions that define them, and
# set_option then raises OptionError for the ambiguous pattern.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

%matplotlib inline

## 데이터 전처리

### 데이터 확인

In [130]:
# Load the raw table from disk, then preview its shape and its last rows.
csv_path = 'D:\\test2\\data-0409-table5-2.csv'
df = pd.read_csv(csv_path)
print(df.shape)
df.tail()

(235, 15)


Unnamed: 0,id,Uid,completed,CycleNumber,MCIn1,MC1PrTime,MCOut1,MCIn2,MC2PrTime,MCOut2,MCIn3,MC3PrTime,MCOut3,Unnamed: 13,Unnamed: 14
230,563,c37b84f4,1,5,43187.99999,23.26,43428.00001,43276.00003,2.99,43488.99996,43498.99999,0.25,43502.99996,True,
231,117,c37b7e02,1,1,0,0.11,43430,0,0.25,43493.00003,43500.99997,0.28,43505.00004,True,
232,413,c37b9987,1,4,0,0.02,43433.99997,43435.99996,0.71,43496.00001,43505.00004,0.48,43513.99998,True,
233,64,c37b9728,1,1,43211.00002,0.75,43437.00004,43441.00001,8.26,43516.99996,43524.99999,4.07,43531.00004,True,
234,id,Uid,completed,CycleNumber,MCIn1,MC1PrTime,MCOut1,MCIn2,MC2PrTime,MCOut2,MCIn3,MC3PrTime,MCOut3,,


In [131]:
# Remove the final row of the raw table (in the preview above it is a
# repeated header line rather than data).
df_dropped_1 = df.drop(index=df.index[-1])

# Remove the two trailing 'Unnamed' columns.
drop_column = ['Unnamed: 13', 'Unnamed: 14']
df_dropped_2 = df_dropped_1.drop(columns=drop_column)

print(df_dropped_2.shape)
df_dropped_2.head()

(234, 13)


Unnamed: 0,id,Uid,completed,CycleNumber,MCIn1,MC1PrTime,MCOut1,MCIn2,MC2PrTime,MCOut2,MCIn3,MC3PrTime,MCOut3
0,501,c37b82d7,1,5,36991.99999,0.12,36993.99997,36995.99996,0.11,37000.00002,37009.99996,2.03,37014.00002
1,476,c37b7a6e,1,4,0.0,2.2,37007.99997,0.0,3.84,37017.0,37027.00002,1.27,37031.0
2,301,c37b83cb,1,3,0.0,2.36,37031.0,0.0,4.1,37044.0,37048.99997,3.38,37055.00002
3,276,c37b7dfe,1,3,0.0,4.98,37036.99996,37047.99997,2.93,37058.99999,37066.00003,0.21,37079.00004
4,326,c37b770b,1,3,0.0,6.87,37044.99999,37053.00003,0.68,37063.99996,37071.99999,0.07,37083.00001


In [132]:
# Print the column dtypes and non-null counts of the trimmed table.
df_dropped_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 234 entries, 0 to 233
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           234 non-null    object
 1   Uid          234 non-null    object
 2   completed    234 non-null    object
 3   CycleNumber  234 non-null    object
 4   MCIn1        234 non-null    object
 5   MC1PrTime    234 non-null    object
 6   MCOut1       234 non-null    object
 7   MCIn2        234 non-null    object
 8   MC2PrTime    234 non-null    object
 9   MCOut2       234 non-null    object
 10  MCIn3        234 non-null    object
 11  MC3PrTime    234 non-null    object
 12  MCOut3       234 non-null    object
dtypes: object(13)
memory usage: 25.6+ KB


In [133]:
# Every column was read as object, so cast the numeric ones to float.
# 'id' and 'Uid' are not listed and therefore keep their object dtype.
numeric_columns = [
    'completed', 'CycleNumber',
    'MCIn1', 'MC1PrTime', 'MCOut1',
    'MCIn2', 'MC2PrTime', 'MCOut2',
    'MCIn3', 'MC3PrTime', 'MCOut3',
]
df_dropped_3 = df_dropped_2.astype(dict.fromkeys(numeric_columns, 'float'))

In [134]:
# Print the dtypes again to confirm the cast to float took effect.
df_dropped_3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 234 entries, 0 to 233
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           234 non-null    object 
 1   Uid          234 non-null    object 
 2   completed    234 non-null    float64
 3   CycleNumber  234 non-null    float64
 4   MCIn1        234 non-null    float64
 5   MC1PrTime    234 non-null    float64
 6   MCOut1       234 non-null    float64
 7   MCIn2        234 non-null    float64
 8   MC2PrTime    234 non-null    float64
 9   MCOut2       234 non-null    float64
 10  MCIn3        234 non-null    float64
 11  MC3PrTime    234 non-null    float64
 12  MCOut3       234 non-null    float64
dtypes: float64(11), object(2)
memory usage: 25.6+ KB


In [135]:
# Count the cells equal to 0 in each column (this notebook treats 0 as the
# "missing value" marker) and keep a grand total across all columns.
count1 = 0

for i in df_dropped_3:
    count2 = sum(1 for cell in df_dropped_3[i] if cell == 0)
    print('-'*50)
    print(i,"열의 결측값 개수:",count2)
    count1 += count2

print('-'*50)
print('전체 결측치:',count1)

--------------------------------------------------
id 열의 결측값 개수: 0
--------------------------------------------------
Uid 열의 결측값 개수: 0
--------------------------------------------------
completed 열의 결측값 개수: 0
--------------------------------------------------
CycleNumber 열의 결측값 개수: 0
--------------------------------------------------
MCIn1 열의 결측값 개수: 104
--------------------------------------------------
MC1PrTime 열의 결측값 개수: 1
--------------------------------------------------
MCOut1 열의 결측값 개수: 26
--------------------------------------------------
MCIn2 열의 결측값 개수: 60
--------------------------------------------------
MC2PrTime 열의 결측값 개수: 0
--------------------------------------------------
MCOut2 열의 결측값 개수: 7
--------------------------------------------------
MCIn3 열의 결측값 개수: 0
--------------------------------------------------
MC3PrTime 열의 결측값 개수: 0
--------------------------------------------------
MCOut3 열의 결측값 개수: 0
--------------------------------------------------
전체 결측치: 198


### 이상치 제거

In [136]:
# Flag every row whose MCOut3 + MC3PrTime is later than the next row's
# MCOut3.
# The loop stops one row early because the last row has no successor.
# Running off the end and stopping through a bare `except` would also hide
# any unrelated error behind the "normal run" message.
# Row positions are used as index labels (the frame has a 0..n-1 index).
n_rows = len(df_dropped_3)
for i in range(n_rows - 1):
    finished = df_dropped_3['MC3PrTime'][i] + df_dropped_3['MCOut3'][i]
    next_out = df_dropped_3['MCOut3'][i + 1]
    if finished > next_out:
        print('-'*70)
        print(i,'번째 행 확인바람')
        print('MCOut3+MC3PrTime:',finished,'다음행 MCOut3:',next_out)

# Closing line kept in its established format so cell output stays comparable.
print('-'*70)
print(n_rows,'=',len(df_dropped_3),'이면 정상실행')

----------------------------------------------------------------------
18 번째 행 확인바람
MCOut3+MC3PrTime: 37423.02996 다음행 MCOut3: 37419.00002
----------------------------------------------------------------------
32 번째 행 확인바람
MCOut3+MC3PrTime: 37760.49004 다음행 MCOut3: 37760.0
----------------------------------------------------------------------
54 번째 행 확인바람
MCOut3+MC3PrTime: 38517.35004 다음행 MCOut3: 38512.00002
----------------------------------------------------------------------
62 번째 행 확인바람
MCOut3+MC3PrTime: 38843.260010000005 다음행 MCOut3: 38842.99998
----------------------------------------------------------------------
71 번째 행 확인바람
MCOut3+MC3PrTime: 39057.12996 다음행 MCOut3: 39056.99999
----------------------------------------------------------------------
122 번째 행 확인바람
MCOut3+MC3PrTime: 40272.71003 다음행 MCOut3: 40271.99999
----------------------------------------------------------------------
128 번째 행 확인바람
MCOut3+MC3PrTime: 40535.5 다음행 MCOut3: 40530.99997
---------------------------------

In [137]:
# Where MCOut3 + MC3PrTime of a row is later than the next row's MCOut3,
# push the next row's MCOut3 to that sum plus 1 (the +1 is a provisional
# margin chosen by the author).
# Values are read and written through .at on the frame itself so that:
#   * a repaired row is seen with its new value when it is compared with
#     its own successor on the following iteration, and
#   * the write does not go through the chained form df[col][i] = ..., which
#     pandas does not guarantee to propagate back to the frame.
# The loop is bounded instead of relying on a bare `except` to stop at the
# last row, so genuine errors are no longer swallowed.
# Row positions are used as index labels (the frame has a 0..n-1 index).
n_rows = len(df_dropped_3)
for i in range(n_rows - 1):
    finished = df_dropped_3.at[i, 'MC3PrTime'] + df_dropped_3.at[i, 'MCOut3']
    if finished > df_dropped_3.at[i + 1, 'MCOut3']:
        df_dropped_3.at[i + 1, 'MCOut3'] = finished + 1

# Closing line kept in its established format so cell output stays comparable.
print(n_rows,'=',len(df_dropped_3),'이면 정상실행')

234 = 234 이면 정상실행


In [138]:
# Re-run the check: flag every row whose MCOut3 + MC3PrTime is still later
# than the next row's MCOut3.
# The loop stops one row early because the last row has no successor.
# Running off the end and stopping through a bare `except` would also hide
# any unrelated error behind the "normal run" message.
# Row positions are used as index labels (the frame has a 0..n-1 index).
n_rows = len(df_dropped_3)
for i in range(n_rows - 1):
    finished = df_dropped_3['MC3PrTime'][i] + df_dropped_3['MCOut3'][i]
    next_out = df_dropped_3['MCOut3'][i + 1]
    if finished > next_out:
        print('-'*70)
        print(i,'번째 행 확인바람')
        print('MCOut3+MC3PrTime:',finished,'다음행 MCOut3:',next_out)

# Closing line kept in its established format so cell output stays comparable.
print(n_rows,'=',len(df_dropped_3),'이면 정상실행')

234 = 234 이면 정상실행


In [139]:
# Confirm that MCOut3 is strictly increasing from each row to the next.
mc_out3 = df_dropped_3['MCOut3'].values

# Positions whose value is not smaller than the value of the following row.
out_of_order = np.flatnonzero(mc_out3[:-1] >= mc_out3[1:])
for i in out_of_order:
    print('인덱스:',i)

# The last row has no successor to compare with.
if len(mc_out3) > 0:
    print('마지막 행(',len(mc_out3)-1,')은 비교불가')

# A count of 0 means the column is fully ordered.
count = len(out_of_order)
print('전처리 후 이상값:',count)

마지막 행( 233 )은 비교불가
전처리 후 이상값: 0


In [140]:
# Confirm that MCIn3 is strictly increasing from each row to the next.
mc_in3 = df_dropped_3['MCIn3'].values

# Positions whose value is not smaller than the value of the following row.
out_of_order = np.flatnonzero(mc_in3[:-1] >= mc_in3[1:])
for i in out_of_order:
    print('인덱스:',i)

# The last row has no successor to compare with.
if len(mc_in3) > 0:
    print('마지막 행(',len(mc_in3)-1,')은 비교불가')

# A count of 0 means the column is fully ordered.
count = len(out_of_order)
print('이상값:',count)

마지막 행( 233 )은 비교불가
이상값: 0


In [141]:
# Report every row where MCOut2 + MC2PrTime is not strictly below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('-'*70)
print('이상값:',count)

----------------------------------------------------------------------
6 번째 행 확인바람
MCOut2+MC2PrTime: 37131.59996 MCIn3: 37082.00002
----------------------------------------------------------------------
7 번째 행 확인바람
MCOut2+MC2PrTime: 37184.67997 MCIn3: 37139.99999
----------------------------------------------------------------------
8 번째 행 확인바람
MCOut2+MC2PrTime: 37222.37004 MCIn3: 37193.0
----------------------------------------------------------------------
9 번째 행 확인바람
MCOut2+MC2PrTime: 37237.09001 MCIn3: 37215.00003
----------------------------------------------------------------------
10 번째 행 확인바람
MCOut2+MC2PrTime: 37246.49004 MCIn3: 37236.99997
----------------------------------------------------------------------
11 번째 행 확인바람
MCOut2+MC2PrTime: 37292.35001 MCIn3: 37256.00003
----------------------------------------------------------------------
12 번째 행 확인바람
MCOut2+MC2PrTime: 37340.050010000006 MCIn3: 37294.00004
----------------------------------------------------------------------

MCOut2+MC2PrTime: 42340.69996 MCIn3: 42322.0
----------------------------------------------------------------------
217 번째 행 확인바람
MCOut2+MC2PrTime: 43008.57004 MCIn3: 43007.00003
----------------------------------------------------------------------
220 번째 행 확인바람
MCOut2+MC2PrTime: 43083.02996 MCIn3: 43079.0
----------------------------------------------------------------------
228 번째 행 확인바람
MCOut2+MC2PrTime: 43483.309989999994 MCIn3: 43480.00002
----------------------------------------------------------------------
233 번째 행 확인바람
MCOut2+MC2PrTime: 43525.25996 MCIn3: 43524.99999
----------------------------------------------------------------------
이상값: 66


In [142]:
# Reset MCOut2 to 0 (this notebook's "missing" marker) on every row where
# MCOut2 + MC2PrTime is not strictly below MCIn3.
# The write goes through .loc with a boolean mask: the chained form
# df[col][i] = 0 assigns through an intermediate Series, which triggers
# SettingWithCopyWarning and is not guaranteed to modify the frame.
count = 0
violates = (df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']) >= df_dropped_3['MCIn3']
df_dropped_3.loc[violates, 'MCOut2'] = 0

In [143]:
# Re-check: report every row where MCOut2 + MC2PrTime is still not strictly
# below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('전처리 후 이상값:',count)

전처리 후 이상값: 0


In [149]:
# Fill every MCOut2 that is still 0 (the "missing" marker) with a random
# integer drawn from a window built from its neighbours:
#   up    - largest MCOut2 in the rows above
#   left  - largest of MCIn2 / MCOut1 / MCIn1 in the same row
#   right - smallest of MCIn3 / MCOut3 in the same row
#   down  - smallest non-zero MCOut2 in the rows below
# Row positions are used as index labels (the frame has a 0..n-1 index).
# NOTE: draws use the global numpy RNG; call np.random.seed(...) beforehand
# if the fill has to be reproducible.
count = 0
for i in range(len(df_dropped_3)):
    if df_dropped_3.at[i, 'MCOut2'] != 0:
        continue

    # The first row has nothing above it; use 0 instead of letting max()
    # fail on an empty sequence.
    up = df_dropped_3['MCOut2'].iloc[:i].max() if i > 0 else 0
    print('-'*50)
    print('up:', up)

    left = max(df_dropped_3.at[i, 'MCIn2'], df_dropped_3.at[i, 'MCOut1'], df_dropped_3.at[i, 'MCIn1'])
    print('left:',left)

    right = min(df_dropped_3.at[i, 'MCIn3'], df_dropped_3.at[i, 'MCOut3'])
    print('right:',right)

    # All rows below, INCLUDING the final one; zeros are other missing
    # values and must not pull the bound down.
    below = df_dropped_3['MCOut2'].iloc[i + 1:]
    below = below[below != 0]
    if below.empty:
        print('마지막 행(',i,')입니다.')
        # Nothing usable below: let the same-row bound decide rather than
        # reusing a value left over from an earlier iteration.
        down = right
    else:
        down = below.min()
    print('down:',down)

    # randint needs low < high and raises ValueError otherwise; only that
    # error widens the window, anything else is a real failure and
    # propagates.  Bounds are truncated to integers explicitly.
    try:
        new_value = np.random.randint(int(max(up, left)), int(min(right, down)))
    except ValueError:
        count += 1
        print('해당 행은 무시(',count,')')
        try:
            new_value = np.random.randint(int(max(up, left)), int(max(right, down)))
        except ValueError:
            new_value = np.random.randint(int(min(up, left)), int(max(right, down)))

    # Scalar write on the frame itself (no chained indexing, no 1-element
    # array), so the value is guaranteed to land in df_dropped_3.
    df_dropped_3.at[i, 'MCOut2'] = new_value

In [150]:
# Report every row where MCOut2 + MC2PrTime is not strictly below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('-'*70)
print('이상값:',count)

----------------------------------------------------------------------
22 번째 행 확인바람
MCOut2+MC2PrTime: 37445.1 MCIn3: 37436.99999
----------------------------------------------------------------------
25 번째 행 확인바람
MCOut2+MC2PrTime: 37556.12 MCIn3: 37545.99998
----------------------------------------------------------------------
30 번째 행 확인바람
MCOut2+MC2PrTime: 37643.86 MCIn3: 37641.99997
----------------------------------------------------------------------
45 번째 행 확인바람
MCOut2+MC2PrTime: 38186.12 MCIn3: 38186.00004
----------------------------------------------------------------------
67 번째 행 확인바람
MCOut2+MC2PrTime: 38913.11 MCIn3: 38849.00003
----------------------------------------------------------------------
121 번째 행 확인바람
MCOut2+MC2PrTime: 40219.13 MCIn3: 40138.99998
----------------------------------------------------------------------
122 번째 행 확인바람
MCOut2+MC2PrTime: 40227.77 MCIn3: 40179.99997
----------------------------------------------------------------------
124 번째 행 확인바람
MCOu

In [151]:
# Reset MCOut2 to 0 (this notebook's "missing" marker) on every row where
# MCOut2 + MC2PrTime is not strictly below MCIn3.
# The write goes through .loc with a boolean mask: the chained form
# df[col][i] = 0 assigns through an intermediate Series, which triggers
# SettingWithCopyWarning and is not guaranteed to modify the frame.
count = 0
violates = (df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']) >= df_dropped_3['MCIn3']
df_dropped_3.loc[violates, 'MCOut2'] = 0

In [152]:
# Re-check: report every row where MCOut2 + MC2PrTime is still not strictly
# below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('전처리 후 이상값:',count)

전처리 후 이상값: 0


In [155]:
# Fill every MCOut2 that is still 0 (the "missing" marker) with a random
# integer drawn from a window built from its neighbours:
#   up    - largest MCOut2 in the rows above
#   left  - largest of MCIn2 / MCOut1 / MCIn1 in the same row
#   right - smallest of MCIn3 / MCOut3 in the same row
#   down  - smallest non-zero MCOut2 in the rows below
# Row positions are used as index labels (the frame has a 0..n-1 index).
# NOTE: draws use the global numpy RNG; call np.random.seed(...) beforehand
# if the fill has to be reproducible.
count = 0
for i in range(len(df_dropped_3)):
    if df_dropped_3.at[i, 'MCOut2'] != 0:
        continue

    # The first row has nothing above it; use 0 instead of letting max()
    # fail on an empty sequence.
    up = df_dropped_3['MCOut2'].iloc[:i].max() if i > 0 else 0
    print('-'*50)
    print('up:', up)

    left = max(df_dropped_3.at[i, 'MCIn2'], df_dropped_3.at[i, 'MCOut1'], df_dropped_3.at[i, 'MCIn1'])
    print('left:',left)

    right = min(df_dropped_3.at[i, 'MCIn3'], df_dropped_3.at[i, 'MCOut3'])
    print('right:',right)

    # All rows below, INCLUDING the final one; zeros are other missing
    # values and must not pull the bound down.
    below = df_dropped_3['MCOut2'].iloc[i + 1:]
    below = below[below != 0]
    if below.empty:
        print('마지막 행(',i,')입니다.')
        # Nothing usable below: let the same-row bound decide rather than
        # reusing a value left over from an earlier iteration.
        down = right
    else:
        down = below.min()
    print('down:',down)

    # randint needs low < high and raises ValueError otherwise; only that
    # error widens the window, anything else is a real failure and
    # propagates.  Bounds are truncated to integers explicitly.
    try:
        new_value = np.random.randint(int(max(up, left)), int(min(right, down)))
    except ValueError:
        count += 1
        print('해당 행은 무시(',count,')')
        try:
            new_value = np.random.randint(int(max(up, left)), int(max(right, down)))
        except ValueError:
            new_value = np.random.randint(int(min(up, left)), int(max(right, down)))

    # Scalar write on the frame itself (no chained indexing, no 1-element
    # array), so the value is guaranteed to land in df_dropped_3.
    df_dropped_3.at[i, 'MCOut2'] = new_value

In [156]:
# Report every row where MCOut2 + MC2PrTime is not strictly below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('-'*70)
print('이상값:',count)

----------------------------------------------------------------------
22 번째 행 확인바람
MCOut2+MC2PrTime: 37447.1 MCIn3: 37436.99999
----------------------------------------------------------------------
45 번째 행 확인바람
MCOut2+MC2PrTime: 38186.12 MCIn3: 38186.00004
----------------------------------------------------------------------
67 번째 행 확인바람
MCOut2+MC2PrTime: 38886.11 MCIn3: 38849.00003
----------------------------------------------------------------------
121 번째 행 확인바람
MCOut2+MC2PrTime: 40205.13 MCIn3: 40138.99998
----------------------------------------------------------------------
122 번째 행 확인바람
MCOut2+MC2PrTime: 40220.77 MCIn3: 40179.99997
----------------------------------------------------------------------
124 번째 행 확인바람
MCOut2+MC2PrTime: 40242.31 MCIn3: 40237.00004
----------------------------------------------------------------------
이상값: 6


In [157]:
# Reset MCOut2 to 0 (this notebook's "missing" marker) on every row where
# MCOut2 + MC2PrTime is not strictly below MCIn3.
# The write goes through .loc with a boolean mask: the chained form
# df[col][i] = 0 assigns through an intermediate Series, which triggers
# SettingWithCopyWarning and is not guaranteed to modify the frame.
count = 0
violates = (df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']) >= df_dropped_3['MCIn3']
df_dropped_3.loc[violates, 'MCOut2'] = 0

In [158]:
# Re-check: report every row where MCOut2 + MC2PrTime is still not strictly
# below MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']

for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])

count = int(violates.sum())
print('전처리 후 이상값:',count)

전처리 후 이상값: 0


In [160]:
# Fill every MCOut2 that is still 0 (the "missing" marker) with a random
# integer drawn from a window built from its neighbours:
#   up    - largest MCOut2 in the rows above
#   left  - largest of MCIn2 / MCOut1 / MCIn1 in the same row
#   right - smallest of MCIn3 / MCOut3 in the same row
#   down  - smallest non-zero MCOut2 in the rows below
# Row positions are used as index labels (the frame has a 0..n-1 index).
# NOTE: draws use the global numpy RNG; call np.random.seed(...) beforehand
# if the fill has to be reproducible.
count = 0
for i in range(len(df_dropped_3)):
    if df_dropped_3.at[i, 'MCOut2'] != 0:
        continue

    # The first row has nothing above it; use 0 instead of letting max()
    # fail on an empty sequence.
    up = df_dropped_3['MCOut2'].iloc[:i].max() if i > 0 else 0
    print('-'*50)
    print('up:', up)

    left = max(df_dropped_3.at[i, 'MCIn2'], df_dropped_3.at[i, 'MCOut1'], df_dropped_3.at[i, 'MCIn1'])
    print('left:',left)

    right = min(df_dropped_3.at[i, 'MCIn3'], df_dropped_3.at[i, 'MCOut3'])
    print('right:',right)

    # All rows below, INCLUDING the final one; zeros are other missing
    # values and must not pull the bound down.
    below = df_dropped_3['MCOut2'].iloc[i + 1:]
    below = below[below != 0]
    if below.empty:
        print('마지막 행(',i,')입니다.')
        # Nothing usable below: let the same-row bound decide rather than
        # reusing a value left over from an earlier iteration.
        down = right
    else:
        down = below.min()
    print('down:',down)

    # randint needs low < high and raises ValueError otherwise; only that
    # error widens the window, anything else is a real failure and
    # propagates.  Bounds are truncated to integers explicitly.
    try:
        new_value = np.random.randint(int(max(up, left)), int(min(right, down)))
    except ValueError:
        count += 1
        print('해당 행은 무시(',count,')')
        try:
            new_value = np.random.randint(int(max(up, left)), int(max(right, down)))
        except ValueError:
            new_value = np.random.randint(int(min(up, left)), int(max(right, down)))

    # Scalar write on the frame itself (no chained indexing, no 1-element
    # array), so the value is guaranteed to land in df_dropped_3.
    df_dropped_3.at[i, 'MCOut2'] = new_value

In [163]:
# Step 1 - report every row where MCOut2 + MC2PrTime is not strictly below
# MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('-'*70)
print('이상값:',count)

# Step 2 - reset MCOut2 to 0 (the "missing" marker) on those rows.
# Written through .loc with the mask: the chained form df[col][i] = 0
# assigns through an intermediate Series and is not guaranteed to modify
# the frame.
df_dropped_3.loc[violates, 'MCOut2'] = 0

# Step 3 - run the same check again on the updated frame.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('전처리 후 이상값:',count)

----------------------------------------------------------------------
이상값: 0
전처리 후 이상값: 0


In [165]:
# Fill every MCOut2 that is still 0 (the "missing" marker) with a random
# integer drawn from a window built from its neighbours:
#   up    - largest MCOut2 in the rows above
#   left  - largest of MCIn2 / MCOut1 / MCIn1 in the same row
#   right - smallest of MCIn3 / MCOut3 in the same row
#   down  - smallest non-zero MCOut2 in the rows below
# Row positions are used as index labels (the frame has a 0..n-1 index).
# NOTE: draws use the global numpy RNG; call np.random.seed(...) beforehand
# if the fill has to be reproducible.
count = 0
for i in range(len(df_dropped_3)):
    if df_dropped_3.at[i, 'MCOut2'] != 0:
        continue

    # The first row has nothing above it; use 0 instead of letting max()
    # fail on an empty sequence.
    up = df_dropped_3['MCOut2'].iloc[:i].max() if i > 0 else 0
    print('-'*50)
    print('up:', up)

    left = max(df_dropped_3.at[i, 'MCIn2'], df_dropped_3.at[i, 'MCOut1'], df_dropped_3.at[i, 'MCIn1'])
    print('left:',left)

    right = min(df_dropped_3.at[i, 'MCIn3'], df_dropped_3.at[i, 'MCOut3'])
    print('right:',right)

    # All rows below, INCLUDING the final one; zeros are other missing
    # values and must not pull the bound down.
    below = df_dropped_3['MCOut2'].iloc[i + 1:]
    below = below[below != 0]
    if below.empty:
        print('마지막 행(',i,')입니다.')
        # Nothing usable below: let the same-row bound decide rather than
        # reusing a value left over from an earlier iteration.
        down = right
    else:
        down = below.min()
    print('down:',down)

    # randint needs low < high and raises ValueError otherwise; only that
    # error widens the window, anything else is a real failure and
    # propagates.  Bounds are truncated to integers explicitly.
    try:
        new_value = np.random.randint(int(max(up, left)), int(min(right, down)))
    except ValueError:
        count += 1
        print('해당 행은 무시(',count,')')
        try:
            new_value = np.random.randint(int(max(up, left)), int(max(right, down)))
        except ValueError:
            new_value = np.random.randint(int(min(up, left)), int(max(right, down)))

    # Scalar write on the frame itself (no chained indexing, no 1-element
    # array), so the value is guaranteed to land in df_dropped_3.
    df_dropped_3.at[i, 'MCOut2'] = new_value

In [166]:
# Step 1 - report every row where MCOut2 + MC2PrTime is not strictly below
# MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('-'*70)
print('이상값:',count)

# Step 2 - reset MCOut2 to 0 (the "missing" marker) on those rows.
# Written through .loc with the mask: the chained form df[col][i] = 0
# assigns through an intermediate Series and is not guaranteed to modify
# the frame.
df_dropped_3.loc[violates, 'MCOut2'] = 0

# Step 3 - run the same check again on the updated frame.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('전처리 후 이상값:',count)

----------------------------------------------------------------------
67 번째 행 확인바람
MCOut2+MC2PrTime: 38883.11 MCIn3: 38849.00003
----------------------------------------------------------------------
121 번째 행 확인바람
MCOut2+MC2PrTime: 40173.13 MCIn3: 40138.99998
----------------------------------------------------------------------
122 번째 행 확인바람
MCOut2+MC2PrTime: 40211.77 MCIn3: 40179.99997
----------------------------------------------------------------------
124 번째 행 확인바람
MCOut2+MC2PrTime: 40238.31 MCIn3: 40237.00004
----------------------------------------------------------------------
이상값: 4
전처리 후 이상값: 0


In [181]:
# Fill every MCOut2 that is still 0 (the "missing" marker) with a random
# integer drawn from a window built from its neighbours:
#   up    - largest MCOut2 in the rows above
#   left  - largest of MCIn2 / MCOut1 / MCIn1 in the same row
#   right - smallest of MCIn3 / MCOut3 in the same row
#   down  - smallest non-zero MCOut2 in the rows below
# Row positions are used as index labels (the frame has a 0..n-1 index).
# NOTE: draws use the global numpy RNG; call np.random.seed(...) beforehand
# if the fill has to be reproducible.
count = 0
for i in range(len(df_dropped_3)):
    if df_dropped_3.at[i, 'MCOut2'] != 0:
        continue

    # The first row has nothing above it; use 0 instead of letting max()
    # fail on an empty sequence.
    up = df_dropped_3['MCOut2'].iloc[:i].max() if i > 0 else 0
    print('-'*50)
    print('up:', up)

    left = max(df_dropped_3.at[i, 'MCIn2'], df_dropped_3.at[i, 'MCOut1'], df_dropped_3.at[i, 'MCIn1'])
    print('left:',left)

    right = min(df_dropped_3.at[i, 'MCIn3'], df_dropped_3.at[i, 'MCOut3'])
    print('right:',right)

    # All rows below, INCLUDING the final one; zeros are other missing
    # values and must not pull the bound down.
    below = df_dropped_3['MCOut2'].iloc[i + 1:]
    below = below[below != 0]
    if below.empty:
        print('마지막 행(',i,')입니다.')
        # Nothing usable below: let the same-row bound decide rather than
        # reusing a value left over from an earlier iteration.
        down = right
    else:
        down = below.min()
    print('down:',down)

    # randint needs low < high and raises ValueError otherwise; only that
    # error widens the window, anything else is a real failure and
    # propagates.  Bounds are truncated to integers explicitly.
    try:
        new_value = np.random.randint(int(max(up, left)), int(min(right, down)))
    except ValueError:
        count += 1
        print('해당 행은 무시(',count,')')
        try:
            new_value = np.random.randint(int(max(up, left)), int(max(right, down)))
        except ValueError:
            new_value = np.random.randint(int(min(up, left)), int(max(right, down)))

    # Scalar write on the frame itself (no chained indexing, no 1-element
    # array), so the value is guaranteed to land in df_dropped_3.
    df_dropped_3.at[i, 'MCOut2'] = new_value

--------------------------------------------------
up: 38840.99999
left: 38853.0
right: 38849.00003
down: 38917.00002
해당 행은 무시( 1 )
--------------------------------------------------
up: 40106.00002
left: 40151.99998
right: 40138.99998
down: 40230.0
해당 행은 무시( 2 )
--------------------------------------------------
up: 40212.0
left: 40194.99996
right: 40179.99997
down: 40230.0
해당 행은 무시( 3 )
--------------------------------------------------
up: 40230.0
left: 40210.00004
right: 40237.00004
down: 40266.0


In [180]:
# Step 1 - report every row where MCOut2 + MC2PrTime is not strictly below
# MCIn3.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('-'*70)
print('이상값:',count)

# Step 2 - reset MCOut2 to 0 (the "missing" marker) on those rows.
# Written through .loc with the mask: the chained form df[col][i] = 0
# assigns through an intermediate Series and is not guaranteed to modify
# the frame.
df_dropped_3.loc[violates, 'MCOut2'] = 0

# Step 3 - run the same check again on the updated frame.
stage2_done = df_dropped_3['MCOut2'] + df_dropped_3['MC2PrTime']
violates = stage2_done >= df_dropped_3['MCIn3']
for i in violates[violates].index:
    print('-'*70)
    print(i,'번째 행 확인바람')
    print('MCOut2+MC2PrTime:',stage2_done[i]
         ,'MCIn3:',df_dropped_3['MCIn3'][i])
count = int(violates.sum())
print('전처리 후 이상값:',count)

----------------------------------------------------------------------
67 번째 행 확인바람
MCOut2+MC2PrTime: 38914.11 MCIn3: 38849.00003
----------------------------------------------------------------------
121 번째 행 확인바람
MCOut2+MC2PrTime: 40207.13 MCIn3: 40138.99998
----------------------------------------------------------------------
122 번째 행 확인바람
MCOut2+MC2PrTime: 40226.77 MCIn3: 40179.99997
----------------------------------------------------------------------
124 번째 행 확인바람
MCOut2+MC2PrTime: 40239.31 MCIn3: 40237.00004
----------------------------------------------------------------------
이상값: 4
전처리 후 이상값: 0
