# 데이터 전처리: 방송 시작 시간, 종료 시간.
- 노출(분) 기준.
- 하루에 2번 이상 방영하는 상품 있음.
- Key: `'마더코드', '상품코드', '상품군', '상품명'`

In [3]:
import warnings
# Silence every warning for the rest of the session; note that this also hides
# pandas diagnostics such as SettingWithCopyWarning.
warnings.filterwarnings(action='ignore')

import pandas as pd
# Show up to 500 rows and every column when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', None)

import numpy as np

In [2]:
# Path of the input Excel workbook that is loaded into `data_raw`.
EXPOSURE_FULL_DATA = './data/노출시간채운데이터.xlsx'

In [4]:
# Load the data, skipping every column whose header contains 'Unnamed'.
def _is_named_column(column_name):
    """Return True when the column header does not contain 'Unnamed'."""
    return 'Unnamed' not in column_name


data_raw = pd.read_excel(EXPOSURE_FULL_DATA, usecols=_is_named_column)
data_raw.head(3)

Unnamed: 0,방송일시,노출(분),마더코드,상품코드,상품명,상품군,판매단가,취급액
0,2019-01-01 06:00:00,20.0,100346,201072,테이트 남성 셀린니트3종,의류,39900,2099000.0
1,2019-01-01 06:00:00,20.0,100346,201079,테이트 여성 셀린니트3종,의류,39900,4371000.0
2,2019-01-01 06:20:00,20.0,100346,201072,테이트 남성 셀린니트3종,의류,39900,3262000.0


## 기본 전처리
- 무형 상품군 제외
- 취급액 50000원인 것은 0원으로 변경

In [5]:
# Exclude rows whose product group is '무형', then renumber the index from 0.
is_kept_group = data_raw['상품군'].ne('무형')
data_raw = data_raw.loc[is_kept_group].reset_index(drop=True)
data_raw

Unnamed: 0,방송일시,노출(분),마더코드,상품코드,상품명,상품군,판매단가,취급액
0,2019-01-01 06:00:00,20.0,100346,201072,테이트 남성 셀린니트3종,의류,39900,2099000.0
1,2019-01-01 06:00:00,20.0,100346,201079,테이트 여성 셀린니트3종,의류,39900,4371000.0
2,2019-01-01 06:20:00,20.0,100346,201072,테이트 남성 셀린니트3종,의류,39900,3262000.0
3,2019-01-01 06:20:00,20.0,100346,201079,테이트 여성 셀린니트3종,의류,39900,6955000.0
4,2019-01-01 06:40:00,20.0,100346,201072,테이트 남성 셀린니트3종,의류,39900,6672000.0
...,...,...,...,...,...,...,...,...
37367,2019-12-31 23:40:00,20.0,100448,201391,일시불쿠첸압력밥솥 6인용,주방,148000,10157000.0
37368,2020-01-01 00:00:00,20.0,100448,201383,무이자쿠첸압력밥솥 10인용,주방,178000,50929000.0
37369,2020-01-01 00:00:00,20.0,100448,201390,일시불쿠첸압력밥솥 10인용,주방,168000,104392000.0
37370,2020-01-01 00:00:00,20.0,100448,201384,무이자쿠첸압력밥솥 6인용,주방,158000,13765000.0


In [6]:
# Overwrite sales amounts that are exactly 50000 with 0.
sales_amount = data_raw['취급액']
data_raw['취급액'] = sales_amount.mask(sales_amount.eq(50000), 0)
# Sanity check: no row should still carry 50000 (an empty frame is expected).
data_raw.loc[data_raw['취급액'].eq(50000)]

Unnamed: 0,방송일시,노출(분),마더코드,상품코드,상품명,상품군,판매단가,취급액


In [7]:
# Print column dtypes and non-null counts of the cleaned frame.
data_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37372 entries, 0 to 37371
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   방송일시    37372 non-null  datetime64[ns]
 1   노출(분)   37372 non-null  float64       
 2   마더코드    37372 non-null  int64         
 3   상품코드    37372 non-null  int64         
 4   상품명     37372 non-null  object        
 5   상품군     37372 non-null  object        
 6   판매단가    37372 non-null  int64         
 7   취급액     37372 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 2.3+ MB


## 집계
- 방송 끝 시간 추가
- 마더코드, 상품코드, 상품군, 상품명으로 집계.

In [8]:
# Add the end time of each slot: start time plus the exposure length in minutes.
exposure_length = pd.to_timedelta(data_raw['노출(분)'], unit='m')
data_raw['방송끝'] = data_raw['방송일시'] + exposure_length
# Reorder the columns so the end time sits right after the start time.
column_order = ['방송일시', '방송끝', '노출(분)', '마더코드', '상품코드', '상품명', '상품군', '판매단가', '취급액']
data_raw = data_raw[column_order]

In [9]:
# Aggregate by key: one row per product, with every slot's values collected
# into per-column lists.
def _collect(values):
    """Return the values of one group as a plain list."""
    return list(values)


group_keys = ['마더코드', '상품코드', '상품군', '상품명']
list_columns = ['판매단가', '취급액', '노출(분)', '방송일시', '방송끝']

# Merging back-to-back slots compares neighbouring list entries, so the slots
# of each product must be in chronological order. A stable sort enforces that
# while keeping rows with equal start times in their original order.
chronological = data_raw.sort_values('방송일시', kind='mergesort')
test1 = chronological.groupby(by=group_keys, as_index=False).agg(
    {column: _collect for column in list_columns}
)
# Placeholder column that will hold the combined per-slot records.
test1['집계'] = ''

## 연속 방송 구간 병합

- 앞 구간의 방송 종료 시각과 다음 구간의 방송 시작 시각이 같으면(연속 방송) 앞 구간을 지우고, 그 노출 시간과 취급액을 다음 구간에 더한다.
- 병합할 때는 같은 인덱스의 구간끼리 노출 시간과 취급액을 합산하고, 판매단가는 마지막 구간의 값을 그대로 사용한다.

In [10]:
# Combine start, end, exposure, unit price and sales amount of every slot into
# one list of [start, end, exposure, price, sales] records per product.
#
# The column is assigned in a single statement rather than element by element
# through `test1['집계'][i] = ...`: that form is chained assignment, which
# pandas does not guarantee to write back to `test1` (and which never does
# under copy-on-write).
slot_records = [
    [list(slot) for slot in zip(starts, ends, exposures, prices, sales)]
    for starts, ends, exposures, prices, sales in zip(
        test1['방송일시'], test1['방송끝'], test1['노출(분)'], test1['판매단가'], test1['취급액']
    )
]
test1['집계'] = pd.Series(slot_records, index=test1.index)

test1

Unnamed: 0,마더코드,상품코드,상품군,상품명,판매단가,취급액,노출(분),방송일시,방송끝,집계
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[79900, 79900, 79900, 79900, 79900, 79900]","[12078000.0, 17338000.0, 18195000.0, 5905000.0...","[20.0, 20.0, 20.0, 20.0, 20.0, 15.0]","[2019-08-26 00:20:00, 2019-08-26 00:40:00, 201...","[2019-08-26 00:40:00, 2019-08-26 01:00:00, 201...","[[2019-08-26 00:20:00, 2019-08-26 00:40:00, 20..."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[99000, 99000, 99000, 99000]","[34218000.0, 52844000.0, 21277000.0, 42997000.0]","[30.0, 30.0, 30.0, 30.0]","[2019-01-25 16:00:00, 2019-01-25 16:30:00, 201...","[2019-01-25 16:30:00, 2019-01-25 17:00:00, 201...","[[2019-01-25 16:00:00, 2019-01-25 16:30:00, 30..."
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[49900, 49900, 49900, 49900, 49900, 49900, 499...","[34069000.0, 42276000.0, 52138000.0, 21834000....","[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....","[2019-10-29 17:00:00, 2019-10-29 17:20:00, 201...","[2019-10-29 17:20:00, 2019-10-29 17:40:00, 201...","[[2019-10-29 17:00:00, 2019-10-29 17:20:00, 20..."
3,100006,200008,잡화,AAA 19 F/W 남성 트레킹화 3종,"[49800, 49800, 49800, 49800, 49800, 49800, 498...","[14064000.0, 7780000.0, 10493000.0, 9273000.0,...","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10....","[2019-11-08 08:20:00, 2019-11-08 08:30:00, 201...","[2019-11-08 08:30:00, 2019-11-08 08:40:00, 201...","[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10..."
4,100006,200009,잡화,AAA 19 F/W 여성 트레킹화 3종,"[49800, 49800, 49800, 49800, 49800, 49800, 498...","[7201000.0, 5984000.0, 6587000.0, 7875000.0, 6...","[10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10....","[2019-11-08 08:20:00, 2019-11-08 08:30:00, 201...","[2019-11-08 08:30:00, 2019-11-08 08:40:00, 201...","[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10..."
...,...,...,...,...,...,...,...,...,...,...
2037,100846,202500,잡화,알비에로 마르티니 1A클라쎄 지오맵 울실크 스카프,"[99000, 99000, 99000, 99000, 99000, 99000, 990...","[6899000.0, 15888000.0, 21950000.0, 7544000.0,...","[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....","[2019-08-18 12:00:00, 2019-08-18 12:20:00, 201...","[2019-08-18 12:20:00, 2019-08-18 12:40:00, 201...","[[2019-08-18 12:00:00, 2019-08-18 12:20:00, 20..."
2038,100849,202509,농수축,19년 멋진밥상 흥양농협 햅쌀 20kg,"[63900, 63900, 63900, 63900, 63900, 63900, 639...","[68010000.0, 70301000.0, 78177000.0, 42213000....","[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....","[2019-09-04 18:00:00, 2019-09-04 18:20:00, 201...","[2019-09-04 18:20:00, 2019-09-04 18:40:00, 201...","[[2019-09-04 18:00:00, 2019-09-04 18:20:00, 20..."
2039,100849,202510,농수축,멋진밥상 흥양농협 쌀 20kg,"[59900, 59900, 59900, 59900, 59900, 59900, 599...","[51729000.0, 50196000.0, 50338000.0, 41758000....","[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....","[2019-02-18 14:00:00, 2019-02-18 14:20:00, 201...","[2019-02-18 14:20:00, 2019-02-18 14:40:00, 201...","[[2019-02-18 14:00:00, 2019-02-18 14:20:00, 20..."
2040,100849,202512,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[61000, 61000, 61000, 61000, 61000, 61000, 610...","[83893000.0, 44297000.0, 65082000.0, 69522000....","[20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20....","[2019-01-01 16:40:00, 2019-01-05 16:00:00, 201...","[2019-01-01 17:00:00, 2019-01-05 16:20:00, 201...","[[2019-01-01 16:40:00, 2019-01-01 17:00:00, 20..."


In [11]:
# Keep only the key columns and the combined record list.
per_field_columns = ['판매단가', '취급액', '노출(분)', '방송일시', '방송끝']
test2 = test1.drop(columns=per_field_columns)
test2

Unnamed: 0,마더코드,상품코드,상품군,상품명,집계
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[[2019-08-26 00:20:00, 2019-08-26 00:40:00, 20..."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[[2019-01-25 16:00:00, 2019-01-25 16:30:00, 30..."
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[[2019-10-29 17:00:00, 2019-10-29 17:20:00, 20..."
3,100006,200008,잡화,AAA 19 F/W 남성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10..."
4,100006,200009,잡화,AAA 19 F/W 여성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10..."
...,...,...,...,...,...
2037,100846,202500,잡화,알비에로 마르티니 1A클라쎄 지오맵 울실크 스카프,"[[2019-08-18 12:00:00, 2019-08-18 12:20:00, 20..."
2038,100849,202509,농수축,19년 멋진밥상 흥양농협 햅쌀 20kg,"[[2019-09-04 18:00:00, 2019-09-04 18:20:00, 20..."
2039,100849,202510,농수축,멋진밥상 흥양농협 쌀 20kg,"[[2019-02-18 14:00:00, 2019-02-18 14:20:00, 20..."
2040,100849,202512,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[[2019-01-01 16:40:00, 2019-01-01 17:00:00, 20..."


In [12]:
# Find the slots to delete when merging back-to-back broadcasts.
#
# A slot whose end time equals the start time of the next slot is marked for
# deletion, and its exposure time and sales amount are carried forward into the
# next slot. The last slot of a consecutive run therefore ends up holding the
# totals of the whole run.
#
# Both columns are assigned in one statement each (no chained assignment), and
# the records are copied first so the nested lists still referenced by other
# frames are not mutated in place.
merged_slots_column = []
delete_indices_column = []

for original_slots in test2['집계']:
    slots = [list(slot) for slot in original_slots]
    delete_indices = []
    for j, (current, following) in enumerate(zip(slots, slots[1:])):
        if current[1] == following[0]:  # end of this slot == start of the next one
            delete_indices.append(j)
            following[2] += current[2]    # carry the exposure time forward
            following[-1] += current[-1]  # carry the sales amount forward
    merged_slots_column.append(slots)
    delete_indices_column.append(delete_indices)

test2['집계'] = pd.Series(merged_slots_column, index=test2.index)
test2['삭제'] = pd.Series(delete_indices_column, index=test2.index)

test2

Unnamed: 0,마더코드,상품코드,상품군,상품명,집계,삭제
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[[2019-08-26 00:20:00, 2019-08-26 00:40:00, 20...","[0, 1, 3, 4]"
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[[2019-01-25 16:00:00, 2019-01-25 16:30:00, 30...","[0, 2]"
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[[2019-10-29 17:00:00, 2019-10-29 17:20:00, 20...","[0, 1, 3, 4, 6, 7, 9, 10, 12, 13]"
3,100006,200008,잡화,AAA 19 F/W 남성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10...","[0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 1..."
4,100006,200009,잡화,AAA 19 F/W 여성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 08:30:00, 10...","[0, 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 1..."
...,...,...,...,...,...,...
2037,100846,202500,잡화,알비에로 마르티니 1A클라쎄 지오맵 울실크 스카프,"[[2019-08-18 12:00:00, 2019-08-18 12:20:00, 20...","[0, 1, 3, 4, 6, 7, 9, 10]"
2038,100849,202509,농수축,19년 멋진밥상 흥양농협 햅쌀 20kg,"[[2019-09-04 18:00:00, 2019-09-04 18:20:00, 20...","[0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16]"
2039,100849,202510,농수축,멋진밥상 흥양농협 쌀 20kg,"[[2019-02-18 14:00:00, 2019-02-18 14:20:00, 20...","[0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, ..."
2040,100849,202512,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[[2019-01-01 16:40:00, 2019-01-01 17:00:00, 20...","[1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19,..."


In [13]:
# Delete the slots marked in '삭제', keeping only the last slot of every run of
# back-to-back broadcasts.
#
# The filtered lists are written back as a whole column: element-wise
# `test2['집계'][i] = ...` is chained assignment and is not guaranteed to
# update `test2`. The comprehension also uses its own index name so it no
# longer shadows the row counter.
kept_slots_column = []
for slots, marked in zip(test2['집계'], test2['삭제']):
    marked_positions = set(marked)
    kept_slots_column.append(
        [slot for position, slot in enumerate(slots) if position not in marked_positions]
    )
test2['집계'] = pd.Series(kept_slots_column, index=test2.index)

test3 = test2.drop('삭제', axis=1)
test3

Unnamed: 0,마더코드,상품코드,상품군,상품명,집계
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[[2019-08-26 01:00:00, 2019-08-26 01:20:00, 60..."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[[2019-01-25 16:30:00, 2019-01-25 17:00:00, 60..."
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[[2019-10-29 17:40:00, 2019-10-29 18:00:00, 60..."
3,100006,200008,잡화,AAA 19 F/W 남성 트레킹화 3종,"[[2019-11-08 09:10:00, 2019-11-08 09:20:00, 60..."
4,100006,200009,잡화,AAA 19 F/W 여성 트레킹화 3종,"[[2019-11-08 09:10:00, 2019-11-08 09:20:00, 60..."
...,...,...,...,...,...
2037,100846,202500,잡화,알비에로 마르티니 1A클라쎄 지오맵 울실크 스카프,"[[2019-08-18 12:40:00, 2019-08-18 13:00:00, 60..."
2038,100849,202509,농수축,19년 멋진밥상 흥양농협 햅쌀 20kg,"[[2019-09-04 18:40:00, 2019-09-04 19:00:00, 60..."
2039,100849,202510,농수축,멋진밥상 흥양농협 쌀 20kg,"[[2019-02-18 14:40:00, 2019-02-18 15:00:00, 60..."
2040,100849,202512,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[[2019-01-01 16:40:00, 2019-01-01 17:00:00, 20..."


In [15]:
# Recover the start time of each merged broadcast by subtracting the summed
# exposure time (minutes) from the end time of its last slot.
#
# Each record becomes [start, end, exposure, price, sales]. The column is
# assigned in one statement because element-wise `test3['최종'][i] = ...` is
# chained assignment and is not guaranteed to update `test3`.
final_records_column = []
for slots in test3['집계']:
    final_records_column.append([
        [slot[1] - pd.to_timedelta(slot[2], unit='m'), slot[1], slot[2], slot[3], slot[4]]
        for slot in slots
    ])
test3['최종'] = pd.Series(final_records_column, index=test3.index)

test4 = test3.drop('집계', axis=1)
test4

Unnamed: 0,마더코드,상품코드,상품군,상품명,최종
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[[2019-08-26 00:20:00, 2019-08-26 01:20:00, 60..."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[[2019-01-25 16:00:00, 2019-01-25 17:00:00, 60..."
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[[2019-10-29 17:00:00, 2019-10-29 18:00:00, 60..."
3,100006,200008,잡화,AAA 19 F/W 남성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 09:20:00, 60..."
4,100006,200009,잡화,AAA 19 F/W 여성 트레킹화 3종,"[[2019-11-08 08:20:00, 2019-11-08 09:20:00, 60..."
...,...,...,...,...,...
2037,100846,202500,잡화,알비에로 마르티니 1A클라쎄 지오맵 울실크 스카프,"[[2019-08-18 12:00:00, 2019-08-18 13:00:00, 60..."
2038,100849,202509,농수축,19년 멋진밥상 흥양농협 햅쌀 20kg,"[[2019-09-04 18:00:00, 2019-09-04 19:00:00, 60..."
2039,100849,202510,농수축,멋진밥상 흥양농협 쌀 20kg,"[[2019-02-18 14:00:00, 2019-02-18 15:00:00, 60..."
2040,100849,202512,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[[2019-01-01 16:40:00, 2019-01-01 17:00:00, 20..."


In [16]:
# Split rows: one row per record in '최종'. The original row index is kept, so
# index labels repeat for products that have several records.
test5 = test4.explode('최종')
test5

Unnamed: 0,마더코드,상품코드,상품군,상품명,최종
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[2019-08-26 00:20:00, 2019-08-26 01:20:00, 60...."
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[2019-08-30 01:00:00, 2019-08-30 01:55:00, 55...."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[2019-01-25 16:00:00, 2019-01-25 17:00:00, 60...."
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[2019-02-01 13:00:00, 2019-02-01 14:00:00, 60...."
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[2019-10-29 17:00:00, 2019-10-29 18:00:00, 60...."
...,...,...,...,...,...
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-11-14 16:20:00, 2019-11-14 17:20:00, 60...."
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-05 15:00:00, 2019-12-05 15:20:00, 20...."
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-11 15:00:00, 2019-12-11 15:20:00, 20...."
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-14 17:40:00, 2019-12-14 18:00:00, 20...."


## 최종 컬럼으로 만들기

In [17]:
# Spread each [start, end, exposure, price, sales] record into its own columns.
field_names = ['방송시작', '방송종료', '노출(분)', '판매단가', '취급액']
for position, field_name in enumerate(field_names):
    test5[field_name] = test5['최종'].apply(lambda record, position=position: record[position])
test5

Unnamed: 0,마더코드,상품코드,상품군,상품명,최종,방송시작,방송종료,노출(분),판매단가,취급액
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[2019-08-26 00:20:00, 2019-08-26 01:20:00, 60....",2019-08-26 00:20:00,2019-08-26 01:20:00,60.0,79900,47611000.0
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,"[2019-08-30 01:00:00, 2019-08-30 01:55:00, 55....",2019-08-30 01:00:00,2019-08-30 01:55:00,55.0,79900,28951000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[2019-01-25 16:00:00, 2019-01-25 17:00:00, 60....",2019-01-25 16:00:00,2019-01-25 17:00:00,60.0,99000,87062000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,"[2019-02-01 13:00:00, 2019-02-01 14:00:00, 60....",2019-02-01 13:00:00,2019-02-01 14:00:00,60.0,99000,64274000.0
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,"[2019-10-29 17:00:00, 2019-10-29 18:00:00, 60....",2019-10-29 17:00:00,2019-10-29 18:00:00,60.0,49900,128483000.0
...,...,...,...,...,...,...,...,...,...,...
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-11-14 16:20:00, 2019-11-14 17:20:00, 60....",2019-11-14 16:20:00,2019-11-14 17:20:00,60.0,58900,129387000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-05 15:00:00, 2019-12-05 15:20:00, 20....",2019-12-05 15:00:00,2019-12-05 15:20:00,20.0,58900,52542000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-11 15:00:00, 2019-12-11 15:20:00, 20....",2019-12-11 15:00:00,2019-12-11 15:20:00,20.0,58900,45331000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,"[2019-12-14 17:40:00, 2019-12-14 18:00:00, 20....",2019-12-14 17:40:00,2019-12-14 18:00:00,20.0,58900,51303000.0


In [18]:
# Drop the packed record column now that its fields have their own columns.
test6 = test5.drop(columns=['최종'])
test6

Unnamed: 0,마더코드,상품코드,상품군,상품명,방송시작,방송종료,노출(분),판매단가,취급액
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,2019-08-26 00:20:00,2019-08-26 01:20:00,60.0,79900,47611000.0
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,2019-08-30 01:00:00,2019-08-30 01:55:00,55.0,79900,28951000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,2019-01-25 16:00:00,2019-01-25 17:00:00,60.0,99000,87062000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,2019-02-01 13:00:00,2019-02-01 14:00:00,60.0,99000,64274000.0
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,2019-10-29 17:00:00,2019-10-29 18:00:00,60.0,49900,128483000.0
...,...,...,...,...,...,...,...,...,...
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-11-14 16:20:00,2019-11-14 17:20:00,60.0,58900,129387000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-05 15:00:00,2019-12-05 15:20:00,20.0,58900,52542000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-11 15:00:00,2019-12-11 15:20:00,20.0,58900,45331000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-14 17:40:00,2019-12-14 18:00:00,20.0,58900,51303000.0


In [19]:
# Convert the start and end columns to datetime, then show the resulting dtypes.
for time_column in ('방송시작', '방송종료'):
    test6[time_column] = pd.to_datetime(test6[time_column])
test6.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13279 entries, 0 to 2041
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   마더코드    13279 non-null  int64         
 1   상품코드    13279 non-null  int64         
 2   상품군     13279 non-null  object        
 3   상품명     13279 non-null  object        
 4   방송시작    13279 non-null  datetime64[ns]
 5   방송종료    13279 non-null  datetime64[ns]
 6   노출(분)   13279 non-null  float64       
 7   판매단가    13279 non-null  int64         
 8   취급액     13279 non-null  float64       
dtypes: datetime64[ns](2), float64(2), int64(3), object(2)
memory usage: 1.0+ MB


## 저장

In [20]:
# Final check: display the frame that is about to be saved.
test6

Unnamed: 0,마더코드,상품코드,상품군,상품명,방송시작,방송종료,노출(분),판매단가,취급액
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,2019-08-26 00:20:00,2019-08-26 01:20:00,60.0,79900,47611000.0
0,100000,200000,이미용,엘로엘 아쿠아클린 마스크,2019-08-30 01:00:00,2019-08-30 01:55:00,55.0,79900,28951000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,2019-01-25 16:00:00,2019-01-25 17:00:00,60.0,99000,87062000.0
1,100001,200001,주방,국내생산 스텐락 심플 스텐밀폐용기 17종,2019-02-01 13:00:00,2019-02-01 14:00:00,60.0,99000,64274000.0
2,100002,200002,농수축,이보은의 우삽겹 12팩세트,2019-10-29 17:00:00,2019-10-29 18:00:00,60.0,49900,128483000.0
...,...,...,...,...,...,...,...,...,...
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-11-14 16:20:00,2019-11-14 17:20:00,60.0,58900,129387000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-05 15:00:00,2019-12-05 15:20:00,20.0,58900,52542000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-11 15:00:00,2019-12-11 15:20:00,20.0,58900,45331000.0
2041,100849,202513,농수축,멋진밥상 흥양농협 햅쌀 20kg,2019-12-14 17:40:00,2019-12-14 18:00:00,20.0,58900,51303000.0


In [23]:
# Save the result to Excel. With the default settings the row index (which
# repeats per product after the explode step) is written as the first column.
# NOTE(review): consider index=False if no downstream reader relies on that
# column — confirm before changing.
test6.to_excel('./data/시간분리데이터(노출추가).xlsx')