# Data Load

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from tqdm import tqdm
import os

In [2]:
_dir = '_19'
_file_ls = os.listdir(_dir)
_file_ls

['_1901_freezer_log.csv',
 '_1901_suhp_log.csv',
 '_1901_suth_log.csv',
 '_1902_freezer_log.csv',
 '_1902_suhp_log.csv',
 '_1902_suth_log.csv',
 '_1903_freezer_log.csv',
 '_1903_suhp_log.csv',
 '_1903_suth_log.csv']

In [3]:
# 1901
freez1 = pd.read_csv(os.path.join(_dir, _file_ls[0]))
suhp1 = pd.read_csv(os.path.join(_dir, _file_ls[1]))
freez1['gw_time'] = pd.to_datetime(freez1['gw_time'])
suhp1['gw_time'] = pd.to_datetime(suhp1['gw_time'])

In [4]:
freez1.head(3)

Unnamed: 0,equip_sensor_id,gw_time,status,fan_run,fan_trip,freez_run,freez_trip,defrost,ctrl_mode,ctrl_operation,operation,run_time,insert_dt
0,61,2019-01-01 00:00:39,1,1.0,0.0,1.0,0.0,0.0,1,1,1,0,2019-01-01 00:00:40
1,61,2019-01-01 00:01:10,1,1.0,0.0,1.0,0.0,0.0,1,1,1,1,2019-01-01 00:01:10
2,61,2019-01-01 00:01:40,1,1.0,0.0,1.0,0.0,0.0,1,1,1,1,2019-01-01 00:01:40


In [5]:
suhp1.head(3)

Unnamed: 0,equip_sensor_id,gw_time,status,color_list,avg_temperature,avg_humidity,temperature,humidity,battery,available,insert_dt
0,55,2019-01-01 00:00:38,1,"{""humidity"": ""green"", ""temperature"": ""green"", ...",-29.67,74.6,-29.67,74.6,92,1,2019-01-01 00:00:38
1,55,2019-01-01 00:01:08,1,"{""humidity"": ""green"", ""temperature"": ""green"", ...",-29.68,74.58,-29.68,74.58,92,1,2019-01-01 00:01:08
2,55,2019-01-01 00:01:38,1,"{""humidity"": ""green"", ""temperature"": ""green"", ...",-29.72,74.53,-29.72,74.53,92,1,2019-01-01 00:01:38


# freezer와 sensor의 unique

In [6]:
print('freezer ID: ',freez1['equip_sensor_id'].unique())
print('sensor ID: ',suhp1['equip_sensor_id'].unique())

freezer ID:  [ 61  62  63  64  65 281 287 308 314 320 326 344 362 368 377 383 392 410
 416 422 428 437 446 473 479 485 491]
sensor ID:  [ 55 266 275 302 338 371 404 467]


# freezer와 sensor의 ID별 데이터 개수

In [7]:
for elem in freez1['equip_sensor_id'].unique():
    print('freezer ID: {}, Data Len: {}'.format(elem,len(freez1[freez1['equip_sensor_id']==elem])))

freezer ID: 61, Data Len: 92763
freezer ID: 62, Data Len: 92763
freezer ID: 63, Data Len: 92763
freezer ID: 64, Data Len: 92763
freezer ID: 65, Data Len: 92763
freezer ID: 281, Data Len: 6531
freezer ID: 287, Data Len: 6531
freezer ID: 308, Data Len: 7023
freezer ID: 314, Data Len: 7023
freezer ID: 320, Data Len: 7023
freezer ID: 326, Data Len: 7023
freezer ID: 344, Data Len: 7100
freezer ID: 362, Data Len: 7100
freezer ID: 368, Data Len: 7100
freezer ID: 377, Data Len: 6509
freezer ID: 383, Data Len: 6509
freezer ID: 392, Data Len: 6509
freezer ID: 410, Data Len: 6972
freezer ID: 416, Data Len: 6972
freezer ID: 422, Data Len: 6608
freezer ID: 428, Data Len: 6608
freezer ID: 437, Data Len: 6608
freezer ID: 446, Data Len: 6608
freezer ID: 473, Data Len: 6683
freezer ID: 479, Data Len: 6683
freezer ID: 485, Data Len: 6683
freezer ID: 491, Data Len: 7121


In [8]:
for elem in suhp1['equip_sensor_id'].unique():
    print('sensor ID: {}, Data Len: {}'.format(elem,len(suhp1[suhp1['equip_sensor_id']==elem])))

sensor ID: 55, Data Len: 88390
sensor ID: 266, Data Len: 6489
sensor ID: 275, Data Len: 4063
sensor ID: 302, Data Len: 4067
sensor ID: 338, Data Len: 4318
sensor ID: 371, Data Len: 4022
sensor ID: 404, Data Len: 4213
sensor ID: 467, Data Len: 4114


In [9]:
# Sensor 55
suhp1['key'] = [x for x in range(len(suhp1))]
freez1['key'] = [None]*len(freez1)

In [10]:
# freezer data에 key mapping(15sec)
from tqdm import tqdm
for i in tqdm(range(len(suhp1))):
    freez1.loc[(suhp1.loc[i,'gw_time']-freez1['gw_time'] <=  dt.timedelta(seconds=15)) & (suhp1.loc[i,'gw_time']- freez1['gw_time'] >= dt.timedelta(seconds=0)), 'key'] = suhp1.loc[i,'key']

 47%|████▋     | 56310/119676 [19:15<21:40, 48.72it/s]


KeyboardInterrupt: 

In [None]:
# freezer data에 key mapping(15sec)
import multiprocessing

def work_func(i):
    freez1.loc[(s55.loc[i,'gw_time']-freez1['gw_time'] <=  dt.timedelta(seconds=15)) & (s55.loc[i,'gw_time']- freez1['gw_time'] >= dt.timedelta(seconds=0)), 'key'] == s55.loc[i,'key']
    return None

def process():
    num_cores = 8
    pool = multiprocessing.Pool(num_cores)
    pool.map(work_func, range(len(s55)))

process()

In [None]:
freez1

Unnamed: 0,equip_sensor_id,gw_time,status,fan_run,fan_trip,freez_run,freez_trip,defrost,ctrl_mode,ctrl_operation,operation,run_time,insert_dt,key
0,61,2019-01-01 00:00:39,1,1.0,0.0,1.0,0.0,0.0,1,1,1,0,2019-01-01 00:00:40,
1,61,2019-01-01 00:01:10,1,1.0,0.0,1.0,0.0,0.0,1,1,1,1,2019-01-01 00:01:10,
2,61,2019-01-01 00:01:40,1,1.0,0.0,1.0,0.0,0.0,1,1,1,1,2019-01-01 00:01:40,
3,61,2019-01-01 00:02:10,1,1.0,0.0,1.0,0.0,0.0,1,1,1,2,2019-01-01 00:02:11,
4,61,2019-01-01 00:02:40,1,1.0,0.0,1.0,0.0,0.0,1,1,1,2,2019-01-01 00:02:41,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
613337,491,2019-01-31 23:57:50,1,0.0,0.0,0.0,0.0,1.0,0,1,1,1437,2019-01-31 23:57:50,
613338,491,2019-01-31 23:58:21,1,0.0,0.0,0.0,0.0,1.0,0,1,1,1438,2019-01-31 23:58:21,
613339,491,2019-01-31 23:58:51,1,0.0,0.0,0.0,0.0,1.0,0,1,1,1438,2019-01-31 23:58:51,
613340,491,2019-01-31 23:59:21,1,0.0,0.0,0.0,0.0,1.0,0,1,1,1439,2019-01-31 23:59:21,


In [None]:
# freezer data에 key mapping(15sec)
import multiprocessing

def work_func(i):
    freez1.loc[(suhp1.loc[i,'gw_time']-freez1['gw_time'] <=  dt.timedelta(seconds=15)) & (suhp1.loc[i,'gw_time']- freez1['gw_time'] >= dt.timedelta(seconds=0)), 'key'] == suhp1.loc[i,'key']
    return None

def process():
    num_cores = 8
    pool = multiprocessing.Pool(num_cores)
    iteration = len(suhp1)
    with tqdm(total = iteration) as pbar:
        for _ in tqdm(pool.imap_unordered(work_func, range(iteration))):
            pbar.update

process()

  0%|          | 0/119676 [00:00<?, ?it/s]