In [2]:
import pandas as pd
import numpy as np
import datetime
import matplotlib.pyplot as plt

In [64]:
min_date = datetime.datetime.strptime('2022-03-07 14:48:00', '%Y-%m-%d %H:%M:%S')
max_date = datetime.datetime.strptime('2022-04-15 15:15:00', '%Y-%m-%d %H:%M:%S')

endpoints = ['107', '120', '121', '124', '134', '199', '190']
out = '196'
h_out = '181'
cols = ['PM1', 'PM2.5', 'PM10']

In [67]:
dates = pd.date_range(min_date, max_date, freq='min').strftime('%Y-%m-%d %H:%M:%S').tolist()

In [66]:
dts = []
for date in dates:
    dts.append(datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S'))

In [68]:
dfs = []

for endpoint in endpoints:
    src = f'../datasets/indoor_particles/csv/particle{endpoint}.csv'
    print('[INFO] src: ' + src)
    df = pd.read_csv(src)
    tmp_dts = []
    for time in df['DATE'].values:
        time = time[:16]
        tmp_dts.append(datetime.datetime.strptime(time, '%Y-%m-%d %H:%M'))
    df['DATE'] = tmp_dts
    df = df.drop_duplicates('DATE', keep='last')
    df.index = df['DATE']
    if endpoint != '190':
        dfs.append(df.drop(columns=['DATE']))
    else:
        dfs[4] = pd.concat([dfs[4], df.drop(columns=['DATE'])], axis=0)
        dfs[4].sort_index(inplace=True)
        dfs[4].drop(dfs[4][dfs[4].index.duplicated(keep='last')].index, inplace=True)

[INFO] src: ../datasets/indoor_particles/csv/particle107.csv
[INFO] src: ../datasets/indoor_particles/csv/particle120.csv
[INFO] src: ../datasets/indoor_particles/csv/particle121.csv
[INFO] src: ../datasets/indoor_particles/csv/particle124.csv
[INFO] src: ../datasets/indoor_particles/csv/particle134.csv
[INFO] src: ../datasets/indoor_particles/csv/particle199.csv
[INFO] src: ../datasets/indoor_particles/csv/particle190.csv


In [69]:
for idx, df in enumerate(dfs):
    dfs[idx] = df[(df.index > min_date) & (df.index < max_date)]

In [71]:
pm1 = np.zeros(len(dates))
pm25 = np.zeros(len(dates))
pm10 = np.zeros(len(dates))

avg_df = pd.DataFrame(data={'PM1': pm1, 'PM2.5': pm25, 'PM10': pm10}, index=dts)

In [72]:
for dt in avg_df.index.values:
    for df in dfs:
        avg_df[avg_df.index == dt] += df[df.index == dt]
avg_df /= 5

In [73]:
avg_df = avg_df.dropna()

In [75]:
h_out_src = '../datasets/indoor_particles/csv/particle181.csv'
out_src = '../datasets/indoor_particles/csv/particle196.csv'
h_out_df = pd.read_csv(h_out_src)
out_df = pd.read_csv(out_src)

In [76]:
h_out_dts = []
out_dts = []
for dt in h_out_df['DATE']:
    dt = dt[:16]
    h_out_dts.append(datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M'))

for dt in out_df['DATE']:
    dt = dt[:16]
    out_dts.append(datetime.datetime.strptime(dt, '%Y-%m-%d %H:%M'))

h_out_df['DATE'] = h_out_dts
h_out_df = h_out_df.drop_duplicates('DATE', keep='last')
h_out_df.index = h_out_df['DATE']
h_out_df.drop(columns=['DATE'], inplace=True)

out_df['DATE'] = out_dts
out_df = out_df.drop_duplicates('DATE', keep='last')
out_df.index = out_df['DATE']
out_df.drop(columns=['DATE'], inplace=True)

In [77]:
h_out_df = h_out_df[(h_out_df.index > min_date) & (h_out_df.index < max_date)]
out_df = out_df[(out_df.index > min_date) & (out_df.index < max_date)]

In [100]:
h_out_df.columns = ['PM1_H_OUT', 'PM2.5_H_OUT', 'PM10_H_OUT']
out_df.columns = ['PM1_OUT', 'PM2.5_OUT', 'PM10_OUT']

In [103]:
total_df = pd.concat([avg_df, h_out_df, out_df], join='outer', axis=1).fillna(0)

In [119]:
total_df['PERSON_NUMBER'] = np.zeros(total_df.shape[0])
total_df['AIR_PURIFIER'] = np.zeros(total_df.shape[0])
total_df['WINDOW'] = np.zeros(total_df.shape[0])

In [124]:
def trim_df(df):
    tmp_dts = []
    for time in df['DATE'].values:
        tmp_dts.append(datetime.datetime.strptime(time, '%Y-%m-%d %H:%M:%S'))
    df['DATE'] = tmp_dts
    df.drop_duplicates('DATE', keep='last', inplace=True)
    df.index = df['DATE']
    df.drop(columns=['DATE'], inplace=True)
    return df

In [143]:
person_df = trim_df(pd.read_csv('../datasets/inout/person.csv'))
window_df = trim_df(pd.read_csv('../datasets/inout/window.csv'))
ap_df = trim_df(pd.read_csv('../datasets/inout/air_purifier.csv'))

In [167]:
total_df[total_df['WINDOW'] < 0]

Unnamed: 0,PM1,PM2.5,PM10,PM1_H_OUT,PM2.5_H_OUT,PM10_H_OUT,PM1_OUT,PM2.5_OUT,PM10_OUT,PERSON_NUMBER,AIR_PURIFIER,WINDOW


In [147]:
size = len(person_df.index.values)
pn_df = total_df['PERSON_NUMBER']

for idx, dt in enumerate(person_df.index.values):
    if idx == size - 1:
        pn_df[pn_df.index > dt] = person_df.iloc[idx]['PERSON_NUMBER']
    else:
        pn_df[(pn_df.index > dt) & (pn_df.index < person_df.index.values[idx+1])] = person_df.iloc[idx]['PERSON_NUMBER']

In [161]:
size = len(ap_df.index.values)
ap = total_df['AIR_PURIFIER']

for idx, dt in enumerate(ap_df.index.values):
    if idx == size - 1:
        ap[ap.index > dt] = 1 if ap_df.iloc[idx]['ACTIVITY'] == 'On' else 0
    else:
        ap[(ap.index > dt) & (ap.index < ap_df.index.values[idx+1])] = 1 if ap_df.iloc[idx]['ACTIVITY'] == 'On' else 0

In [164]:
size = len(window_df.index.values)
win = total_df['WINDOW']

for idx, dt in enumerate(window_df.index.values):
    if idx == size - 1:
        win[win.index > dt] = 1 if window_df.iloc[idx]['ACTIVITY'] == 'Opened' else 0
    else:
        win[(win.index > dt) & (win.index < window_df.index.values[idx+1])] = 1 if window_df.iloc[idx]['ACTIVITY'] == 'Opened' else 0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  win[(win.index > dt) & (win.index < window_df.index.values[idx+1])] = 1 if window_df.iloc[idx]['ACTIVITY'] == 'Opened' else 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  win[win.index > dt] = 1 if window_df.iloc[idx]['ACTIVITY'] == 'Opened' else 0


In [170]:
total_df.to_csv('~/Desktop/particles_inout.csv', index_label='DATE')

In [171]:
pd.read_csv('~/Desktop/particles_inout.csv')

Unnamed: 0,DATE,PM1,PM2.5,PM10,PM1_H_OUT,PM2.5_H_OUT,PM10_H_OUT,PM1_OUT,PM2.5_OUT,PM10_OUT,PERSON_NUMBER,AIR_PURIFIER,WINDOW
0,2022-03-07 14:49:00,8.0,12.4,13.8,0.0,0.0,0.0,21.0,31.0,32.0,0.0,0.0,0.0
1,2022-03-07 14:50:00,7.8,12.0,12.2,4.0,8.0,8.0,20.0,28.0,30.0,0.0,0.0,0.0
2,2022-03-07 14:51:00,8.2,12.6,13.0,5.0,6.0,7.0,22.0,32.0,34.0,0.0,0.0,0.0
3,2022-03-07 14:52:00,8.2,12.8,13.6,4.0,7.0,7.0,19.0,31.0,34.0,0.0,0.0,0.0
4,2022-03-07 14:53:00,7.8,11.4,12.0,4.0,8.0,9.0,21.0,31.0,34.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
56181,2022-04-15 15:10:00,18.4,28.6,31.6,10.0,18.0,19.0,19.0,31.0,34.0,5.0,0.0,1.0
56182,2022-04-15 15:11:00,19.6,30.2,32.6,10.0,16.0,16.0,18.0,27.0,27.0,5.0,0.0,1.0
56183,2022-04-15 15:12:00,20.0,30.0,33.4,10.0,15.0,17.0,20.0,28.0,32.0,5.0,0.0,1.0
56184,2022-04-15 15:13:00,19.6,30.6,32.6,10.0,17.0,17.0,16.0,25.0,26.0,5.0,0.0,1.0
