In [14]:
import os
import pandas as pd
from ucimlrepo import fetch_ucirepo 

## Room Occupancy Detection

In [15]:
# Column names referenced by later cells: the id column, the column the rows
# are sorted by, and the target column.
id_col, time_col, target_col = 'id', 'date', 'Occupancy'

In [16]:
# Dataset identifiers; `dataset_name` is used for the output folder and file names.
dataset_handle = 'OccupancyDetection'
dataset_name = 'occupancy_detection'

# Root folder for processed datasets and this dataset's own sub-folder
# (created on demand; no error if it already exists).
processed_dir = './../../processed/'
output_dir = f'{processed_dir}{dataset_name}/'
os.makedirs(output_dir, exist_ok=True)

# CSV destinations: full data, train split, test split, and test key.
full_outp_fname, train_outp_fname, test_outp_fname, test_key_outp_fname = (
    os.path.join(output_dir, f'{dataset_name}{suffix}.csv')
    for suffix in ('', '_train', '_test', '_test_key')
)

## Download dataset

In [17]:
# Download the dataset from the UCI ML repository (dataset id 357).
occupancy_detection = fetch_ucirepo(id=357)

# Features and targets are exposed separately (as pandas dataframes).
X = occupancy_detection.data.features
y = occupancy_detection.data.targets

# Build the working frame from a copy of the features: attach the target
# under `target_col`, then order the rows by the time column.
data = X.assign(**{target_col: y}).sort_values(by=[time_col])

## Insert id column

In [18]:
# Add a constant id column (0 for every row) as the first column.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run without restarting the kernel.
# NOTE(review): presumably 0 identifies a single series — confirm with the
# consumers of the processed files.
if id_col not in data.columns:
    data.insert(0, id_col, 0)

In [19]:
# Discard every row that contains at least one missing value.
data = data.dropna()

## Train/Test split

In [20]:
# Ordered hold-out split: `data` was sorted by the time column earlier, so the
# leading rows become the training set and the trailing rows the test set.
test_split = 0.2
size = len(data)

train_size = int(size * (1 - test_split))
test_size = size - train_size

train = data.iloc[:train_size]
holdout = data.iloc[train_size:]

# The key keeps the id and target of the held-out rows; the test set itself
# is the same rows with the target column removed.
# NOTE(review): the id column is constant, so key rows can only be matched to
# test rows by position — confirm consumers do not need the time column here.
test_key = holdout.loc[:, [id_col, target_col]]
test = holdout.drop(columns=[target_col])


In [21]:
# Write the full data, the two splits and the test key as CSV files,
# without the DataFrame index.
for frame, out_path in (
    (data, full_outp_fname),
    (train, train_outp_fname),
    (test, test_outp_fname),
    (test_key, test_key_outp_fname),
):
    frame.to_csv(out_path, index=False)

In [23]:
# Show the final frame; the rendered output is abbreviated to the leading and
# trailing rows.
data

Unnamed: 0,id,date,Temperature,Humidity,Light,CO2,HumidityRatio,Occupancy
8144,0,2015-02-02 14:19:00,23.7,26.272,585.2,749.2,0.00476416302416414,1.0
8145,0,2015-02-02 14:19:59,23.718,26.29,578.4,760.4,0.00477266099212519,1.0
8146,0,2015-02-02 14:21:00,23.73,26.23,572.666666666667,769.666666666667,0.00476515255246541,1.0
8147,0,2015-02-02 14:22:00,23.7225,26.125,493.75,774.75,0.00474377335599685,1.0
8148,0,2015-02-02 14:23:00,23.754,26.2,488.6,779,0.00476659399998615,1.0
...,...,...,...,...,...,...,...,...
20557,0,2015-02-18 09:15:00,20.815,27.7175,429.75,1505.25,0.00421296819328694,1.0
20558,0,2015-02-18 09:16:00,20.865,27.745,423.5,1514.5,0.00423026193160229,1.0
20559,0,2015-02-18 09:16:59,20.89,27.745,423.5,1521.5,0.00423681810140671,1.0
20560,0,2015-02-18 09:17:59,20.89,28.0225,418.75,1632,0.0042794854718673,1.0
