# analysis_example

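This notebook provides an example of how to analyse the REFIT 'observation_sensor_data.csv' file, using the accompanying 'observation_survey_data.csv' file to look up which spaces belong to a building.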

The file itself can be downloaded from here: https://repository.lboro.ac.uk/account/articles/21533127


## Setup

In [8]:
import pandas as pd
import os
import urllib.request
import zipfile

## Read in the survey data

In [13]:
# Load the survey observations into a DataFrame and preview the first rows.
survey_csv = 'observation_survey_data.csv'
df_survey = pd.read_csv(
    survey_csv,
    parse_dates=['resultTime'],  # parse 'resultTime' into pandas Timestamp values
    na_filter=False,             # keep empty fields as "" rather than converting them to NaN
)
df_survey.head()

Unnamed: 0,id,type,madeBySensor,usedProcedure,hasFeatureOfInterest,observedProperty,hasSimpleResult,hasResult,value,unit,resultTime
0,Observation25312398,Observation,Researcher,SiteVisit,Building01,occupancyType,,Single_family_dwelling,,,2013-10-01 00:00:00+00:00
1,Observation25312399,Observation,Researcher,SiteVisit,Building01,builtFormType,,Detached_house_or_bungalow,,,2013-10-01 00:00:00+00:00
2,Observation25312400,Observation,Researcher,SiteVisit,Building01,orientation,,,327.0,DEG,2013-10-01 00:00:00+00:00
3,Observation25312401,Observation,Researcher,SiteVisit,Building01,wallTypeMainBuilding,,Masonry-Boxwall-Cavity,,,2013-10-01 00:00:00+00:00
4,Observation25312402,Observation,Researcher,SiteVisit,Building01,wallAgeBandMainBuilding,,1975_-_1980,,,2013-10-01 00:00:00+00:00


In [15]:
# Print a summary of the survey DataFrame: columns, non-null counts, dtypes and memory usage.
df_survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2581 entries, 0 to 2580
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   id                    2581 non-null   object             
 1   type                  2581 non-null   object             
 2   madeBySensor          2581 non-null   object             
 3   usedProcedure         2581 non-null   object             
 4   hasFeatureOfInterest  2581 non-null   object             
 5   observedProperty      2581 non-null   object             
 6   hasSimpleResult       2581 non-null   object             
 7   hasResult             2581 non-null   object             
 8   value                 2581 non-null   object             
 9   unit                  2581 non-null   object             
 10  resultTime            2578 non-null   datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), object(10)
memory usage: 221.9+ KB


## Read in the sensor data

Reading the file takes a minute or so, as it is 3.2 GB.

In [2]:
# Load the sensor observations into a DataFrame and display it
# (pandas truncates the rendered table for a frame this large).
sensor_csv = 'observation_sensor_data.csv'
df = pd.read_csv(
    sensor_csv,
    parse_dates=['resultTime'],  # parse 'resultTime' into pandas Timestamp values
    na_filter=False,             # keep empty fields as "" rather than converting them to NaN
)
df

Unnamed: 0,id,type,madeBySensor,usedProcedure,hasFeatureOfInterest,observedProperty,value,unit,resultTime
0,Observation1,Observation,Onset_Hobo_U12_Sensor1,Procedure1,Space13,Air_temperature,17.772,DEG_C,2013-10-02 05:00:00+00:00
1,Observation2,Observation,Onset_Hobo_U12_Sensor1,Procedure1,Space13,Air_temperature,18.081,DEG_C,2013-10-02 05:30:00+00:00
2,Observation3,Observation,Onset_Hobo_U12_Sensor1,Procedure1,Space13,Air_temperature,18.176,DEG_C,2013-10-02 06:00:00+00:00
3,Observation4,Observation,Onset_Hobo_U12_Sensor1,Procedure1,Space13,Air_temperature,18.176,DEG_C,2013-10-02 06:30:00+00:00
4,Observation5,Observation,Onset_Hobo_U12_Sensor1,Procedure1,Space13,Air_temperature,18.105,DEG_C,2013-10-02 07:00:00+00:00
...,...,...,...,...,...,...,...,...,...
25312392,Observation25312393,Observation,RWE_Smoke_detector_Sensor1449,Procedure2359,Building16,Alarm,-10005.000,,2014-12-02 16:33:00+00:00
25312393,Observation25312394,Observation,RWE_Smoke_detector_Sensor1449,Procedure2359,Building16,Alarm,-10005.000,,2014-12-28 13:22:00+00:00
25312394,Observation25312395,Observation,RWE_Smoke_detector_Sensor1449,Procedure2359,Building16,Alarm,-10005.000,,2015-02-17 08:39:00+00:00
25312395,Observation25312396,Observation,RWE_Smoke_detector_Sensor1449,Procedure2359,Building16,Alarm,-10005.000,,2015-02-28 08:09:00+00:00


In [3]:
# Print a summary of the sensor DataFrame: columns, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25312397 entries, 0 to 25312396
Data columns (total 9 columns):
 #   Column                Dtype              
---  ------                -----              
 0   id                    object             
 1   type                  object             
 2   madeBySensor          object             
 3   usedProcedure         object             
 4   hasFeatureOfInterest  object             
 5   observedProperty      object             
 6   value                 float64            
 7   unit                  object             
 8   resultTime            datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(1), object(7)
memory usage: 1.7+ GB


## Example 1 - Find air temperatures measured by Hobo sensors in 2014

In [4]:
# Select rows whose sensor name starts with 'Onset', whose observed property is
# 'Air_temperature', and whose resultTime falls in the half-open interval
# [2014-01-01, 2015-01-01) UTC.
year_start = pd.Timestamp('2014-01-01T00:00:00Z')
year_end = pd.Timestamp('2015-01-01T00:00:00Z')
df.loc[
    df.madeBySensor.str.startswith('Onset')
    & df.observedProperty.eq('Air_temperature')
    & df.resultTime.ge(year_start)   # inclusive lower bound
    & df.resultTime.lt(year_end)     # exclusive upper bound
]

Unnamed: 0,id,type,madeBySensor,usedProcedure,hasFeatureOfInterest,observedProperty,value,unit,resultTime
4354,Observation4355,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.677222,DEG_C,2014-01-01 00:00:00+00:00
4355,Observation4356,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.937778,DEG_C,2014-01-01 00:30:00+00:00
4356,Observation4357,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,18.247222,DEG_C,2014-01-01 01:00:00+00:00
4357,Observation4358,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.486111,DEG_C,2014-01-01 01:30:00+00:00
4358,Observation4359,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.011111,DEG_C,2014-01-01 02:00:00+00:00
...,...,...,...,...,...,...,...,...,...
8041986,Observation8041987,Observation,Onset_Hobo_pendant_Sensor425,Procedure425,Space151,Air_temperature,18.426000,DEG_C,2014-12-31 22:45:00+00:00
8041987,Observation8041988,Observation,Onset_Hobo_pendant_Sensor425,Procedure425,Space151,Air_temperature,18.426000,DEG_C,2014-12-31 23:00:00+00:00
8041988,Observation8041989,Observation,Onset_Hobo_pendant_Sensor425,Procedure425,Space151,Air_temperature,18.521000,DEG_C,2014-12-31 23:15:00+00:00
8041989,Observation8041990,Observation,Onset_Hobo_pendant_Sensor425,Procedure425,Space151,Air_temperature,18.616000,DEG_C,2014-12-31 23:30:00+00:00


## Example 2 - Find air temperatures measured by Hobo sensors in 2014 in Building01

In [16]:
# Survey rows with observedProperty 'hasBuilding' and hasResult 'Building01';
# their hasFeatureOfInterest values are collected as the space ids for Building01.
in_building01 = (
    df_survey.observedProperty.eq('hasBuilding')
    & df_survey.hasResult.eq('Building01')
)
building01_space_ids = df_survey.loc[in_building01, 'hasFeatureOfInterest'].values
building01_space_ids

array(['Space1', 'Space2', 'Space3', 'Space4', 'Space5', 'Space6',
       'Space7', 'Space8', 'Space9', 'Space10', 'Space11', 'Space12',
       'Space13', 'Space14', 'Space15', 'Space16', 'Space17', 'Space18',
       'Space325', 'Space346'], dtype=object)

In [17]:
# Same selection as Example 1 (sensor name starting with 'Onset', 'Air_temperature',
# resultTime in [2014-01-01, 2015-01-01) UTC), further restricted to rows whose
# hasFeatureOfInterest is one of the Building01 space ids.
year_start = pd.Timestamp('2014-01-01T00:00:00Z')
year_end = pd.Timestamp('2015-01-01T00:00:00Z')
df_result = df.loc[
    df.madeBySensor.str.startswith('Onset')
    & df.observedProperty.eq('Air_temperature')
    & df.resultTime.ge(year_start)   # inclusive lower bound
    & df.resultTime.lt(year_end)     # exclusive upper bound
    & df.hasFeatureOfInterest.isin(building01_space_ids)
]
df_result

Unnamed: 0,id,type,madeBySensor,usedProcedure,hasFeatureOfInterest,observedProperty,value,unit,resultTime
4354,Observation4355,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.677222,DEG_C,2014-01-01 00:00:00+00:00
4355,Observation4356,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.937778,DEG_C,2014-01-01 00:30:00+00:00
4356,Observation4357,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,18.247222,DEG_C,2014-01-01 01:00:00+00:00
4357,Observation4358,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.486111,DEG_C,2014-01-01 01:30:00+00:00
4358,Observation4359,Observation,Onset_Hobo_U12_Sensor2,Procedure2,Space13,Air_temperature,17.011111,DEG_C,2014-01-01 02:00:00+00:00
...,...,...,...,...,...,...,...,...,...
586115,Observation586116,Observation,Onset_Hobo_pendant_Sensor43,Procedure43,Space1,Air_temperature,16.618000,DEG_C,2014-12-31 22:45:00+00:00
586116,Observation586117,Observation,Onset_Hobo_pendant_Sensor43,Procedure43,Space1,Air_temperature,16.713000,DEG_C,2014-12-31 23:00:00+00:00
586117,Observation586118,Observation,Onset_Hobo_pendant_Sensor43,Procedure43,Space1,Air_temperature,16.713000,DEG_C,2014-12-31 23:15:00+00:00
586118,Observation586119,Observation,Onset_Hobo_pendant_Sensor43,Procedure43,Space1,Air_temperature,16.713000,DEG_C,2014-12-31 23:30:00+00:00
