# Data Fusion project

## Load dataset

In [56]:
import numpy as np
import pandas as pd

# Load the raw sensor log from CSV. As the preview below shows, each row is one
# reading with the columns entity_id (sensor name), state (reported value) and
# last_updated (timestamp of the reading).
dataframe = pd.read_csv('joaq.csv')
dataframe

Unnamed: 0,entity_id,state,last_updated
0,app_memory,0.022,2023-01-27 00:02:53.470979
1,battery_power,0.0,2023-01-27 00:02:53.473223
2,last_update_trigger,SensorWorker,2023-01-27 00:02:53.477684
3,light_sensor,86,2023-01-27 00:02:53.479552
4,active_notification_count,1,2023-01-27 00:02:53.485171
...,...,...,...
9972,light_sensor,255,2023-01-30 10:54:42.332498
9973,pressure_sensor,1028.7,2023-01-30 10:54:42.334350
9974,app_memory,0.015,2023-01-30 11:01:38.050349
9975,battery_temperature,21.4,2023-01-30 11:01:38.052370


In [57]:
def _max_state(states: pd.Series):
    """Return the largest state of one (15-minute window, sensor) group.

    `state` is read from the CSV as text, so a plain ``max`` compares strings
    character by character ('86' > '255', '9' > '10'). When every value in the
    group parses as a number the comparison is done numerically instead; the
    winning value is still returned in its original string form so the column
    keeps the same type as before. Groups containing non-numeric text fall back
    to the ordinary string maximum.
    """
    present = states.dropna()
    if present.empty:
        return np.nan
    as_numbers = pd.to_numeric(present, errors='coerce')
    if as_numbers.notna().all():
        # positional lookup: independent of whatever index labels the group carries
        return present.iloc[as_numbers.to_numpy().argmax()]
    return present.max()


# Snap every reading to the nearest 15-minute mark, then keep one value per
# (timestamp, sensor) pair: the maximum reported within that window.
dataframe['last_updated'] = pd.to_datetime(dataframe['last_updated']).dt.round('15min')
dataframe = dataframe.groupby(['last_updated', 'entity_id']).agg({'state': _max_state}).reset_index()
dataframe

Unnamed: 0,last_updated,entity_id,state
0,2023-01-27 00:00:00,active_notification_count,1
1,2023-01-27 00:00:00,app_memory,0.022
2,2023-01-27 00:00:00,battery_power,0.0
3,2023-01-27 00:00:00,last_update_trigger,SensorWorker
4,2023-01-27 00:00:00,light_sensor,86
...,...,...,...
2970,2023-01-30 11:00:00,battery_temperature,21.5
2971,2023-01-30 11:00:00,bluetooth_connection,1
2972,2023-01-30 11:00:00,last_update_trigger,android.bluetooth.device.action.ACL_CONNECTED
2973,2023-01-30 11:00:00,light_sensor,255


In [58]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that only adds a stray "None" line).
dataframe.info()
dataframe.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2975 entries, 0 to 2974
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   last_updated  2975 non-null   datetime64[ns]
 1   entity_id     2975 non-null   object        
 2   state         2975 non-null   object        
dtypes: datetime64[ns](1), object(2)
memory usage: 69.9+ KB
None


last_updated    datetime64[ns]
entity_id               object
state                   object
dtype: object

In [59]:
# After the group-by above each row is one (15-minute timestamp, sensor) pair,
# so this counts in how many time windows each sensor reported at least once.
dataframe['entity_id'].value_counts()

app_memory                    221
battery_power                 219
battery_temperature           199
battery_level                 188
pressure_sensor               166
last_update_trigger           165
active_notification_count     164
light_sensor                  133
device_tracker                124
total_rx_gb                   106
doze_mode                     106
screen_brightness             105
app_rx_gb                     105
interactive                   101
proximity_sensor               92
mobile_rx_gb                   91
last_used_app                  90
device_locked                  87
steps_sensor                   77
detected_activity              76
total_tx_gb                    73
mobile_tx_gb                   63
app_tx_gb                      61
wifi_signal_strength           20
wifi_link_speed                20
volume_level_accessibility     17
volume_level_music             17
wifi_frequency                 14
bluetooth_connection            9
wifi_connectio

## Data manipulation

In [60]:
# Collect the distinct sensor names (in order of first appearance); they are
# used below as the column headers of the wide table.
sensor_columns = dataframe['entity_id'].unique()
sensor_columns

array(['active_notification_count', 'app_memory', 'battery_power',
       'last_update_trigger', 'light_sensor', 'pressure_sensor',
       'app_rx_gb', 'app_tx_gb', 'battery_level', 'battery_temperature',
       'detected_activity', 'device_locked', 'device_tracker',
       'doze_mode', 'interactive', 'last_used_app', 'proximity_sensor',
       'screen_brightness', 'steps_sensor', 'total_rx_gb', 'total_tx_gb',
       'wifi_frequency', 'wifi_link_speed', 'wifi_signal_strength',
       'headphones', 'volume_level_accessibility', 'volume_level_call',
       'volume_level_music', 'music_active', 'mobile_rx_gb',
       'mobile_tx_gb', 'network_type', 'wifi_connection', 'audio_mode',
       'phone_state', 'volume_level_system', 'bluetooth_connection',
       'bluetooth_state', 'battery_state', 'charger_type', 'is_charging',
       'battery_health', 'ble_transmitter', 'do_not_disturb_sensor',
       'mobile_data', 'power_save', 'screen_off_timeout', 'speakerphone',
       'wifi_state'], dtype

### Create new dataframe

In [61]:
# Build an empty wide table: one row per distinct (rounded) timestamp and one
# column per sensor. Every cell starts out as NaN.
df = pd.DataFrame(index=dataframe['last_updated'].unique(), columns=sensor_columns)
# name the index so it shows up as 'timestamp'
df.index.name = 'timestamp'
df

Unnamed: 0_level_0,active_notification_count,app_memory,battery_power,last_update_trigger,light_sensor,pressure_sensor,app_rx_gb,app_tx_gb,battery_level,battery_temperature,...,charger_type,is_charging,battery_health,ble_transmitter,do_not_disturb_sensor,mobile_data,power_save,screen_off_timeout,speakerphone,wifi_state
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-27 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-27 00:15:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-27 00:30:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-27 00:45:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-27 01:00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-30 10:00:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-30 10:15:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-30 10:30:00,,,,,,,,,,,...,,,,,,,,,,
2023-01-30 10:45:00,,,,,,,,,,,...,,,,,,,,,,


In [62]:
# DataFrame.info() prints its report directly and returns None; wrapping it in
# print() would only append a stray "None" line to the output.
df.info()
df.dtypes

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 222 entries, 2023-01-27 00:00:00 to 2023-01-30 11:00:00
Data columns (total 49 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   active_notification_count   0 non-null      object
 1   app_memory                  0 non-null      object
 2   battery_power               0 non-null      object
 3   last_update_trigger         0 non-null      object
 4   light_sensor                0 non-null      object
 5   pressure_sensor             0 non-null      object
 6   app_rx_gb                   0 non-null      object
 7   app_tx_gb                   0 non-null      object
 8   battery_level               0 non-null      object
 9   battery_temperature         0 non-null      object
 10  detected_activity           0 non-null      object
 11  device_locked               0 non-null      object
 12  device_tracker              0 non-null      object
 13  doze_mode    

active_notification_count     object
app_memory                    object
battery_power                 object
last_update_trigger           object
light_sensor                  object
pressure_sensor               object
app_rx_gb                     object
app_tx_gb                     object
battery_level                 object
battery_temperature           object
detected_activity             object
device_locked                 object
device_tracker                object
doze_mode                     object
interactive                   object
last_used_app                 object
proximity_sensor              object
screen_brightness             object
steps_sensor                  object
total_rx_gb                   object
total_tx_gb                   object
wifi_frequency                object
wifi_link_speed               object
wifi_signal_strength          object
headphones                    object
volume_level_accessibility    object
volume_level_call             object
v

### Fill new dataframe

In [63]:
# Reshape the long (timestamp, sensor, state) table into the wide layout in a
# single vectorised step instead of writing one cell at a time with iterrows().
# (timestamp, sensor) pairs are unique after the earlier group-by, which is
# what pivot requires. Selecting by sensor_columns restores the original column
# order, since pivot sorts the column labels alphabetically.
df = dataframe.pivot(index='last_updated', columns='entity_id', values='state')[sensor_columns]
df.index.name = 'timestamp'
df.columns.name = None  # drop the 'entity_id' label pivot puts on the column axis

df

Unnamed: 0_level_0,active_notification_count,app_memory,battery_power,last_update_trigger,light_sensor,pressure_sensor,app_rx_gb,app_tx_gb,battery_level,battery_temperature,...,charger_type,is_charging,battery_health,ble_transmitter,do_not_disturb_sensor,mobile_data,power_save,screen_off_timeout,speakerphone,wifi_state
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-27 00:00:00,1,0.022,0.0,SensorWorker,86,1014.6,,,,,...,,,,,,,,,,
2023-01-27 00:15:00,1,0.023,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,6,1014.6,0.0202,0.0097,69,20.1,...,,,,,,,,,,
2023-01-27 00:30:00,3,0.024,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,62,1014.7,0.0204,,67,30.8,...,,,,,,,,,,
2023-01-27 00:45:00,1,0.023,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,8,1014.7,0.021,0.0099,62,28.9,...,,,,,,,,,,
2023-01-27 01:00:00,1,0.021,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,,1014.6,,,57,30.6,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-30 10:00:00,2,0.018,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,920,1029.0,0.0546,0.0268,31,27.3,...,,,,,,,,,,
2023-01-30 10:15:00,2,0.018,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,7,1029.2,0.0548,0.0269,25,25.8,...,,,,,,,,,,
2023-01-30 10:30:00,2,0.017,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,246,1028.7,0.055,0.027,23,23.6,...,,,,,,,,,,
2023-01-30 10:45:00,1,0.017,0.0,android.intent.action.TIME_TICK,45,1029.1,0.0552,0.0271,20,22.6,...,,,,,,,,,,


In [64]:
# DataFrame.info() prints its report directly and returns None; wrapping it in
# print() would only append a stray "None" line to the output.
df.info()
df.dtypes

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 222 entries, 2023-01-27 00:00:00 to 2023-01-30 11:00:00
Data columns (total 49 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   active_notification_count   164 non-null    object
 1   app_memory                  221 non-null    object
 2   battery_power               219 non-null    object
 3   last_update_trigger         165 non-null    object
 4   light_sensor                133 non-null    object
 5   pressure_sensor             166 non-null    object
 6   app_rx_gb                   105 non-null    object
 7   app_tx_gb                   61 non-null     object
 8   battery_level               188 non-null    object
 9   battery_temperature         199 non-null    object
 10  detected_activity           76 non-null     object
 11  device_locked               87 non-null     object
 12  device_tracker              124 non-null    object
 13  doze_mode    

active_notification_count     object
app_memory                    object
battery_power                 object
last_update_trigger           object
light_sensor                  object
pressure_sensor               object
app_rx_gb                     object
app_tx_gb                     object
battery_level                 object
battery_temperature           object
detected_activity             object
device_locked                 object
device_tracker                object
doze_mode                     object
interactive                   object
last_used_app                 object
proximity_sensor              object
screen_brightness             object
steps_sensor                  object
total_rx_gb                   object
total_tx_gb                   object
wifi_frequency                object
wifi_link_speed               object
wifi_signal_strength          object
headphones                    object
volume_level_accessibility    object
volume_level_call             object
v

In [65]:
# number of empty (NaN) cells per sensor column
df.isna().sum()

active_notification_count      58
app_memory                      1
battery_power                   3
last_update_trigger            57
light_sensor                   89
pressure_sensor                56
app_rx_gb                     117
app_tx_gb                     161
battery_level                  34
battery_temperature            23
detected_activity             146
device_locked                 135
device_tracker                 98
doze_mode                     116
interactive                   121
last_used_app                 132
proximity_sensor              130
screen_brightness             117
steps_sensor                  145
total_rx_gb                   116
total_tx_gb                   149
wifi_frequency                208
wifi_link_speed               202
wifi_signal_strength          202
headphones                    218
volume_level_accessibility    205
volume_level_call             216
volume_level_music            205
music_active                  216
mobile_rx_gb  

In [66]:
# Try to cast every sensor column to float64. Columns whose states are not
# numeric text are left unchanged, and their names are collected and printed.
unparsed_column = []
for column in df:
    try:
        df[column] = df[column].astype(np.float64)
    except (ValueError, TypeError):
        # Only conversion failures are expected here; anything else should
        # surface instead of being silently counted as "unparsed".
        unparsed_column.append(column)
        print(column)

print(f'# of failed parses: {len(unparsed_column)}')

last_update_trigger
detected_activity
device_locked
device_tracker
doze_mode
interactive
last_used_app
headphones
music_active
network_type
wifi_connection
audio_mode
phone_state
bluetooth_state
battery_state
charger_type
is_charging
battery_health
ble_transmitter
do_not_disturb_sensor
mobile_data
power_save
screen_off_timeout
speakerphone
wifi_state
# of failed parses: 25


In [67]:
# Display the table again: columns that parsed above now hold floats
# (e.g. 1.0 instead of 1), the remaining columns still hold text.
df

Unnamed: 0_level_0,active_notification_count,app_memory,battery_power,last_update_trigger,light_sensor,pressure_sensor,app_rx_gb,app_tx_gb,battery_level,battery_temperature,...,charger_type,is_charging,battery_health,ble_transmitter,do_not_disturb_sensor,mobile_data,power_save,screen_off_timeout,speakerphone,wifi_state
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-27 00:00:00,1.0,0.022,0.0,SensorWorker,86.0,1014.6,,,,,...,,,,,,,,,,
2023-01-27 00:15:00,1.0,0.023,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,6.0,1014.6,0.0202,0.0097,69.0,20.1,...,,,,,,,,,,
2023-01-27 00:30:00,3.0,0.024,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,62.0,1014.7,0.0204,,67.0,30.8,...,,,,,,,,,,
2023-01-27 00:45:00,1.0,0.023,0.0,io.homeassistant.companion.android.UPDATE_SENSORS,8.0,1014.7,0.0210,0.0099,62.0,28.9,...,,,,,,,,,,
2023-01-27 01:00:00,1.0,0.021,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,,1014.6,,,57.0,30.6,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-01-30 10:00:00,2.0,0.018,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,920.0,1029.0,0.0546,0.0268,31.0,27.3,...,,,,,,,,,,
2023-01-30 10:15:00,2.0,0.018,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,7.0,1029.2,0.0548,0.0269,25.0,25.8,...,,,,,,,,,,
2023-01-30 10:30:00,2.0,0.017,0.0,android.os.action.DEVICE_IDLE_MODE_CHANGED,246.0,1028.7,0.0550,0.0270,23.0,23.6,...,,,,,,,,,,
2023-01-30 10:45:00,1.0,0.017,0.0,android.intent.action.TIME_TICK,45.0,1029.1,0.0552,0.0271,20.0,22.6,...,,,,,,,,,,


In [68]:
# DataFrame.info() prints its report directly and returns None; wrapping it in
# print() would only append a stray "None" line to the output.
df.info()
df.dtypes

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 222 entries, 2023-01-27 00:00:00 to 2023-01-30 11:00:00
Data columns (total 49 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   active_notification_count   164 non-null    float64
 1   app_memory                  221 non-null    float64
 2   battery_power               219 non-null    float64
 3   last_update_trigger         165 non-null    object 
 4   light_sensor                133 non-null    float64
 5   pressure_sensor             166 non-null    float64
 6   app_rx_gb                   105 non-null    float64
 7   app_tx_gb                   61 non-null     float64
 8   battery_level               188 non-null    float64
 9   battery_temperature         199 non-null    float64
 10  detected_activity           76 non-null     object 
 11  device_locked               87 non-null     object 
 12  device_tracker              124 non-null    object 
 13

active_notification_count     float64
app_memory                    float64
battery_power                 float64
last_update_trigger            object
light_sensor                  float64
pressure_sensor               float64
app_rx_gb                     float64
app_tx_gb                     float64
battery_level                 float64
battery_temperature           float64
detected_activity              object
device_locked                  object
device_tracker                 object
doze_mode                      object
interactive                    object
last_used_app                  object
proximity_sensor              float64
screen_brightness             float64
steps_sensor                  float64
total_rx_gb                   float64
total_tx_gb                   float64
wifi_frequency                float64
wifi_link_speed               float64
wifi_signal_strength          float64
headphones                     object
volume_level_accessibility    float64
volume_level