# Data Fusion project

## Load dataset

In [1]:
import numpy as np
import pandas as pd

# Load dataset
dataframe = pd.read_csv('dataset.csv')
dataframe

Unnamed: 0,entity_id,state,last_updated
0,device_tracker,,2023-01-20 15:47:16.741658
1,battery_level,46,2023-01-20 15:47:19.575981
2,battery_state,discharging,2023-01-20 15:47:19.639222
3,is_charging,off,2023-01-20 15:47:19.708801
4,charger_type,none,2023-01-20 15:47:19.774636
...,...,...,...
15234,app_memory,0.016,2023-01-25 09:42:35.545960
15235,battery_power,-1.7,2023-01-25 09:42:35.547854
15236,pressure_sensor,1026.3,2023-01-25 09:42:35.555762
15237,app_rx_gb,0.1487,2023-01-25 09:42:35.689290


In [2]:
print(dataframe.info())
dataframe.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15239 entries, 0 to 15238
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   entity_id     15239 non-null  object
 1   state         14890 non-null  object
 2   last_updated  15239 non-null  object
dtypes: object(3)
memory usage: 357.3+ KB
None


entity_id       object
state           object
last_updated    object
dtype: object

In [3]:
dataframe['entity_id'].value_counts()

app_memory                    2006
battery_power                 1788
last_update_trigger           1554
light_sensor                  1157
interactive                    877
battery_temperature            851
pressure_sensor                848
screen_brightness              557
wifi_link_speed                555
steps_sensor                   492
active_notification_count      439
total_rx_gb                    389
detected_activity              381
proximity_sensor               323
device_locked                  293
battery_level                  293
last_used_app                  253
app_rx_gb                      228
total_tx_gb                    192
mobile_rx_gb                   165
volume_level_music             148
wifi_signal_strength           146
doze_mode                      139
volume_level_accessibility     137
app_tx_gb                      127
music_active                   119
wifi_frequency                 104
wifi_connection                 86
mobile_tx_gb        

## Data manipulation

In [4]:
# get all unique sensors
sensor_columns = dataframe['entity_id'].unique()
sensor_columns

array(['device_tracker', 'battery_level', 'battery_state', 'is_charging',
       'charger_type', 'battery_health', 'battery_temperature',
       'battery_power', 'detected_activity', 'app_rx_gb', 'app_tx_gb',
       'app_memory', 'audio_mode', 'headphones', 'music_active',
       'volume_level_call', 'volume_level_music', 'volume_level_system',
       'volume_level_accessibility', 'bluetooth_connection',
       'ble_transmitter', 'screen_brightness', 'screen_off_timeout',
       'do_not_disturb_sensor', 'accent_color', 'last_used_app',
       'last_update_trigger', 'light_sensor', 'wifi_connection',
       'wifi_link_speed', 'wifi_frequency', 'wifi_signal_strength',
       'network_type', 'speakerphone', 'bluetooth_state', 'device_locked',
       'mobile_data', 'wifi_state', 'active_notification_count',
       'phone_state', 'pressure_sensor', 'proximity_sensor',
       'steps_sensor', 'mobile_rx_gb', 'mobile_tx_gb', 'total_rx_gb',
       'total_tx_gb', 'interactive', 'doze_mode', 'pow

### Create new dataframe

In [5]:
# create new dataframe with sensors as column headers
df = pd.DataFrame(columns =[sensor_columns])
df.insert(0, 'last_updated', dataframe['last_updated'])
df

Unnamed: 0,last_updated,device_tracker,battery_level,battery_state,is_charging,charger_type,battery_health,battery_temperature,battery_power,detected_activity,...,pressure_sensor,proximity_sensor,steps_sensor,mobile_rx_gb,mobile_tx_gb,total_rx_gb,total_tx_gb,interactive,doze_mode,power_save
0,2023-01-20 15:47:16.741658,,,,,,,,,,...,,,,,,,,,,
1,2023-01-20 15:47:19.575981,,,,,,,,,,...,,,,,,,,,,
2,2023-01-20 15:47:19.639222,,,,,,,,,,...,,,,,,,,,,
3,2023-01-20 15:47:19.708801,,,,,,,,,,...,,,,,,,,,,
4,2023-01-20 15:47:19.774636,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15234,2023-01-25 09:42:35.545960,,,,,,,,,,...,,,,,,,,,,
15235,2023-01-25 09:42:35.547854,,,,,,,,,,...,,,,,,,,,,
15236,2023-01-25 09:42:35.555762,,,,,,,,,,...,,,,,,,,,,
15237,2023-01-25 09:42:35.689290,,,,,,,,,,...,,,,,,,,,,


### Fill new dataframe

In [6]:
for i, row in dataframe.iterrows():
    df.at[i, row['entity_id']] = row['state']

#df = df.set_index('last_updated')
df

Unnamed: 0,last_updated,device_tracker,battery_level,battery_state,is_charging,charger_type,battery_health,battery_temperature,battery_power,detected_activity,...,pressure_sensor,proximity_sensor,steps_sensor,mobile_rx_gb,mobile_tx_gb,total_rx_gb,total_tx_gb,interactive,doze_mode,power_save
0,2023-01-20 15:47:16.741658,,,,,,,,,,...,,,,,,,,,,
1,2023-01-20 15:47:19.575981,,46,,,,,,,,...,,,,,,,,,,
2,2023-01-20 15:47:19.639222,,,discharging,,,,,,,...,,,,,,,,,,
3,2023-01-20 15:47:19.708801,,,,off,,,,,,...,,,,,,,,,,
4,2023-01-20 15:47:19.774636,,,,,none,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15234,2023-01-25 09:42:35.545960,,,,,,,,,,...,,,,,,,,,,
15235,2023-01-25 09:42:35.547854,,,,,,,,-1.7,,...,,,,,,,,,,
15236,2023-01-25 09:42:35.555762,,,,,,,,,,...,1026.3,,,,,,,,,
15237,2023-01-25 09:42:35.689290,,,,,,,,,,...,,,,,,,,,,


In [7]:
print(df.info())
df.dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15239 entries, 0 to 15238
Data columns (total 51 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   (last_updated,)                15239 non-null  object
 1   (device_tracker,)              0 non-null      object
 2   (battery_level,)               288 non-null    object
 3   (battery_state,)               31 non-null     object
 4   (is_charging,)                 31 non-null     object
 5   (charger_type,)                31 non-null     object
 6   (battery_health,)              16 non-null     object
 7   (battery_temperature,)         846 non-null    object
 8   (battery_power,)               1783 non-null   object
 9   (detected_activity,)           213 non-null    object
 10  (app_rx_gb,)                   223 non-null    object
 11  (app_tx_gb,)                   122 non-null    object
 12  (app_memory,)                  2001 non-null   object
 13  (

last_updated                  object
device_tracker                object
battery_level                 object
battery_state                 object
is_charging                   object
charger_type                  object
battery_health                object
battery_temperature           object
battery_power                 object
detected_activity             object
app_rx_gb                     object
app_tx_gb                     object
app_memory                    object
audio_mode                    object
headphones                    object
music_active                  object
volume_level_call             object
volume_level_music            object
volume_level_system           object
volume_level_accessibility    object
bluetooth_connection          object
ble_transmitter               object
screen_brightness             object
screen_off_timeout            object
do_not_disturb_sensor         object
accent_color                  object
last_used_app                 object
l

In [8]:
# check missing values in variables
df.isnull().sum()

last_updated                      0
device_tracker                15239
battery_level                 14951
battery_state                 15208
is_charging                   15208
charger_type                  15208
battery_health                15223
battery_temperature           14393
battery_power                 13456
detected_activity             15026
app_rx_gb                     15016
app_tx_gb                     15117
app_memory                    13238
audio_mode                    15219
headphones                    15211
music_active                  15123
volume_level_call             15194
volume_level_music            15094
volume_level_system           15239
volume_level_accessibility    15105
bluetooth_connection          15215
ble_transmitter               15220
screen_brightness             14685
screen_off_timeout            15239
do_not_disturb_sensor         15219
accent_color                  15239
last_used_app                 14988
last_update_trigger         

In [None]:
# df = df.astype({'device_tracker': str, 'battery_level': np.float64})
# df = df.astype(str)
# df.dtypes