In [None]:
import tensorflow as tf
import os
from tensorflow.keras import layers
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime
from time import time

In [None]:
class SensorActions:
    """Builds a per-timestamp wide table from raw sensor rows.

    Each input row is a list laid out as ``self.columns`` (optionally followed
    by ``self.extra_columns`` for uncalibrated sensor records).  Typical use:
    ``set_max_len`` -> ``convert_to_df`` -> ``sensor_data_by_ts`` ->
    ``convert_to_numeric`` -> ``store_data``.
    """

    def __init__(self):
        self.columns = ['site_id', 'floor_id', 'floor', 'x_waypoint', 'y_waypoint', 
                        'timestamp', 'type', 'x', 'y' , 'z' ]
        self.extra_columns = ['xbias', 'ybias', 'zbias', 'accuracy']
        self.sensor_cols = ['x','y', 'z']
        self.sensor_extra_cols = self.extra_columns
        self.num_cols = 10
        self.max_slen = 0
        self.df = None
    
    def set_max_len(self, data):
        """Record the length of the longest row in ``data`` (0 for an empty buffer)."""
        # default=0 keeps an empty buffer from raising ValueError in max().
        self.max_slen = max(map(len, data), default=0)
    
    def convert_to_df(self, data):
        """Load ``data`` into ``self.df``, indexed by site, floor, timestamp and waypoint.

        When the longest row seen by ``set_max_len`` exceeds ``num_cols`` the
        extra bias/accuracy columns are added to the frame.
        """
        has_extra = self.max_slen > self.num_cols
        if has_extra:
            # Build a new list and skip names already present, so calling this
            # method more than once does not keep appending the extra columns.
            self.columns = self.columns + [c for c in self.extra_columns if c not in self.columns]
        self.df = pd.DataFrame(data, columns=self.columns)
        if has_extra:  # if uncalibrated sensor include xbias , ybias, zbias, accuracy
            # Rows of these types carry a single value after z, which lands in
            # 'xbias' when the frame is built; move it to 'accuracy'.
            rectify_col = ['TYPE_ACCELEROMETER', 'TYPE_MAGNETIC_FIELD', 'TYPE_GYROSCOPE', 'TYPE_ROTATION_VECTOR']
            needs_fix = self.df['type'].isin(rectify_col)
            self.df.loc[needs_fix, 'accuracy'] = self.df.loc[needs_fix, 'xbias']
            self.df.loc[needs_fix, 'xbias'] = np.nan
            self.df.fillna(value=np.nan, inplace=True)
        self.df['timestamp'] = pd.to_datetime(self.df['timestamp'].astype('int64'), unit='ms')
        self.df.set_index(['site_id', 'floor_id', 'timestamp', 'floor', 'x_waypoint', 'y_waypoint'], inplace=True)  
        #sort by timestamp and site_id and floor
        self.df.sort_index(ascending=True, inplace=True)
    
    def sensor_data_by_ts(self):
        """Pivot ``self.df`` so each sensor type/axis pair becomes its own column.

        Returns the pivoted frame, indexed by ``site_id`` and ``timestamp``.
        """
        if self.max_slen > self.num_cols:  # if uncalibrated sensor include xbias , ybias, zbias
            # Same idempotent extension as in convert_to_df.
            self.sensor_cols = self.sensor_cols + [c for c in self.sensor_extra_cols if c not in self.sensor_cols]
        self.df = self.df.pivot_table(values=self.sensor_cols, index=self.df.index.values,
                                  columns='type', aggfunc='first').reset_index()
        # Flatten the two-level column labels: ('x', 'TYPE_GYROSCOPE') -> 'x_TYPE_GYROSCOPE';
        # the reset index column ('index', '') becomes 'index_'.
        self.df.columns = [s1 + '_' + s2  for (s1, s2) in self.df.columns.tolist()]
        # 'index_' holds the original index tuples; spread them back into named columns.
        self.df[['site_id', 'floor_id', 'timestamp','floor', 'x_waypoint', 'y_waypoint']] = pd.DataFrame(self.df['index_'].tolist(), index=self.df.index)
        self.df.set_index(['site_id','timestamp'], inplace=True)
        self.df.drop(columns='index_', inplace=True)
        return self.df
    
    def convert_to_numeric(self):
        """Convert every column except 'floor' and 'floor_id' to numeric; bad values become NaN."""
        #convert object dtype to float64 for processing
        for col in self.df.columns:
            if col not in ['floor', 'floor_id']:
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
        return self.df
    
    def store_data(self, filename):
        """Write ``self.df`` to ``BUFFER_DIR/sensor_<filename>.csv``."""
        self.df.to_csv(os.path.join(BUFFER_DIR, 'sensor_{}.csv'.format(filename)))
        
class WIFIActions:
    """Holds buffered wifi rows as a DataFrame and writes them to CSV."""

    def __init__(self):
        self.df = None
        # One name per field of a buffered wifi row, in row order.
        self.cols = [
            'site_id', 'floor_id', 'floor', 'waypoint_x', 'waypoint_y',
            'timestamp', 'type', 'ssid', 'pssid', 'rssi',
            'network_type', 'last_seen',
        ]

    def convert_to_df(self, data):
        """Build ``self.df`` from ``data`` using ``self.cols`` as column names."""
        frame = pd.DataFrame(data, columns=self.cols)
        self.df = frame

    def store_data(self, filename):
        """Write ``self.df`` to ``BUFFER_DIR/wifi_<filename>.csv``."""
        target = os.path.join(BUFFER_DIR, 'wifi_{}.csv'.format(filename))
        self.df.to_csv(target)

class BEACONActions:
    """Holds buffered beacon rows as a DataFrame and writes them to CSV."""

    def __init__(self):
        self.df = None
        # One name per field of a buffered beacon row, in row order.
        self.cols = [
            'site_id', 'floor_id', 'floor', 'waypoint_x', 'waypoint_y',
            'timestamp', 'type', 'uuid', 'minor_id', 'major_id',
            'broadcasting_power', 'rssi', 'distance', 'mac_id',
            'loopkup_timestamp',
        ]

    def convert_to_df(self, data):
        """Build ``self.df`` from ``data`` using ``self.cols`` as column names."""
        frame = pd.DataFrame(data, columns=self.cols)
        self.df = frame

    def store_data(self, filename):
        """Write ``self.df`` to ``BUFFER_DIR/beacon_<filename>.csv``."""
        target = os.path.join(BUFFER_DIR, 'beacon_{}.csv'.format(filename))
        self.df.to_csv(target)

In [None]:
# Input dataset root and the writable output directory.
SOURCE_DIR, BUFFER_DIR = '/kaggle/input/indoor-location-navigation', '/kaggle/working'
# Sub-folder names under SOURCE_DIR.
TRAIN, METADATA = 'train', 'metadata'
# File count used by the analysis-mode cut-off in the parsing loop.
MAX_FILES = 10
# Run switches.
ANALYSIS_MODE = SITE_MAP_MODE = False
SENSOR_DATA_MODE = WIFI_DATA_MODE = BEACON_DATA_MODE = True

import re

# Patterns for the '#'-prefixed header lines of a path file.  Raw strings are
# used so that \s, \w and \d reach the regex engine without relying on Python
# passing unknown string escapes through (a SyntaxWarning on Python 3.12+).
floor_regex = re.compile(r'#\s+SiteID:(\w+)\s+SiteName:(.+)\s+FloorId:(\w+)\s+FloorName:(.+)')
device_regex = re.compile(r'#\s+Brand:(\w+)\s+Model:(.+)\s+AndroidName:([\d\.]+)\s+APILevel:(\d+)')
hardware_regex = re.compile(r'#\s+type:(\d+)\s+name:(.*)\s+version:(\w+)\s+vendor:(\w+)\s+resolution:([\w.]+)\s+power:([\d.]+)\s+maximumRange:([\d.]+)')
# Record types treated as sensor readings by the parsing loop.
sensor_type = ['TYPE_ACCELEROMETER', 'TYPE_MAGNETIC_FIELD', 'TYPE_GYROSCOPE', 'TYPE_ROTATION_VECTOR',
              'TYPE_MAGNETIC_FIELD_UNCALIBRATED', 'TYPE_GYROSCOPE_UNCALIBRATED', 'TYPE_ACCELEROMETER_UNCALIBRATED']
# Matches a line that mentions any of the sensor types above.
sensor_nav_regex = re.compile('|'.join(sensor_type))
# Row buffers filled while parsing and emptied on each flush.
sensor_data = []
beacon_data = []
wifi_data = []

# From the files we can assume that the sensor readings following a waypoint belong to that waypoint,
# so each record is tagged with the most recent waypoint coordinates and the floor:
# the user arrives at a waypoint and then the sensor readings are taken.

# Counters and accumulators used by the directory walk.
num_floor = num_file = 0
site_map = list()
# Number of files between 'storing data...' checkpoints.
BACTH_LIMIT = 500 # files
# Site whose training folder is parsed; also used in the output file names.
SAMPLE_SITE_ID = '5cd56b5ae2acfd2d33b58546'

def process_sensor_data(data, site_id):
    """Run the buffered sensor rows through SensorActions and store the result.

    ``data`` is the list of raw sensor rows; ``site_id`` is handed to
    ``store_data`` and ends up in the output file name.  The helper object is
    local, so it is released when the function returns.
    """
    pipeline = SensorActions()
    pipeline.set_max_len(data)
    pipeline.convert_to_df(data)
    pipeline.sensor_data_by_ts()
    pipeline.convert_to_numeric()
    pipeline.store_data(site_id)

def process_wifi_data(data, site_id):
    """Load the buffered wifi rows into WIFIActions and store them.

    ``site_id`` is handed to ``store_data`` and ends up in the output file
    name.  The helper object is local and released on return.
    """
    wifi_actions = WIFIActions()
    wifi_actions.convert_to_df(data)
    wifi_actions.store_data(site_id)

def process_beacon_data(data, site_id):
    """Load the buffered beacon rows into BEACONActions and store them.

    ``site_id`` is handed to ``store_data`` and ends up in the output file
    name.  The helper object is local and released on return.
    """
    beacon_actions = BEACONActions()
    beacon_actions.convert_to_df(data)
    beacon_actions.store_data(site_id)

# Walk the sample site's training folder and parse every file line by line into
# the sensor / beacon / wifi row buffers.
for (root,dirs,files) in os.walk(os.path.join(SOURCE_DIR, TRAIN, SAMPLE_SITE_ID), topdown=True):
    if files:
        floor_level = os.path.basename(root) # this is floor level F1, F2, B1
        for detailf in files:
            num_file = num_file + 1
            if num_file % 1000 == 0:# memory limit
                # NOTE(review): every flush passes the same site id, so each
                # store_data call targets the same CSV path and replaces what
                # an earlier flush wrote there -- confirm this is intended.
                if sensor_data:  # SensorActions.set_max_len cannot handle an empty buffer
                    process_sensor_data(sensor_data, SAMPLE_SITE_ID or site_id)
                sensor_data = [] #free memory
                process_wifi_data(wifi_data, SAMPLE_SITE_ID or site_id)
                wifi_data = [] #free memory
                process_beacon_data(beacon_data, SAMPLE_SITE_ID or site_id)
                beacon_data = [] #free memory
                print('processed {} files'.format(num_file))

            if num_file % BACTH_LIMIT == 0 and not ANALYSIS_MODE:
                print('storing data...')
                # This call used to pass no arguments and raised TypeError.
                # Checkpoint the sensor rows buffered so far; the buffer is
                # empty right after the memory-limit flush above, so skip then.
                if sensor_data:
                    process_sensor_data(sensor_data, SAMPLE_SITE_ID or site_id)

            # '>=' so that every file from MAX_FILES onwards is skipped; with
            # '==' only that single file was skipped and parsing carried on.
            if num_file >= MAX_FILES and ANALYSIS_MODE and not SITE_MAP_MODE: # stop while analysis
                continue
            with open(os.path.join(root, detailf), 'r') as f:
                line_no = 0
                waypoint_x = None # make waypoint variable global for the file
                waypoint_y = None
                for line in f:
                    line_no = line_no + 1
                    if line.startswith('#'):
                        if 'Time' in line or 'Version' in line:
                            continue # skip start and end timestamp
                        elif 'FloorName' in line:
                            floor_details = floor_regex.search(line)
                            if floor_details:
                                site_id = floor_details.group(1)
                                site_name = floor_details.group(2)
                                floor_id = floor_details.group(3)
                                floor_name = floor_details.group(4)
                                if SITE_MAP_MODE:
                                    # only the site/floor mapping is wanted; stop reading this file
                                    site_map.append([site_id, floor_id, floor_level])
                                    break
                        elif 'Brand' in line:
                            device_details = device_regex.search(line)
                            if device_details:
                                device_brand = device_details.group(1)
                                device_model = device_details.group(2)
                                andriod_ver = device_details.group(3)
                                device_api = device_details.group(4)
                        else:
                            hardware_details = hardware_regex.search(line)
                            if hardware_details:
                                hardware_type = hardware_details.group(1)
                                hardware_name = hardware_details.group(2)
                                hardware_version = hardware_details.group(3)
                                hardware_vendor = hardware_details.group(4)
                                hardware_resolution = hardware_details.group(5)
                                hardware_power = hardware_details.group(6)
                                hardware_max_range = hardware_details.group(7)
                    elif 'TYPE_WAYPOINT' in line:
                        # remember the latest waypoint; following records are tagged with it
                        waypoint = line.replace('\n', '').split('\t')
                        if len(waypoint) == 4:
                            waypoint_x = waypoint[2]
                            waypoint_y = waypoint[3]
                    elif sensor_nav_regex.search(line) and SENSOR_DATA_MODE:
                        sensor_fields = line.strip().split('\t')
                        #skip records where '\n' character is missing leading to multiple records in same line
                        if len(sensor_fields) > 9:
                            continue
                        # timestamp, type, x, y , z ,xbias, ybias, zbias, accuracy
                        sensor_data.append([site_id, floor_id, floor_level]+ [waypoint_x, waypoint_y] + sensor_fields)
                    elif 'TYPE_BEACON' in line and BEACON_DATA_MODE:
                        beacon_count = line.count('TYPE_BEACON')
                        wifi_count = line.count('TYPE_WIFI')
                        #handle 1 beacon and wifi data in one line
                        # Exactly one TYPE_WIFI is required: the split below unpacks
                        # into two parts and would raise ValueError otherwise.
                        if beacon_count == 1 and wifi_count == 1:
                            beacon_line, wifi_line = line.strip().split('TYPE_WIFI')
                            beacon_line, wifi_line = beacon_line.strip(), wifi_line.strip()
                            # restructure beacon line
                            loopkup_timestamp = wifi_line.split('\t')[-1]
                            beacon_data.append([site_id, floor_id, floor_level]+ [waypoint_x, waypoint_y] + beacon_line.split('\t') + [loopkup_timestamp])
                            # restructure wifi line
                            timestamp = beacon_line.replace('\n', '').split('\t')[0]
                            wifi_data.append([site_id, floor_id, floor_level] + [waypoint_x, waypoint_y] + [timestamp] + wifi_line.split('\t')[:-1])

                        elif beacon_count == 1 and wifi_count == 0:
                            # timestamp, type, uuid, minor_id , major_id , broadcasting_power, rssi, distance, mac_id, loopkup_timestamp
                            beacon_data.append([site_id, floor_id, floor_level]+ [waypoint_x, waypoint_y] + line.strip().split('\t'))
                        else:
                            # TODO handle multiple beacon data with multiple wifi data in one line
                            pass
                    elif 'TYPE_WIFI' in line and WIFI_DATA_MODE:
                        if line.count('TYPE_WIFI') == 1:
                            # timestamp, type, wifi1, wifi2, latitude, longitude
                            wifi_data.append([site_id, floor_id, floor_level] + [waypoint_x, waypoint_y]+ line.strip().split('\t'))

# Store whatever is still buffered once the walk has finished.
# NOTE(review): the wifi and beacon buffers are only written when the sensor
# buffer is non-empty -- confirm that coupling is intended.
if not ANALYSIS_MODE and sensor_data:
    remainder_site = SAMPLE_SITE_ID or site_id
    process_sensor_data(sensor_data, remainder_site)
    sensor_data = [] #free memory
    process_wifi_data(wifi_data, remainder_site)
    wifi_data = [] #free memory
    process_beacon_data(beacon_data, remainder_site)
    beacon_data = [] #free memory

In [None]:
# Load the stored sensor table and index it by site and timestamp.
sensor_csv_path = os.path.join(BUFFER_DIR, 'sensor_{}.csv'.format(SAMPLE_SITE_ID))
sensor_df = pd.read_csv(sensor_csv_path)
sensor_df['timestamp'] = pd.to_datetime(sensor_df['timestamp'])
sensor_df = sensor_df.set_index(['site_id', 'timestamp'])
sensor_df.head(2)

In [None]:
# Load the stored wifi table for the sample site.
wifi_csv_path = os.path.join(BUFFER_DIR, 'wifi_{}.csv'.format(SAMPLE_SITE_ID))
wifi_df = pd.read_csv(wifi_csv_path)
wifi_df.head(2)

In [None]:
# Load the stored beacon table for the sample site.
beacon_csv_path = os.path.join(BUFFER_DIR, 'beacon_{}.csv'.format(SAMPLE_SITE_ID))
beacon_df = pd.read_csv(beacon_csv_path)
beacon_df.head(2)

In [None]:
# Print a summary of sensor_df (columns, dtypes, non-null counts).
sensor_df.info()

In [None]:
# Summarise the site -> floor mapping collected when SITE_MAP_MODE is on.
if SITE_MAP_MODE:
    site_map_df = pd.DataFrame(site_map, columns=['site_id', 'floor_id', 'floor'])
    site_map_df.info()
    # An expression inside an `if` block is not echoed as cell output, so the
    # per-site counts were computed and thrown away; print them explicitly.
    print(site_map_df.groupby('site_id').count())
#!os.remove("/kaggle/working/indoor-location-nav_5cd56b5ae2acfd2d33b58546.csv")

In [None]:
# There are three predictions to be made:
# 1) floor level (B1, F1)
# 2) site_path (floor_id + site_id), where site_id is provided in the test files
# 3) waypoints
# There are three datasets given:
# 1) sensor_dataset
# 2) wifi_dataset
# 3) beacon_dataset
# I will be using sensor_dataset for waypoints,
# wifi_dataset for site_id,
# beacon_dataset for floor level.

# Analysing sensor_dataset for waypoints.
# .iloc selects the first row by position; plain [0] on a non-integer index
# relies on a positional fallback that pandas has deprecated.
print(sensor_df['floor'].iloc[0])
sample_floor_id = sensor_df['floor_id'].iloc[0]
all_record_axis = ['x', 'y', 'z']
sensors = ['TYPE_GYROSCOPE', 'TYPE_ACCELEROMETER', 'TYPE_MAGNETIC_FIELD', 'TYPE_ROTATION_VECTOR', 'waypoint']
fig, ax  = plt.subplots(len(sensors),1,figsize=(15,15), sharex=True)
color = ['green', 'blue', 'red']
# The floor filter does not depend on the loop variables, so compute it once.
sample_floor_df = sensor_df[sensor_df['floor_id'] == sample_floor_id]
for s_no, s in enumerate(sensors):
    for a_no, a in enumerate(all_record_axis):
        if s == 'waypoint' and a == 'z':
            continue  # waypoints only have x and y columns
        rolmean = sample_floor_df['{}_{}'.format(a, s)].resample('1000ms', level='timestamp').mean() #every second data
        ax[s_no].plot(rolmean, color=color[a_no])
    ax[s_no].set_xlabel('Timestamp')
    ax[s_no].set_ylabel(s) 
    

    # a tick every two minutes, labelled as HH:MM:SS
    locator = mdates.MinuteLocator(interval=2)
    ax[s_no].xaxis.set_major_locator(locator)
    formatter = mdates.DateFormatter('%H:%M:%S')
    ax[s_no].xaxis.set_major_formatter(formatter)

plt.xticks(rotation=70)
plt.grid(True)
plt.show()

In [None]:
# as analysing only one site drop site id from index
# Only drop a level while the index still has (site_id, timestamp): on a
# re-run the index is already timestamp-only, and resetting level 0 again
# would throw the timestamps away.
if isinstance(sensor_df.index, pd.MultiIndex):
    sensor_df = sensor_df.reset_index(level=0, drop=True)
sensor_df['timestamp'] = sensor_df.index
sensor_df = sensor_df.sort_index()
# first / last timestamp per floor, taken in sorted-index order
floor_df = sensor_df.groupby('floor').agg({'timestamp': ['first','last']})
floor_df = floor_df.reset_index()
floor_df.columns = ['floor', 'entry', 'exit']
floor_df['total_time_spent'] = floor_df['exit'] - floor_df['entry']
floor_df.sort_values(by='entry')
# customer navigated from floor F3 -> F4 -> F2 -> F1-> B1
# customer spent most time on floor F1

In [None]:
# One row per distinct (floor, x, y) waypoint with its number of sensor records.
wp_df = (
    sensor_df
    .groupby(['floor', 'x_waypoint', 'y_waypoint'])
    .size()
    .reset_index(name='counts')
)
print('there are total {} waypoints in site {}'.format(len(wp_df), SAMPLE_SITE_ID))

In [None]:
# Count the distinct waypoints on each floor and show them as a bar chart.
floor_wp_map = {}
for floor in sensor_df['floor'].unique():
    on_floor = sensor_df.loc[sensor_df['floor'] == floor]
    wp_f_df = (
        on_floor
        .groupby(['x_waypoint', 'y_waypoint'])
        .size()
        .reset_index(name='counts')
    )
    n_waypoints = len(wp_f_df)
    floor_wp_map[floor] = n_waypoints
    print('floor {} has {} waypoints'.format(floor, n_waypoints))

fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(floor_wp_map.keys(), floor_wp_map.values())
ax.set_ylabel('number of waypoints')
ax.set_xlabel('floor')
plt.show()

In [None]:
#sensor records per waypoint
# One panel per floor found in the data (the panel count used to be fixed at 5).
floors = sensor_df['floor'].unique()
# squeeze=False keeps axs two-dimensional even when there is a single floor;
# 4 inches of height per panel gives the previous 10x20 figure for five floors.
fig, axs = plt.subplots(len(floors), 1, figsize=(10, 4 * len(floors)), squeeze=False)

for i, floor in enumerate(floors):
    wp_f_df = sensor_df[sensor_df['floor'] == floor].groupby(['x_waypoint', 'y_waypoint']).size().reset_index(name='sensor_record_counts')
    # "x,y" label for the bar axis
    wp_f_df['waypoint'] = wp_f_df['x_waypoint'].astype(str) + ',' + wp_f_df['y_waypoint'].astype(str)
    wp_f_df.plot(x='waypoint', y='sensor_record_counts', ax=axs[i, 0], kind='bar', rot=75)

fig.tight_layout()
plt.show()

In [None]:
#analysis of single waypoint
# .iloc selects the second row by position; plain [1] on a non-integer index
# relies on a positional fallback that pandas has deprecated.
sample_x_waypoint = sensor_df['x_waypoint'].iloc[1]
sample_y_waypoint = sensor_df['y_waypoint'].iloc[1]

# .copy() so the changes below act on an independent frame, not a slice of sensor_df.
sample_waypoint_df = sensor_df[(sensor_df['floor_id'] == sample_floor_id) &
          np.isclose(sensor_df['x_waypoint'], sample_x_waypoint) &
          np.isclose(sensor_df['y_waypoint'], sample_y_waypoint)].copy()

# Drop the site level only if it is still there.  When sensor_df is already
# indexed by timestamp alone, resetting level 0 would discard the timestamps
# and leave an integer x-axis under the date locator/formatter used below.
if isinstance(sample_waypoint_df.index, pd.MultiIndex):
    sample_waypoint_df = sample_waypoint_df.reset_index(level=0, drop=True)

fig, ax  = plt.subplots(len(sensors),1,figsize=(15,15))
color = ['green', 'blue', 'red']
for s_no, s in enumerate(sensors):
    for a_no, a in enumerate(all_record_axis):
        if s == 'waypoint' and a == 'z':
            continue  # waypoints only have x and y columns
        ax[s_no].plot(sample_waypoint_df['{}_{}'.format(a, s)], color=color[a_no])
    ax[s_no].set_xlabel('Seconds')
    ax[s_no].set_ylabel(s) 
    

    # a tick every second, labelled with the seconds field
    locator = mdates.SecondLocator(interval=1)
    ax[s_no].xaxis.set_major_locator(locator)
    formatter = mdates.DateFormatter('%S')
    ax[s_no].xaxis.set_major_formatter(formatter)
    
plt.grid(True)
plt.show()

In [None]:
# analysis of customer waypoint on floor F1 image:
# list the metadata files available for floor F1 of the sample site
metadata_f1_dir = os.path.join(SOURCE_DIR, METADATA, SAMPLE_SITE_ID, 'F1')
for root, dirs, files in os.walk(metadata_f1_dir, topdown=True):
    print(files)