# Indoor Location & Navigation - Basic EDA - Traces and Features Visualization

In [None]:
import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Defined tools

Here are some visualization and data-loading tools that will be used later. Feel free to use them if you like =)

Trace parsing script (function `load_trace_as_dataframe`) taken from [this post](https://www.kaggle.com/c/indoor-location-navigation/discussion/215381).  
Thanks [Tolga Dincer](https://www.kaggle.com/tolgadincer) for this =)

In [None]:
def load_trace_as_dataframe(filepath):
    '''
    Reads a trace file into a pandas DataFrame sorted by timestamp.

    The file is parsed as tab-separated text without a header row, and
    '#' is treated as the comment character. The resulting columns are
    named 'time', 'type' and 'col_1' ... 'col_8'.

    Parameters
    ----------
        filepath : str
            Path to the trace file.

    Returns
    -------
        pandas DataFrame
            Rows ordered by 'time', with a fresh 0..n-1 index.
    '''

    value_columns = [f'col_{idx}' for idx in range(1, 9)]

    raw_df = pd.read_csv(
        filepath,
        sep='\t',
        comment='#',
        header=None,
        names=['time', 'type', *value_columns],
    )

    return raw_df.sort_values(by='time').reset_index(drop=True)


def extract_feature_df(trace_df, 
                       feature_name, 
                       col_names=('x', 'y', 'z', 'accuracy')):
    
    ''' 
    Builds a per-feature dataframe from a trace dataframe.

    Rows whose 'type' equals feature_name are selected; the first
    len(col_names) raw columns ('col_1', 'col_2', ...) are converted to
    float64 and stored under the given names. The raw 'col_*' columns and
    the 'type' column are removed and the index is reset.
    The input trace_df is not modified.
    
    Suitable for features: 
    ----------------------
        TYPE_WAYPOINT, if set col_names=('x', 'y'),
        TYPE_ACCELEROMETER,
        TYPE_GYROSCOPE,
        TYPE_MAGNETIC_FIELD, 
        TYPE_ROTATION_VECTOR,
        
        TYPE_ACCELEROMETER_UNCALIBRATED, 
                    if set col_names=('x', 'y', 'z', 'x_2', 'y_2', 'z_2', 'accuracy'),
                    
        TYPE_GYROSCOPE_UNCALIBRATED, 
                    if set col_names=('x', 'y', 'z', 'x_2', 'y_2', 'z_2', 'accuracy'),
                    
        TYPE_MAGNETIC_FIELD_UNCALIBRATED, 
                    if set col_names=('x', 'y', 'z', 'x_2', 'y_2', 'z_2', 'accuracy')
    '''
    
    raw_columns = [f'col_{idx}' for idx in range(1, 9)]

    # Work on a copy so the caller's trace dataframe stays untouched.
    selected = trace_df.loc[trace_df['type'] == feature_name].copy()

    for position, name in enumerate(col_names, start=1):
        selected[name] = selected[f'col_{position}'].astype('float64')

    # Only 'time' and the freshly named columns remain after this drop.
    cleaned = selected.drop(columns=raw_columns + ['type'])
    return cleaned.reset_index(drop=True)


def load_points(filepath):
    '''
    Loads the waypoints of a single trace file.

    Parameters
    ----------
        filepath : str
            Path to the trace file.

    Returns
    -------
        pandas DataFrame
            Device locations (values from TYPE_WAYPOINT) as 'x' and 'y'
            columns together with their 'time' timestamps.
    '''

    return extract_feature_df(
        load_trace_as_dataframe(filepath),
        feature_name='TYPE_WAYPOINT',
        col_names=('x', 'y'),
    )


def visualize_many_traces_on_the_map(traces_dataframes, map_image, width, height, 
                                     traces_filenames=None, 
                                     figsize=None):
    
    
    '''
    Plots several traces on top of the floor map image.

    Every trace is drawn as a scatter of its points plus a line
    connecting them in row order.
    
    Parameters
    ----------
        traces_dataframes: list of pandas DataFrames
            Each DataFrame should consist of device locations as x and y 
            coordinates and their timestamps.

        map_image : numpy.array
            Image of floor map.

        width : float,
            Width of floor. Should be taken from floor_info.json

        height : float, 
            Height of floor. Should be taken from floor_info.json

        traces_filenames : list of strings, optional, default: None
            List of filenames used as legend labels; paired with
            traces_dataframes by position.
            No legend is drawn if traces_filenames is None or empty.

        figsize : (float, float), optional, default: None
            Size of the result image in terms of matplotlib.
    
    '''

    def draw_trace(trace, **scatter_kwargs):
        # Markers first, then the connecting line, for one trace.
        plt.scatter(trace['x'], trace['y'], **scatter_kwargs)
        plt.plot(trace['x'], trace['y'])

    plt.figure(figsize=figsize)
    ax = plt.subplot(111)

    # Stretch the image so that plot units match floor coordinates.
    plt.imshow(map_image, extent=[0, width, 0, height])

    if not traces_filenames:
        for trace in traces_dataframes:
            draw_trace(trace)
    else:
        for name, trace in zip(traces_filenames, traces_dataframes):
            draw_trace(trace, label=name)

        # Place the legend outside the axes, to the right of the map.
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.show()

    
def visualize_single_trace_on_the_map(points_df, map_image, width, height, 
                                      scaling_coef=0.3, figsize=None):
    
    '''
    Draws single trace on the floor map.

    The trace is drawn as a line, each point is marked with its ordinal
    number (0, 1, 2, ... in row order) and the view is zoomed to the
    bounding box of the trace, enlarged by scaling_coef on every side.
    
    Parameters
    ----------
        points_df: pandas DataFrame
            Should consist of device locations as x and y 
            coordinates and their timestamps. Any index is accepted;
            points are numbered by row position.

        map_image : numpy.array
            Image of floor map.

        width : float,
            Width of floor. Should be taken from floor_info.json

        height : float, 
            Height of floor. Should be taken from floor_info.json

        scaling_coef : float
            Scaling Coefficient: margin added around the trace, expressed
            as a fraction of the trace extent along each axis.

        figsize : (float, float), optional, default: None
            Size of the result image in terms of matplotlib.
    
    '''
    
    plt.figure(figsize=figsize)
    ax = plt.subplot(111)

    plt.imshow(map_image, extent=[0, width, 0, height])
    plt.plot(points_df['x'], points_df['y'], linewidth=5, linestyle='-', color='blue')

    # Iterate by position instead of `.loc[i, ...]`: `.loc` is label-based
    # and fails when the frame does not carry a default 0..n-1 index.
    for i, (x, y) in enumerate(zip(points_df['x'], points_df['y'])):
        ax.text(
            x, y, i, 
            ha="center", size=15, 
            bbox=dict(boxstyle="circle, pad=0.3", 
                      fc="cyan", lw=2)
        )

    # With no points min()/max() are NaN, which are not valid axis limits;
    # in that case leave the view on the whole map.
    if len(points_df) > 0:
        x_min, x_max = points_df['x'].min(), points_df['x'].max()
        y_min, y_max = points_df['y'].min(), points_df['y'].max()

        x_margin = scaling_coef * (x_max - x_min)
        y_margin = scaling_coef * (y_max - y_min)

        ax.set_xlim(x_min - x_margin, x_max + x_margin)
        ax.set_ylim(y_min - y_margin, y_max + y_margin)

    plt.show()

    
def plot_trace_features(feature_df, timestamps=None, figsize=None):
    
    '''
    Plots the trace features.

    The 'x', 'y' and 'z' columns of feature_df are drawn against its
    'time' column, one line per column.
    
    Parameters
    ----------
        feature_df : pandas DataFrame
            Can be extracted from trace dataframe 
            using extract_feature_df function.
        
        timestamps : array-like, optional, default: None
            Array of timestamps. 
            Used to mark timestamps on the chart in the form of vertical
            lines, each labelled with its ordinal number.
            Pass timestamps=None if you don't want to use this feature.

        figsize : (float, float), optional, default: None
            Size of the result image in terms of matplotlib.
    
    Suitable for features: 
    ----------------------
        TYPE_ACCELEROMETER 
        TYPE_GYROSCOPE 
        TYPE_MAGNETIC_FIELD 
        TYPE_ROTATION_VECTOR 
        TYPE_ACCELEROMETER_UNCALIBRATED 
        TYPE_GYROSCOPE_UNCALIBRATED 
        TYPE_MAGNETIC_FIELD_UNCALIBRATED
    '''
    
    plt.figure(figsize=figsize)
    ax = plt.subplot(111)

    for col in ['x', 'y', 'z']:
        plt.plot(feature_df['time'], feature_df[col], label=col)

    # Decide on the function's own argument, not on a global variable.
    if timestamps is not None:
        # Upper y limit of the plotted curves; the markers' labels sit there.
        _, ymax = ax.get_ylim()

        for i, timestamp in enumerate(timestamps):
            plt.axvline(x=timestamp, c='k', ls='--')

            ax.text(
                timestamp, ymax, i, 
                ha="center", size=15, 
                bbox=dict(boxstyle="circle, pad=0.3", 
                          fc="white", lw=2)
            )

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()

# Let's take a look into data directory.

In [None]:
# Root directory of the competition dataset.
data_path = '/kaggle/input/indoor-location-navigation'
# Display the top-level entries of the dataset directory.
os.listdir(data_path)

* The **train** directory contains site (shopping mall) directories, each of which consists of floor directories. Each floor directory contains txt files that hold information about paths (smartphones).

* The **metadata** directory contains the floor map, its size and geo information for each site and each floor.

In this competition we need to predict the smartphone location: the floor number and the x, y coordinates (TYPE_WAYPOINT). 


In [None]:
# Site id and floor name, in the '<site>/<floor>' layout shared by the
# 'metadata' and 'train' directories.
floor = '5a0546857ecc773753327266/F1'
floor_metadata_dir = os.path.join(data_path, 'metadata', floor)
floor_train_dir = os.path.join(data_path, 'train', floor)

# Display the metadata files available for this floor.
os.listdir(floor_metadata_dir)

In [None]:
# os.listdir(os.path.join(data_path, 'train'))[:10]

In [None]:
# os.listdir(floor_train_dir)[:5]

# Loading coordinates of paths points

In [None]:
# Load the waypoints of every txt trace file found in the floor directory.
paths = []
for filename in os.listdir(floor_train_dir):
    if '.txt' in filename:
        points = load_points(os.path.join(floor_train_dir, filename))
        paths.append((filename, points))

# Keep the 20 traces with the most waypoints, longest first.
paths.sort(key=lambda path: len(path[1]), reverse=True)
paths = paths[:20]


# Split the (filename, dataframe) pairs into two parallel lists.
traces_dataframes = [entry[1] for entry in paths]
traces_filenames = [entry[0] for entry in paths]

# Loading floor map and its size

In [None]:
# Floor plan image, used as the background of the trace plots.
MAP_IMAGE = plt.imread(
    os.path.join(floor_metadata_dir, 'floor_image.png')
)

# Read the JSON with an explicit encoding instead of the platform default,
# and let json parse the file object directly.
with open(os.path.join(floor_metadata_dir, 'floor_info.json'),
          encoding='utf-8') as f:
    floor_info = json.load(f)

# Floor dimensions; passed as the image extent when drawing the map.
MAP_HEIGHT = float(floor_info['map_info']['height'])
MAP_WIDTH = float(floor_info['map_info']['width'])
    
floor_info

# Visualization of Traces (Paths) on the Floor Map

In [None]:
# Draw all selected traces of the floor on its map, with a filename legend.
visualize_many_traces_on_the_map(
    traces_dataframes=traces_dataframes,
    map_image=MAP_IMAGE,
    width=MAP_WIDTH,
    height=MAP_HEIGHT,
    traces_filenames=traces_filenames,
    figsize=(15, 12),
)

# Features Visualization

Let's take a closer look at a single trace (path) and its attributes.

In [None]:
# Pick one trace file of the floor and load all of its records.
filename = '5e15b0171506f2000638fe49.txt'
trace_filepath = os.path.join(floor_train_dir, filename)

trace_df = load_trace_as_dataframe(filepath=trace_filepath)
trace_df

Here is a table of the attributes we need to deal with in this competition. 

As you can see, it contains feature names and their parameters. We will describe some of these features in more detail below.

Each feature can be extracted from a trace as a dataframe with its parameters as columns. For this we use the function `extract_feature_df`, which is suitable for all features except TYPE_WIFI and TYPE_BEACON.

The table is taken from [competition's official github repo](https://github.com/location-competition/indoor-location-competition-20). (I've slightly modified it just for a more compact view).

| Feature		                    | Values |       |        |        |                   |        |           |                                   |
|:----------------------------------|:------:|:-----:|:------:|:-------|:-----------------:|:------:|:---------:|:---------------------------------:|
|TYPE_WAYPOINT                      |X axis  |Y axis |       |         |                   |        |           |                                   |
|TYPE_ACCELEROMETER                 |X axis  |Y axis |Z axis |accuracy |                   |        |           |                                   |
|TYPE_GYROSCOPE                     |X axis  |Y axis |Z axis |accuracy |                   |        |           |                                   | 
|TYPE_MAGNETIC_FIELD                |X axis  |Y axis |Z axis |accuracy |                   |        |           |                                   | 
|TYPE_ROTATION_VECTOR               |X axis  |Y axis |Z axis |accuracy |                   |        |           |                                   |
|TYPE_ACCELEROMETER_UNCALIBRATED    |X axis  |Y axis |Z axis |X axis   |Y axis             |Z axis  |accuracy   |                                   |
|TYPE_GYROSCOPE_UNCALIBRATED        |X axis  |Y axis |Z axis |X axis   |Y axis             |Z axis  |accuracy   |                                   |
|TYPE_MAGNETIC_FIELD_UNCALIBRATED   |X axis  |Y axis |Z axis |X axis   |Y axis             |Z axis  |accuracy   |                                   |
|TYPE_WIFI                          |ssid    |bssid  |RSSI   |frequency|last seen timestamp|        |           |                                   |
|TYPE_BEACON                        |UUID    |MajorID|MinorID|Tx Power |RSSI               |Distance|MAC Address|same with Unix time, padding data  |


# TYPE_WAYPOINT

In [None]:
# Waypoints of the trace: only the x and y values are meaningful here.
points_df = extract_feature_df(
    trace_df,
    feature_name='TYPE_WAYPOINT',
    col_names=('x', 'y'),
)
points_df

In [None]:
# Draw the selected trace on the floor map with numbered waypoints.
visualize_single_trace_on_the_map(
    points_df=points_df,
    map_image=MAP_IMAGE,
    width=MAP_WIDTH,
    height=MAP_HEIGHT,
    figsize=(10, 8),
)

# TYPE_ACCELEROMETER

In [None]:
# Accelerometer readings of the trace (default x, y, z, accuracy columns).
acc_df = extract_feature_df(trace_df, feature_name='TYPE_ACCELEROMETER')
acc_df

In [None]:
# Accelerometer axes over time, with the waypoint timestamps marked.
plot_trace_features(acc_df, timestamps=points_df['time'], figsize=(20, 5))

In [None]:
# Pairwise relationships between the accelerometer axes.
sns.pairplot(data=acc_df[['x', 'y', 'z']])
plt.show()

# TYPE_GYROSCOPE

In [None]:
# Gyroscope readings of the trace (default x, y, z, accuracy columns).
gyro_df = extract_feature_df(trace_df, feature_name='TYPE_GYROSCOPE')
gyro_df

In [None]:
# Gyroscope axes over time, with the waypoint timestamps marked.
plot_trace_features(gyro_df, timestamps=points_df['time'], figsize=(20, 5))

In [None]:
# Pairwise relationships between the gyroscope axes.
sns.pairplot(data=gyro_df[['x', 'y', 'z']])
plt.show()

# TYPE_MAGNETIC_FIELD

In [None]:
# Magnetic field readings of the trace (default x, y, z, accuracy columns).
magn_df = extract_feature_df(trace_df, feature_name='TYPE_MAGNETIC_FIELD')
magn_df

In [None]:
# Magnetic field axes over time, with the waypoint timestamps marked.
plot_trace_features(magn_df, timestamps=points_df['time'], figsize=(20, 5))

In [None]:
# Pairwise relationships between the magnetic field axes.
sns.pairplot(data=magn_df[['x', 'y', 'z']])
plt.show()

# TYPE_ROTATION_VECTOR

In [None]:
# Rotation vector readings of the trace (default x, y, z, accuracy columns).
rot_df = extract_feature_df(trace_df, feature_name='TYPE_ROTATION_VECTOR')
rot_df

In [None]:
# Rotation vector axes over time, with the waypoint timestamps marked.
plot_trace_features(rot_df, timestamps=points_df['time'], figsize=(20, 5))

In [None]:
# Pairwise relationships between the rotation vector axes.
sns.pairplot(data=rot_df[['x', 'y', 'z']])
plt.show()

# References
* [Trace Parsing Script](https://www.kaggle.com/c/indoor-location-navigation/discussion/215381)
* [Competition's Github Repository](https://github.com/location-competition/indoor-location-competition-20)

## Hope this notebook will be helpful for you. I wish you good fortune in the competition!