### Introduction

The objectives of this notebook on the Nomand dataset are as follows:
- Visualize the data-quality issues in the Nomand dataset
- Implement a basic Kalman filter (KF) to fuse the two sensors
- Compare the performance of the KF against the original data

### Visualization

In [None]:
# open csv

import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the raw GPS log. The path is assembled with pathlib so the notebook
# runs on any OS; the previous raw string used Windows-only backslashes.
df_ = pd.read_csv(Path('..') / 'Data' / 'Noman_logging' / 'logging_mar3_gps.csv')

In [None]:
# Preview the raw frame (the rendered output is truncated to the first and last rows)
df_

Unnamed: 0,id,project_id,train_id,box_id,valid,latitude,longitude,altitude,speed,heading,timestamp
0,1040114929,4,12,2,0,0.000000,0.000000,0.000,0.00000,0.0000,2023-12-01 00:00:01
1,1040114930,4,40,1,1,54.970610,-1.604899,0.000,1517.87510,83.4600,2023-12-01 00:00:01
2,1040114931,4,12,2,0,0.000000,0.000000,0.000,0.00000,0.0000,2023-12-01 00:00:02
3,1040114932,4,40,1,1,54.970610,-1.604899,0.000,1517.87510,83.4600,2023-12-01 00:00:02
4,1040114933,4,40,1,1,54.970610,-1.604899,0.000,1520.88830,84.1700,2023-12-01 00:00:03
...,...,...,...,...,...,...,...,...,...,...,...
5037022,1045154844,4,27,2,1,50.059956,2.872531,118.593,282.98880,353.3864,2023-12-08 11:49:34
5037023,1045154845,4,17,1,1,50.705273,2.722421,19.207,286.51678,107.8355,2023-12-08 11:49:34
5037024,1045154846,4,14,2,1,52.181060,4.590192,4.800,78.18840,53.3200,2023-12-08 11:49:34
5037025,1045154847,4,21,1,1,50.876434,1.824230,26.800,242.48160,338.9800,2023-12-08 11:49:34


In [None]:
# Parse the timestamp strings once, then derive a midnight-normalised day column
parsed_timestamps = pd.to_datetime(df_['timestamp'])
df_['timestamp'] = parsed_timestamps
df_['date'] = parsed_timestamps.dt.normalize()

# QC: keep only the rows whose `valid` flag is non-zero
is_valid_row = df_['valid'].ne(0)
df = df_.loc[is_valid_row]

In [7]:
import plotly.graph_objs as go
from ipywidgets import widgets, Output, VBox, HBox
from IPython.display import display
import plotly.io as pio
# Read the Mapbox token from the environment rather than committing it to the
# notebook. The token that used to be hard-coded here has been exposed and
# should be revoked. The map in this cell uses the 'open-street-map' style,
# which does not need a token, so an unset variable (None) is acceptable.
# NOTE(review): confirm plotly actually consumes this module attribute; the
# documented settings are `layout.mapbox.accesstoken` and
# `plotly.express.set_mapbox_access_token`.
pio.mapbox_access_token = os.environ.get('MAPBOX_ACCESS_TOKEN')

# `df` (the QC-filtered frame) must already exist with the columns used below.

def _make_selector(column, label):
    """Build a multi-select list offering every distinct value of ``df[column]``."""
    return widgets.SelectMultiple(
        options=df[column].unique(),
        description=label,
        disabled=False,
    )


# One selector each for Train ID, Box ID and Date
train_id_widget = _make_selector('train_id', 'Train ID:')
box_id_widget = _make_selector('box_id', 'Box ID:')
date_widget = _make_selector('date', 'Date:')

# Container widget in which the figure is rendered
out = Output()

# Left ~30 % of the canvas: map panel on OpenStreetMap tiles
map_panel_cfg = dict(
    domain=dict(x=[0, 0.30], y=[0, 1]),
    center=dict(lat=51.8, lon=2),
    style='open-street-map',
    zoom=5,
)

# Remaining width: speed-versus-time panel
time_axis_cfg = dict(domain=[0.35, 1], title='Time')
speed_axis_cfg = dict(domain=[0, 1], title='Speed')

# Empty figure with both panels laid out side by side
placeholder_fig = go.FigureWidget(
    layout=go.Layout(
        width=1500,
        height=500,
        margin=dict(t=50, l=50, b=50, r=50),  # keeps labels from being clipped
        mapbox=map_panel_cfg,
        xaxis=time_axis_cfg,
        yaxis=speed_axis_cfg,
    )
)

# Trace 0: GPS track (lines plus markers); coordinates are filled in by plot_data
placeholder_fig.add_trace(
    go.Scattermapbox(lat=[], lon=[], mode='lines+markers')
)

# Trace 1: speed over time (lines only) on the x/y axes; filled in by plot_data
placeholder_fig.add_trace(
    go.Scatter(x=[], y=[], xaxis='x', yaxis='y', mode='lines')
)

# Render the still-empty figure inside the output container
with out:
    out.clear_output()
    display(placeholder_fig)

def plot_data(train_ids, box_ids, dates):
    """Refresh the map and speed traces for the current widget selection.

    Parameters
    ----------
    train_ids, box_ids, dates : iterable
        Selected values; a row of the module-level ``df`` is kept when its
        ``train_id``, ``box_id`` and ``date`` each appear in the matching
        selection.

    Notes
    -----
    Updates ``placeholder_fig`` in place. When no row matches, both traces are
    emptied so the previous selection does not stay on screen, and a message
    is printed into ``out``.
    """
    selection_mask = (
        df['train_id'].isin(train_ids)
        & df['box_id'].isin(box_ids)
        & df['date'].isin(dates)
    )
    filtered_df = df[selection_mask]

    track_trace = placeholder_fig.data[0]
    speed_trace = placeholder_fig.data[1]

    if filtered_df.empty:
        # Clear stale data; batch_update groups the four assignments into one update
        with placeholder_fig.batch_update():
            track_trace.lat = []
            track_trace.lon = []
            speed_trace.x = []
            speed_trace.y = []
        with out:
            print("No data to display for the selected filters.")
        return

    # Chronological order so the track and the speed line are drawn in sequence
    filtered_df = filtered_df.sort_values(by='timestamp')

    with placeholder_fig.batch_update():
        # Geolocation panel
        track_trace.lat = filtered_df['latitude']
        track_trace.lon = filtered_df['longitude']

        # Speed-over-time panel
        speed_trace.x = filtered_df['timestamp']
        speed_trace.y = filtered_df['speed']


# Map each plot_data argument to the widget that supplies it, so plot_data
# re-runs whenever one of the selections changes
selector_controls = {
    'train_ids': train_id_widget,
    'box_ids': box_id_widget,
    'dates': date_widget,
}
interactive_plot = widgets.interactive_output(plot_data, selector_controls)

# Selectors side by side in one row, with the output container underneath
widget_box = HBox(children=[train_id_widget, box_id_widget, date_widget])
dashboard = VBox(children=[widget_box, out])
display(dashboard)


In [46]:
# Inspect the column dtypes of the QC-filtered frame.
# NOTE(review): the stored output lists `date` as object, which does not match
# the `.dt.normalize()` call that creates the column - re-run to refresh it.
df.dtypes

id                     int64
project_id             int64
train_id               int64
box_id                 int64
valid                  int64
latitude             float64
longitude            float64
altitude             float64
speed                float64
heading              float64
timestamp     datetime64[ns]
date                  object
dtype: object

In [39]:
# Show the derived `date` column
df['date']

0          2023-12-01
1          2023-12-01
2          2023-12-01
3          2023-12-01
4          2023-12-01
              ...    
5037022    2023-12-08
5037023    2023-12-08
5037024    2023-12-08
5037025    2023-12-08
5037026    2023-12-08
Name: date, Length: 5037027, dtype: object

In [59]:
# Keep only the rows for train 12, box 2
is_train_12 = df['train_id'].eq(12)
is_box_2 = df['box_id'].eq(2)
train_filtered = df[is_train_12 & is_box_2]

# Element-wise check of which of those rows fall on 2023-12-01
on_target_day = train_filtered['date'].eq(pd.Timestamp('2023-12-01'))
print(on_target_day)


0           True
2           True
5           True
6           True
8           True
           ...  
5035922    False
5036189    False
5036432    False
5036714    False
5036986    False
Name: date, Length: 146638, dtype: bool
