## Imports

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import datetime
import tqdm

#### <span style="color:green">Plot functions</span>

In [2]:
# Display name (Spanish) of each plottable column; interpolated into chart titles.
variables_dict = dict(
    speed='Velocidad',
    latitude='Lat',
    longitude='lng',
    accPosition='Presión del acelerador',
    accX='Aceleración en X',
    accY='Aceleración en Y',
    accZ='Aceleración en Z',
    magX='Fuerza magnética en X',
    magY='Fuerza magnética en Y',
    magZ='Fuerza magnética en Z',
    velAngX='Velocidad angular en X',
    velAngY='Velocidad angular en Y',
    velAngZ='Velocidad angular en Z',
)

# Unit label of each plottable column; used as the y-axis title of the charts.
units_dict = dict(
    speed='m/s',
    latitude='°',
    longitude='°',
    accPosition='% de presión',
    accX='m/s\u00B2',
    accY='m/s\u00B2',
    accZ='m/s\u00B2',
    magX='\u03BC T',
    magY='\u03BC T',
    magZ='\u03BC T',
    velAngX='rad/seg',
    velAngY='rad/seg',
    velAngZ='rad/seg',
)


def update_layout(fig, chart_title: str, **kwargs):
    """Apply the notebook's common layout to a plotly figure.

    Args:
        fig: A plotly figure (any object exposing ``update_layout``).
        chart_title (str): Title of the chart, horizontally centred.
        **kwargs: Optional settings:
            xaxis_title: Title of the x axis (default ``None``).
            yaxis_title: Title of the y axis (default ``None``).
            height: Figure height forwarded to plotly (default ``450``).

    Returns:
        fig: The same figure object, after its layout has been updated.
    """
    # Optional settings; anything not supplied falls back to its default.
    x_title = kwargs.get('xaxis_title')
    y_title = kwargs.get('yaxis_title')
    height_size = kwargs.get('height', 450)

    # Adjacent string literals are joined at compile time. A backslash
    # continuation *inside* the literal would embed the indentation of the
    # following line into the font-family value.
    font_family = (
        "BlinkMacSystemFont,-apple-system,Segoe UI,Roboto,Oxygen,Ubuntu,"
        "Cantarell,Fira Sans,Droid Sans,Helvetica Neue,Helvetica,Arial,sans-serif"
    )

    # Update layout of the figure
    fig.update_layout(
        title={'text': chart_title, 'x': 0.5},
        legend_title='Tipo de evento',
        xaxis_title=x_title,
        yaxis_title=y_title,
        template='plotly_white',
        autosize=True,
        height=height_size,
        font=dict(
            family=font_family,
            size=14,
            color="#363636"
        )
    )

    return fig


def line_chart(df, variable):
    """Plot one variable against the timestamp, split by event class.

    Two traces share the same x values (``df['timestamp']``):

    * 'Sin evento': the variable with every sample whose ``eventClass`` is 1
      replaced by NaN.
    * 'Near-Crash': the variable with every sample whose ``eventClass`` is 0
      replaced by NaN.

    Args:
        df (DataFrame): The dataframe that contains the data to plot. It must
            have the ``timestamp``, ``eventClass`` and ``id`` columns and is
            not modified.
        variable (str): The specific variable (column) to plot. If the column
            is missing from ``df`` an empty chart is produced. If it has no
            entry in ``variables_dict`` / ``units_dict`` the column name is
            used in the title and the y axis is left untitled.

    Returns:
        fig: A plotly figure
    """
    x = df['timestamp']
    event_class = df['eventClass']

    if variable in df.columns:
        values = df[variable]
    else:
        # A device may not record every signal; keep producing an (empty)
        # chart for such a column instead of failing.
        values = pd.Series(np.nan, index=df.index)

    # Blank out only the plotted column; copying the whole DataFrame twice
    # just to hide some samples of one column is unnecessary.
    no_crash_values = values.mask(event_class == 1)
    near_crash_values = values.mask(event_class == 0)

    hover = "X = %{x}<br>Y = %{y}<br>ID = %{text}"
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=x,
        y=no_crash_values,
        name='Sin evento',
        marker_color='#00D1B1',
        text=df["id"],
        hovertemplate=hover
    ))
    fig.add_trace(go.Scatter(
        x=x,
        y=near_crash_values,
        name='Near-Crash',
        marker_color='#FF385F',
        text=df["id"],
        hovertemplate=hover
    ))

    fig = update_layout(fig, f'Gráfico de linea de la {variables_dict.get(variable, variable)}',
                        xaxis_title='Marca de Tiempo', yaxis_title=units_dict.get(variable))
    fig.update_yaxes(nticks=12)
    fig.update_xaxes(nticks=12, tickangle=45)

    return fig


## Cargar datos

In [105]:
# Recording to clean. The same path is used again by the cell that saves the CSV.
csv_name = "./data/smartphone_02-Feb-2022-19-57_aceleración repentina_Data-Muy3Q2oTXSwbX4Xkp8f.csv"
df = pd.read_csv(filepath_or_buffer=csv_name)
df.head()  # first five rows (the default)

Unnamed: 0,id,accX,accY,accZ,eventClass,idTrip,idVehicle,latitude,longitude,magX,magY,magZ,route,speed,timestamp,velAngX,velAngY,velAngZ
0,31054,0.474078,1.481456,9.672387,0,2,Kia Picanto (Xiaomi Redmi Note 9S),0.834703,-77.649896,30.0375,2.7,32.081253,Aceleración repentina,17.712,2022-02-02 19:57:31.294000-05:00,0.0,0.001465,-0.000533
1,31055,0.44357,1.491626,9.634102,0,2,Kia Picanto (Xiaomi Redmi Note 9S),0.834703,-77.649896,29.981251,2.5875,32.15625,Aceleración repentina,17.712,2022-02-02 19:57:31.343000-05:00,0.000533,-0.000399,-0.000799
2,31056,0.442074,1.474876,9.640682,0,2,Kia Picanto (Xiaomi Redmi Note 9S),0.834703,-77.649896,29.906252,2.68125,32.34375,Aceleración repentina,17.712,2022-02-02 19:57:31.393000-05:00,0.000399,-0.000266,-0.000133
3,31057,0.485145,1.476671,9.670592,0,2,Kia Picanto (Xiaomi Redmi Note 9S),0.834703,-77.649896,30.187502,2.64375,32.456253,Aceleración repentina,17.712,2022-02-02 19:57:31.444000-05:00,0.000133,0.001065,0.0
4,31058,0.441177,1.48355,9.630812,0,2,Kia Picanto (Xiaomi Redmi Note 9S),0.834703,-77.649896,30.243752,2.68125,32.606251,Aceleración repentina,17.712,2022-02-02 19:57:31.494000-05:00,0.0,-0.000932,-0.000799


## Limpieza de datos

Para limpiar estos datos se han identificado ciertas condiciones que deben satisfacerse:

### Mostrar datos en crudo

In [92]:
print("RAW DATA")
variables = ["speed", "accPosition", "accX", "accY", "accZ", "velAngX", "velAngY", "velAngZ", "magX", "magY", "magZ"]
# Not every recording has every signal (the CSV preview above has no
# 'accPosition' column), so only chart the columns that are actually present
# instead of rendering an empty, misleading figure.
for var in variables:
    if var not in df.columns:
        print(f"skipping '{var}': column not found in the data")
        continue
    fig = line_chart(df, var)
    fig.show()

RAW DATA


### Eliminar o agregar eventos de near-crash
Primero necesitamos ubicar los eventos de *near-crash* donde corresponden, asignando en la columna **eventClass** un 
<span style="color:red">"1"</span> a las muestras que pertenecen a un evento y un <span style="color:cyan">"0"</span> a las que no pertenecen a este.

In [94]:
min_ID = 7798  # ID to start near-crash
max_ID = 7840  # ID to end near-crash

# Relabel every row from scratch: 1 when the id lies inside the inclusive
# [min_ID, max_ID] window, 0 everywhere else.
df["eventClass"] = df["id"].between(min_ID, max_ID).astype("int64")
print("near crash event")
line_chart(df, "velAngZ")

near crash event


### Corregir tiempos y offset en algunas variables
Algunas variables cinemáticas están desplazadas verticalmente; por lo tanto, necesitamos corregir este desfase.

In [96]:
max_standby = 7660   # last ID (inclusive) of the stand-by window used as baseline
min_standby = None   # optional first ID (inclusive) of the window, e.g. 6436; None = no lower bound
var_with_offset = "accY"

# Rows of the stand-by window; the mean of the variable over these rows is
# taken as the vertical offset and subtracted from the whole column.
standby_mask = df["id"] <= max_standby
if min_standby is not None:
    standby_mask &= df["id"] >= min_standby

offset = df.loc[standby_mask, var_with_offset].mean()
if pd.isna(offset):
    # An empty (or all-NaN) window gives a NaN mean; subtracting it would turn
    # the entire column into NaN, so stop before any data is destroyed.
    raise ValueError(
        f"No valid '{var_with_offset}' samples in the stand-by window "
        f"(min_standby={min_standby}, max_standby={max_standby}); "
        "check the IDs against the loaded data."
    )
df[var_with_offset] = df[var_with_offset] - offset
print("offset correction: ", offset)
line_chart(df, var_with_offset)

offset correction:  0.6778172446221886


##### Desplazar gráfica horizontalmente

In [19]:
# Settings for the manual horizontal (row-wise) shift.
# comparing_variable is the signal plotted first as the visual reference.
# shift_variables are the signals to move; shift_periods and fill_value hold,
# position by position, the number of rows handed to Series.shift and the value
# used to fill the rows the shift leaves empty.
comparing_variable = "accY"
shift_variables = ["speed", "accPosition"]
shift_periods = [-163, 85]
fill_value = [2, 5.88]

# The shift itself is currently disabled, so shift_periods and fill_value are
# unused; uncomment the two lines below to apply it.
# NOTE(review): it overwrites df[v], so re-running the cell shifts the data again.
#for v, p, f in zip(shift_variables, shift_periods, fill_value):
#    df[v] = df[v].shift(periods=p, fill_value=f)

# Show the reference signal followed by each signal listed in shift_variables.
fig1 = line_chart(df, comparing_variable)
fig1.show()
for v in shift_variables:
    fig = line_chart(df, v)
    fig.show()

### Interpolar algunas variables

La interpolación nos permite tener un muestreo general en los datos, obteniendo por ejemplo más puntos en la variable de la velocidad. *(Solo en el caso de la Raspberry para la velocidad)*

In [55]:
# Not implemented for the smartphone device
max_zone = 5946
min_zone = 5944

interest_point = 5945

var_to_interpolate = "accY"
# Blank the single point of interest and every id strictly between
# min_zone and max_zone, so that the interpolation below re-creates them.
df.loc[df["id"] == interest_point, var_to_interpolate] = None
df.loc[(df['id'] <= max_zone-1) & (df["id"] >=min_zone+1), var_to_interpolate] = None
# Assign the result back instead of calling interpolate(inplace=True) on
# df[col]: that is a chained in-place call on a column selection, which pandas
# warns about and which leaves df unchanged when Copy-on-Write is enabled.
df[var_to_interpolate] = df[var_to_interpolate].interpolate(method="from_derivatives")
line_chart(df, var_to_interpolate)

### <span style="color:violet">Check final results</span>

In [97]:
# All variables: speed, accX, accY, accZ, magX, magY, magZ, velAngX, velAngY, velAngZ,
#                eventClass, idTrip, idVehicle, latitude, longitude, route, id, timestamp
variables = ["speed", "accPosition", "accX", "accY", "accZ", "velAngX", "velAngY", "velAngZ", "magX", "magY", "magZ"]
#df.loc[df["id"]>41115]
# Only chart the columns that exist in the loaded data (the CSV preview above
# has no 'accPosition' column) instead of rendering an empty figure for them.
for var in variables:
    if var not in df.columns:
        print(f"skipping '{var}': column not found in the data")
        continue
    fig = line_chart(df, var)
    fig.show()

# Guardar CSV corregido

In [107]:
#df.drop(['active', 'breakPosition'], axis=1, inplace=True)
# Write the corrected frame back to the path it was loaded from (csv_name),
# replacing that file; the DataFrame index is not written.
df.to_csv(csv_name, index=False)

### Filtro Kalman

In [72]:
sz = len(df["magY"])

# Quick look at the last and the second sample. Both use positional access so
# the result does not depend on the labels of the DataFrame index.
print(df["magY"].iloc[-1])
print(df["magY"].iloc[1])
Q = 0.05  # (Mean squared error of the model prediction.) The larger this error is,
       # the closer the filtered curve stays to the observed values; otherwise it stays closer to the model prediction

def get_D(ob_list):
    """Return the population standard deviation of a sequence of observations.

    Args:
        ob_list: One-dimensional sequence of numbers (list, NumPy array or
            pandas Series). Values are read by position, so a Series works
            whatever its index labels are.

    Returns:
        numpy.float64: ``sqrt(mean((x - mean(x)) ** 2))``, i.e. the standard
        deviation computed with divisor ``N`` (not ``N - 1``).

    Raises:
        ValueError: If ``ob_list`` is empty.
    """
    values = np.asarray(ob_list, dtype=float)
    if values.size == 0:
        raise ValueError("get_D() needs at least one observation")
    # np.std defaults to ddof=0, which matches the population formula above.
    return np.std(values)

# One slot per sample. *_pri hold the values predicted before the sample is
# used (a priori); *_pst hold the values after the update (a posteriori).
x_pst = np.zeros(sz)
err_pst = np.zeros(sz)
x_pri = np.zeros(sz)
err_pri = np.zeros(sz)
K = np.zeros(sz)

# Read the observations once as a positional array: indexing the Series with
# an integer inside the loop is a label lookup, which is slow and only matches
# the position when the DataFrame has its default 0..n-1 index.
observations = df["magY"].to_numpy()

D = get_D(observations)  # Observation error (standard deviation returned by get_D)
# NOTE(review): D is a standard deviation but is added to the error terms in
# the gain below; confirm whether the variance (D ** 2) was intended.
x_pst[0] = observations[0]

err_pst[0] = 0

for cnt in range(1, sz):
    # Predict: carry the previous estimate forward and grow its error by Q.
    x_pri[cnt] = x_pst[cnt - 1]
    err_pri[cnt] = err_pst[cnt - 1] + Q

    # Update: blend the prediction with the new observation using gain K.
    K[cnt] = err_pri[cnt] / (err_pri[cnt] + D)
    x_pst[cnt] = x_pri[cnt] + K[cnt] * (observations[cnt] - x_pri[cnt])
    err_pst[cnt] = (1 - K[cnt]) * err_pri[cnt]



-70.7569241632231
-71.44277480131572


In [73]:
# Overlay the raw magY signal and its filtered estimate on the same time axis.
fig = go.Figure(data=[
    go.Scatter(
        x=df["timestamp"],
        y=df["magY"],
        name='No filtro',
        marker_color='#00D1B1',
        hovertemplate="X = %{x}<br>Y = %{y}",
    ),
    go.Scatter(
        x=df["timestamp"],
        y=x_pst,
        name='Filtro',
        marker_color='#0000ff',
        hovertemplate="X = %{x}<br>Y = %{y}",
    ),
])
fig.show()