### Imputation using a Kalman Filter

In [None]:
import pandas as pd
import numpy as np
from pykalman import KalmanFilter
from pandas.api.types import is_numeric_dtype


In [5]:
# Read the CSV into a DataFrame; the path is relative to the current working directory.
df = pd.read_csv("data_int_without_questionnaire.csv")

In [6]:
# Print the dtype pandas assigned to each column of the loaded frame.
print(df.dtypes)

Distance (cm)                    float64
Illuminance (lx)                 float64
amplitude                        float64
frequency                        float64
Latitude (°)                     float64
Longitude (°)                    float64
Height (m)                       float64
Velocity (m/s)                   float64
Direction (°)                    float64
Horizontal Accuracy (m)          float64
Vertical Accuracy (m)            float64
Magnetic field x (µT)            float64
Magnetic field y (µT)            float64
Magnetic field z (µT)            float64
Acceleration x (m/s^2)           float64
Acceleration y (m/s^2)           float64
Acceleration z (m/s^2)           float64
Gyroscope x (rad/s)              float64
Gyroscope y (rad/s)              float64
Gyroscope z (rad/s)              float64
Pressure (hPa)                   float64
Linear Acceleration x (m/s^2)    float64
Linear Acceleration y (m/s^2)    float64
Linear Acceleration z (m/s^2)    float64
Common time (s) 

In [None]:
def one_d_kalman_filter(df):
    """Smooth every numeric column of ``df`` with its own 1-D Kalman filter.

    Each numeric column is handled independently with scalar transition and
    observation matrices (both ``[[1]]``). NaN/inf entries are masked so the
    filter treats them as missing, the filter parameters are fitted with 10
    EM iterations, and the column is overwritten with the smoothed state
    means. Note that *every* value of a processed column is replaced by its
    smoothed estimate, not only the entries that were missing.

    Adapted from the 1-D filter function supplied with the course material.

    Args:
        df: DataFrame to process. It is modified in place.

    Returns:
        The same ``df`` object. Non-numeric columns, and numeric columns
        without a single finite value (including every column of a zero-row
        frame), are left untouched.
    """
    for column in df.columns:
        if not is_numeric_dtype(df[column]):
            continue

        # Keep full float64 precision: a float32 cast quantizes values such as
        # latitude/longitude before the filter ever sees them.
        values = df[column].to_numpy(dtype=np.float64)
        observations = np.ma.masked_invalid(values)
        if observations.count() == 0:
            # No finite observation at all: there is nothing to fit the
            # filter to, so leave the column as it is instead of inventing data.
            continue

        kf = KalmanFilter(transition_matrices=[[1]], observation_matrices=[[1]])
        # EM re-estimates the filter's noise parameters from the observed data.
        kf = kf.em(observations, n_iter=10)
        smoothed_means, _ = kf.smooth(observations)
        # smooth() yields one row per time step and one column per state
        # dimension; select the single state explicitly.
        df[column] = smoothed_means[:, 0]
    return df


        

In [None]:
def multivariate_kalman_filter(df):
    """Smooth all numeric columns of ``df`` jointly with one Kalman filter.

    The numeric columns that contain at least one finite value form the
    observation vector. Transition and observation matrices are identity
    matrices, and both noise covariances start at ``0.5 * I`` before being
    refined by 5 EM iterations. NaN/inf entries are masked as missing. The
    selected columns are then overwritten with the smoothed state means, so
    every value in them is replaced, not only the missing ones.

    Adapted from the course-provided 1-D filter function.

    Args:
        df: DataFrame to process. It is modified in place.

    Returns:
        The same ``df`` object. It is returned unchanged when no numeric
        column holds a finite value (e.g. only non-numeric columns, only
        all-NaN columns, or zero rows). Non-numeric and all-NaN columns are
        never modified.
    """
    # Columns without any finite observation carry no information for the
    # filter, so they are excluded rather than filled with model output.
    columns_to_impute = [
        column
        for column in df.columns
        if is_numeric_dtype(df[column])
        and np.isfinite(df[column].to_numpy(dtype=np.float64)).any()
    ]
    if not columns_to_impute:
        # Nothing to fit; avoid building a zero-dimensional filter.
        return df

    n_states = len(columns_to_impute)
    kf = KalmanFilter(
        transition_matrices=np.eye(n_states),  # state carries over unchanged
        observation_matrices=np.eye(n_states),  # each column observes its own state
        transition_covariance=np.eye(n_states) * 0.5,  # initial Q: moderate model uncertainty
        observation_covariance=np.eye(n_states) * 0.5,  # initial R: moderate measurement uncertainty
    )

    # Keep full float64 precision: a float32 cast quantizes values such as
    # latitude/longitude before the filter ever sees them.
    observations = np.ma.masked_invalid(
        df[columns_to_impute].to_numpy(dtype=np.float64)
    )
    # NOTE(review): pykalman appears to treat a whole time step as missing when
    # any single component of that row is masked -- confirm against the
    # pykalman docs; if so, sparse multi-sensor data contributes few updates.
    kf = kf.em(observations, n_iter=5)
    smoothed_means, _ = kf.smooth(observations)
    for i, column in enumerate(columns_to_impute):
        df[column] = smoothed_means[:, i]
    return df
        