In [290]:
import altair as alt
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from pathlib import Path

In [291]:
def read_experiment_data(exp_folder: Path):
    data = []
    files = sorted(list(map(lambda x: (int(x.name.split('_')[1].split('.')[0]), x), list(exp_folder.glob('*.csv')))))

    for dist, file in files:
        df = pd.read_csv(file, header=None)
        for _, (t, v, us) in df.iterrows():
            data.append((t, dist, 1/v, us))
    df = pd.DataFrame(data, columns=["t", "dist", "IR", 'Ultrasound'])
    return df
            
df_1 = read_experiment_data(Path('exp_1/'))
df_2 = read_experiment_data(Path('exp_2/'))
df = pd.concat([df_1, df_2])

In [292]:
def visualize_results(df, sensor):
    return (alt.Chart(df).encode(
        x=alt.X('dist:Q', title='Distance, cm'),
        y=alt.Y(f'min({sensor}):Q', scale=alt.Scale(zero=False), title='Boxplot, mV' if sensor == 'IR' else 'Boxplot, cm'),
        y2=f'max({sensor}):Q',
        tooltip=['dist', f'min({sensor})', f'max({sensor})']
    ).mark_rule() + alt.Chart(df).encode(
        x='dist:Q',
        y=f'median({sensor}):Q',
        tooltip=['dist', f'min({sensor})', f'max({sensor})']
    ).mark_point(size=1)).properties(
        title=f'Correspondence between distance and {sensor} measurements'
    )

In [293]:
(
    (visualize_results(df_1, 'IR') | visualize_results(df_2, 'IR'))
    &
    (visualize_results(df_1, 'Ultrasound') | visualize_results(df_2, 'Ultrasound'))
)

  for col_name, dtype in df.dtypes.iteritems():


For infrared sensor, the fitting can be done. Ultrasound can compared exactly

In [294]:
dist = df_1['dist'].values
mv = df_1['IR'].values

a,b = np.polyfit(np.log(dist), np.log(mv), deg=1)

pred_mv = np.exp(b + a*np.log(dist))

pred_dist = np.exp((np.log(mv) - b)/a)

int_df = pd.DataFrame({'dist_orig': dist, 'mv_orig': mv, 'mv_fitted': pred_mv, 'dist_pred': pred_dist}).reset_index()
int_df_mv = pd.wide_to_long(int_df, stubnames='mv', i=['index', 'dist_orig', 'dist_pred'], j='mV_type', sep='_', suffix=r'\w+').reset_index()

int_df['err'] = int_df['dist_orig'] - int_df['dist_pred']
group = int_df.groupby('dist_orig')
stats_df = pd.DataFrame({"min_err": group.min()['err'], 
                         "max_err": group.max()['err'],  
                         "mean_err": group.mean()['err'], 
                         "median_err": group.quantile(0.5)['err'],
                         "quantile_25_err": group.quantile(0.25)['err'],
                         "quantile_75_err": group.quantile(0.75)['err']
                        }).reset_index()

base =  alt.Chart(stats_df).encode(
        x=alt.X('dist_orig', title='Distance, cm'),
)

(
    alt.Chart(int_df_mv).encode(
        x=alt.X('dist_orig', title='Distance, cm'),
        y=alt.Y('mean(mv):Q', scale=alt.Scale(zero=False), title='mV'),
        color=alt.Color('mV_type', legend=alt.Legend(orient='top-left'))
    ).mark_line().properties(title='Comparison between IR values and exponential fitting') |
    
    (
        base.encode(
            y=alt.Y('min_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
            y2=alt.Y2('max_err:Q'),
        ).mark_rule() + 
        base.encode(
            y=alt.Y('quantile_25_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
            y2=alt.Y2('quantile_75_err:Q',),
        ).mark_rule(strokeWidth=3, color='blue') +
        base.encode(
            y=alt.Y('median_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
        ).mark_point()
    ).properties(title='Error rate of distance predicted by IR voltage')
)

  for col_name, dtype in df.dtypes.iteritems():


In [342]:
from sklearn.linear_model import LinearRegression

# Using median filter
int_df = df_2.groupby('dist').median().reset_index()

train_df = df_2[(df_2.dist >= 20) & (df_2.dist <= 40)]
train_df['diff'] = train_df['Ultrasound'] - train_df['dist']

model = LinearRegression()
model.fit(train_df['dist'].values.reshape(-1, 1), train_df['diff'].values)

diff_pred = model.predict(int_df['dist'].values.reshape(-1, 1))
int_df['Ultrasound_calibrated'] =  int_df['Ultrasound'] - diff_pred 
int_df['err'] = int_df['dist'] - int_df['Ultrasound']

def visualize_error(int_df, title=''):
    group = int_df.groupby('dist')
    stats_df = pd.DataFrame({"min_err": group.min()['err'], 
                             "max_err": group.max()['err'],  
                             "mean_err": group.mean()['err'], 
                             "median_err": group.quantile(0.5)['err'],
                             "quantile_25_err": group.quantile(0.25)['err'],
                             "quantile_75_err": group.quantile(0.75)['err']
                            }).reset_index()

    base =  alt.Chart(stats_df).encode(
            x=alt.X('dist', title='Distance, cm'),
            tooltip='dist'
    )

    return (
        base.encode(
            y=alt.Y('min_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
            y2=alt.Y2('max_err:Q'),
        ).mark_rule() + 
        base.encode(
            y=alt.Y('quantile_25_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
            y2=alt.Y2('quantile_75_err:Q',),
        ).mark_rule(strokeWidth=3, color='blue') +
        base.encode(
            y=alt.Y('median_err:Q', scale=alt.Scale(zero=False), title='Error, cm'),
        ).mark_point()
    ).properties(title=title)

int_df_calibrated = int_df.copy()
int_df_calibrated['err'] = int_df['dist'] - int_df['Ultrasound_calibrated']

visualize_error(int_df, 'Error for Ultrasound distance estimation after Median Filter') | visualize_error(int_df_calibrated, title='Calibrated for temperature effects by fitting Linear Regression')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['diff'] = train_df['Ultrasound'] - train_df['dist']
  for col_name, dtype in df.dtypes.iteritems():
