In [1]:
# Standard library
import os
import sys

# Third-party
import numpy as np
import pandas as pd
import altair as alt

# Put the project root on sys.path so the `src` package can be imported.
# The root is taken to be the prefix of the working directory that ends with
# the first occurrence of "arm_balance"; str.index raises ValueError when the
# notebook is started from outside that directory tree.
cur_dir = os.getcwd()
SRC_PATH = cur_dir[: cur_dir.index("arm_balance") + len("arm_balance")]
if SRC_PATH not in sys.path:
    sys.path.append(SRC_PATH)

# NOTE(review): wildcard import; presumably this is where `save_chart`, used
# by later cells, comes from. Confirm and switch to explicit names.
from src.utils.utils import *

alt.data_transformers.enable('data_server')

# Sample file used for the per-joint distribution analysis below.
SAMPLE_DATA_PATH = '../../data/raw_data/Cu.csv'
sample_data_frame = pd.read_csv(SAMPLE_DATA_PATH)

In [2]:
# Relabel the eight columns: an unnamed leading column, six joint columns,
# and the trailing 'y' column.
column_labels = ['', 'joint 1', 'joint 2', 'joint 3', 'joint 4', 'joint 5', 'joint 6', 'y']
sample_data_frame.columns = column_labels

In [3]:
# One binned histogram per joint column, arranged in a three-column grid.
joint_columns = ['joint 1', 'joint 2', 'joint 3', 'joint 4', 'joint 5', 'joint 6']
binned_x = alt.X(alt.repeat(), type='quantitative', bin=alt.Bin(maxbins=60))
single_hist = (
    alt.Chart(sample_data_frame)
    .mark_bar()
    .encode(binned_x, y='count()')
    .properties(width=300, height=200)
)
hist = single_hist.repeat(joint_columns, columns=3)

# Save a copy for the report, then display the chart as the cell output.
save_chart(hist, '../../report/assets/joints.png')
hist

In [4]:
from scipy.stats import normaltest

# Run scipy's normality test on every joint column, keeping the results in
# the same order as `joints`.
joints = ['joint 1', 'joint 2', 'joint 3', 'joint 4', 'joint 5', 'joint 6']
test_stats = []
for joint in joints:
    joint_values = sample_data_frame[joint].tolist()
    test_stats.append(normaltest(joint_values))

In [5]:
# Display the normaltest result (statistic, p-value) for each joint, in the order of `joints`.
test_stats

[NormaltestResult(statistic=50.98775561111557, pvalue=8.475192021133844e-12),
 NormaltestResult(statistic=5056.619171930607, pvalue=0.0),
 NormaltestResult(statistic=1833.2597834674914, pvalue=0.0),
 NormaltestResult(statistic=8.016749538935988, pvalue=0.018162890145910007),
 NormaltestResult(statistic=1328.4076951615998, pvalue=3.466842698069377e-289),
 NormaltestResult(statistic=25.7409689181122, pvalue=2.5728805270846933e-06)]

Every joint's p-value is below 0.05, so at the 5% significance level we reject the null hypothesis that the joint torques come from a normal distribution.

In [6]:
# For every raw data file, record the standard deviation of each column
# followed by that file's 'y' value. One list per file is appended to `stdevs`.
stdevs = []
DATA_DIR = '../../data/raw_data/'
for f in os.listdir(DATA_DIR):
    # Parse each CSV once and reuse the frame for both the std and the y lookup.
    raw_frame = pd.read_csv(os.path.join(DATA_DIR, f))
    column_stds = np.std(raw_frame, axis=0).to_numpy().tolist()
    # 'y' is read from the row labelled 1; presumably it is constant within a
    # file - TODO confirm.
    stdevs.append(column_stds + [raw_frame['y'][1]])

In [7]:
# Reshape the per-file standard deviations into long form for plotting.
# Column 0 (std of the first CSV column) and column 7 are dropped; column 8,
# the 'y' value appended to each row of `stdevs`, is kept as the identifier.
# NOTE(review): the positions assume each raw CSV has eight columns - confirm.
std_table = pd.DataFrame(stdevs).drop(columns=[0, 7])
std_plot_df = std_table.melt(id_vars=8)
std_plot_df.columns = ['y', 'variable', 'value']
std_plot_df['variable'] = std_plot_df['variable'].map(str)

# Scatter of standard deviation against weight, coloured by column.
std_plot = (
    alt.Chart(std_plot_df)
    .mark_point(opacity=0.5)
    .encode(
        alt.X('y', title='Weight (g)'),
        alt.Y('value', title='Standard Deviation'),
        color='variable',
    )
    .properties(title='Standard Deviation In Relationship to Weight')
)

# Overlay one regression line per column on top of the scatter.
trend_lines = std_plot.transform_regression(
    'y', 'value', groupby=['variable']
).mark_line(size=3)
plot = std_plot + trend_lines
save_chart(plot, '../../report/assets/std_to_weight.png')
plot

Below is the standard deviation in relation to the sample size. We see that the standard deviation stabilizes near 1000 data points, which motivated us to average each group of 1000 torque readings into one estimate.

In [8]:
# Imports kept from the original cell; nothing below uses them, but other
# cells of the notebook may.
import random
import matplotlib.pyplot as plt

# Running (population, ddof=0) standard deviation of the joint 2 readings:
# entry k is the standard deviation of the first k readings.
joint_2 = sample_data_frame['joint 2'].tolist()

# Sample sizes start at 1, so each standard deviation is paired with the
# number of readings it was actually computed from.
sample_sizes = range(1, len(joint_2) + 1)

# An expanding window computes every prefix in a single pass instead of
# recomputing np.std over a growing list. The result lives under its own
# name so the per-file `stdevs` list built earlier is not overwritten.
running_stdevs = pd.Series(joint_2).expanding().std(ddof=0).tolist()

source = pd.DataFrame({'Sample size': sample_sizes,
                       'Standard deviation (Nm)': running_stdevs})


In [9]:
# Line chart of the running standard deviation against the sample size.
x_axis = alt.X('Sample size', title='Sample size')
y_axis = alt.Y('Standard deviation (Nm)', title='Standard deviation (Nm)')
single_std = (
    alt.Chart(source)
    .mark_line(size=1.5)
    .encode(x_axis, y_axis)
    .properties(title='Standard Deviation In Relationship to Sample Size')
)

# Save a copy for the report, then display the chart as the cell output.
save_chart(single_std, '../../report/assets/std_sample.png')
single_std

Thus, we process the data by averaging each group of 1000 consecutive readings into a single estimate.

In [10]:

# Average every `group_by_count` consecutive rows of each raw file into one
# row, stack the rows from all files, and write the result to disk.

# os.listdir returns names in arbitrary order, so sort before the seeded
# shuffle; otherwise the same seed can give a different order elsewhere.
files = sorted(os.listdir(DATA_DIR))
np.random.seed(123)
np.random.shuffle(files)

# Rows averaged into one estimate. Trailing rows that do not fill a complete
# group are dropped.
group_by_count = 1000

GROUPED_DATA_PATH = '../../data/grouped_data/grouped_data.csv'

grouped_frames = []
for data in files:
    content = pd.read_csv(os.path.join(DATA_DIR, data))
    n_groups = len(content) // group_by_count
    if n_groups == 0:
        # Fewer rows than one full group: nothing to average.
        continue
    # View the complete groups as (group, row-in-group, column) and average
    # over the rows of each group in one vectorised step.
    full_rows = content.to_numpy()[: n_groups * group_by_count]
    group_means = full_rows.reshape(
        n_groups, group_by_count, content.shape[1]
    ).mean(axis=1)
    grouped_frames.append(pd.DataFrame(group_means, columns=content.columns))

# Concatenate once (not inside the loop), renumber the rows, and drop the
# first column of the raw files.
df = pd.concat(grouped_frames, ignore_index=True).iloc[:, 1:]

os.makedirs(os.path.dirname(GROUPED_DATA_PATH), exist_ok=True)
df.to_csv(GROUPED_DATA_PATH, index=False)