# Data analysis: Raw dataset
This notebook gives an overview of the raw dataset of the smartpunch data collection.

In [None]:
import json
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
from timeseries_helpers import timeseries_plotter

In [None]:
# Load the complete punch dataset into `ds`.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so that the
# result does not depend on the platform's default text encoding.
with open('data/complete_smartpunch_dataset_7606punches.json', 'r', encoding='utf-8') as f:
    ds = json.load(f)

In [None]:
def data_to_list(raws, axis='x'):
    """Collect the value stored under ``axis`` from every sample in ``raws``.

    Args:
        raws: Iterable of mappings, one per sample.
        axis: Key to read from each sample; defaults to ``'x'``.

    Returns:
        list: The looked-up values, in input order.
    """
    return [sample[axis] for sample in raws]

In [None]:
def data_to_list2(raws, axis='x'):
    """Return the ``axis`` entry of each sample in ``raws`` as a list.

    Args:
        raws: Iterable of mappings, one per sample.
        axis: Key to read from each sample; defaults to ``'x'``.

    Returns:
        list: One value per sample, in input order.
    """
    values = []
    for sample in raws:
        values.append(sample[axis])
    return values

In [None]:
# Display the first dataset record to inspect its structure.
ds[0]

In [None]:
# Touch the keys of the first record (the result is discarded), then build one
# compact (index, label, hand, periodNS) tuple per dataset entry.
ds[0].keys()
buff = [
    (idx, entry['label'], entry['hand'], entry['periodNS'])
    for idx, entry in enumerate(ds)
]

In [None]:
# Period statistics over all tuples in `buff` (slot 3 holds the 'periodNS' value).
buff[0][3]
shortestBorder = 50000000
minPeriod = 1000000000000
minIdx = 0
periodSum = 0
for i, el in enumerate(buff):
    periodSum += el[3]
    if el[3] < shortestBorder:
        # Below the border: report it and never use it as the minimum.
        print("Found irregular short period length of {}ms at index: {}".format(el[3]/1000000,i))
    elif shortestBorder < el[3] < minPeriod:
        # New shortest period strictly above the border.
        minPeriod = el[3]
        minIdx = i

# Average period, and the values divided by 1000000 for the "ms" printout.
periodAve = periodSum / len(buff)
periodAveMS = periodAve / 1000000
# NOTE(review): the meaning of the division by 10 is not visible here — confirm.
sampleAve = periodAveMS / 10
minPeriodMS = minPeriod / 1000000
print("Ave period len: {}ms with a ave of {} with the shortest period: {}ms (at index: {})".format(periodAveMS,sampleAve,minPeriodMS,minIdx))

In [None]:
# Display the record at index 5953.
# NOTE(review): presumably an index reported by the short-period scan — confirm.
ds[5953]

In [None]:
def plot_ds_element(ds_e):
    """Plot the x/y/z traces of one dataset element and save the figure.

    The figure is written to ``<annotator>_<label>_<hand>.png`` (relative
    path) and then shown.

    Args:
        ds_e (dict): Dataset element providing ``'raws'`` (a sequence of
            samples with ``'x'``, ``'y'`` and ``'z'`` entries) as well as the
            ``'label'``, ``'annotator'`` and ``'hand'`` entries.

    Returns:
        None
    """
    raws = ds_e['raws']
    x = data_to_list2(raws, axis='x')
    y = data_to_list2(raws, axis='y')
    z = data_to_list2(raws, axis='z')
    # Sample index serves as the horizontal axis.
    t = np.arange(len(x))

    fig, ax = plt.subplots(figsize=(20, 10))
    for values, axis_name in ((x, 'x'), (y, 'y'), (z, 'z')):
        ax.plot(t, values, label=axis_name)

    ax.set(xlabel='datastamps', ylabel='a (in m/s²)',
           title='single dataset element of type: ' + ds_e['label'])
    ax.grid()
    ax.legend()

    # Save before show(): the file is written regardless of what the active
    # backend does with the figure once show() returns.
    titl = f"{ds_e['annotator']}_{ds_e['label']}_{ds_e['hand']}.png"
    fig.savefig(titl)
    plt.show()

In [None]:
def plot_ds(ds, saveImage=False, imageName="no_name",
            title='Rohdaten eines Upper-Cuts'):
    """Plot the concatenated x/y/z samples of several dataset elements.

    Args:
        ds: Iterable of dataset elements; each provides ``'raws'``, a sequence
            of samples with ``'x'``, ``'y'`` and ``'z'`` entries.
        saveImage (bool): When true, the figure is written to disk before it
            is shown.
        imageName (str): Target file name; ``'.png'`` is appended when the
            name does not already end with it.
        title (str): Plot title. The default keeps the previously hard-coded
            title.

    Returns:
        None
    """
    x, y, z = [], [], []
    for ds_e in ds:
        raws = ds_e['raws']
        x += data_to_list2(raws, axis='x')
        y += data_to_list2(raws, axis='y')
        z += data_to_list2(raws, axis='z')

    # Sample index across all concatenated elements is the horizontal axis.
    t = np.arange(len(x))

    fig, ax = plt.subplots(figsize=(20, 10))
    ax.plot(t, x, 'r.', label='x-Achse')
    ax.plot(t, y, 'b*', label='y-Achse')
    ax.plot(t, z, 'g^', label='z-Achse')

    ax.set(xlabel='Abtastwerte', ylabel='a (in m/s²)', title=title)
    ax.grid()
    ax.legend()

    if saveImage:
        name = imageName if imageName.endswith('.png') else imageName + '.png'
        # Saving happens before show() so the populated figure is written.
        fig.savefig(name)
    plt.show()

In [None]:
# Plot the single-element slice ds[0:1] and save it as 'accelero_punch_raw_data.png'.
plot_ds(ds[0:1],saveImage=True,imageName='accelero_punch_raw_data.png') # show only the first punch

In [None]:
# Display the 'count' entry of the first record.
ds[0]['count']

In [None]:
# Plot (and save) the dataset element at index 11.
plot_ds_element(ds[11])

In [None]:
# Plot (and save) the dataset element at index 9.
plot_ds_element(ds[9])

In [None]:
# Peek at the raw samples of the record at index 1.
raws = ds[1]['raws']
# The value of this expression is discarded (it is not the cell's last statement).
raws[2]
print(raws[0])

In [None]:
def print_label_stamp_ratio():
    """Print and plot the average number of samples per punch for each label.

    Reads the module-level ``ds``, averages ``len(el['raws'])`` over all
    records sharing the same ``el['label']`` (labels keep first-seen order),
    prints the resulting table, draws it as a seaborn bar plot and saves the
    current figure to ``label_stamp_ratio.png``.

    Returns:
        None
    """
    # label -> [total number of samples, number of records]. Keying by label
    # means a label is only ever compared with other labels, never with the
    # running totals, and avoids rescanning the list for every record.
    totals = {}
    for el in ds:
        entry = totals.setdefault(el['label'], [0, 0])
        entry[0] += len(el['raws'])
        entry[1] += 1

    rows = [
        [label, stamp_sum / record_count]
        for label, (stamp_sum, record_count) in totals.items()
    ]

    # Convert to a dataframe for printing and plotting.
    df = pd.DataFrame(rows, columns=['label', 'datastamps avg'])
    print(df)

    # Bar plot of the average sample count per label.
    ax = sns.barplot(x='label', y='datastamps avg', data=df)
    ax.set(xlabel='Klasse (links+rechts)', ylabel='Anzahl der durchschnittl. Abtastwerte pro Schlag')
    plt.savefig('label_stamp_ratio.png')


print_label_stamp_ratio()

In [None]:
def print_label_period_ratio():
    """Print and plot the average period length per label.

    Reads the module-level ``ds``, averages ``el['periodNS']`` over all
    records sharing the same ``el['label']`` (labels keep first-seen order),
    divides the average by 1000000 for the "ms" column, prints the resulting
    table, draws it as a seaborn bar plot and saves the current figure to
    ``label_periodLength_ratio.png``.

    Returns:
        None
    """
    # label -> [summed periodNS, number of records]. Keying by label means a
    # label is only ever compared with other labels, never with the running
    # totals, and avoids rescanning the list for every record.
    totals = {}
    for el in ds:
        entry = totals.setdefault(el['label'], [0, 0])
        entry[0] += el['periodNS']
        entry[1] += 1

    rows = [
        [label, (period_sum / record_count) / 1000000]
        for label, (period_sum, record_count) in totals.items()
    ]

    # Convert to a dataframe for printing and plotting.
    df = pd.DataFrame(rows, columns=['label', 'period (ms) avg'])
    print(df)

    # Bar plot of the average period length per label.
    ax = sns.barplot(x='label', y='period (ms) avg', data=df)
    ax.set(xlabel='Klasse (links+rechts)', ylabel='Durchschnittl. Periodendauer (in ms)')
    plt.savefig('label_periodLength_ratio.png')


print_label_period_ratio()

In [None]:
def extract_elementraws_from_dataset(ds):
    """Flatten the raw samples of all dataset elements into per-axis series.

    Args:
        ds: Iterable of dataset elements; each provides ``'raws'``, an
            iterable of samples with ``'x'``, ``'y'`` and ``'z'`` entries.

    Returns:
        list: Three ``pandas.Series`` named ``'x'``, ``'y'`` and ``'z'`` (in
        that order), each holding the values of every sample in input order.
    """
    columns = {'x': [], 'y': [], 'z': []}
    for element in ds:
        for sample in element['raws']:
            columns['x'].append(sample['x'])
            columns['y'].append(sample['y'])
            columns['z'].append(sample['z'])

    return [pd.Series(data=columns[axis], name=axis) for axis in ('x', 'y', 'z')]
    

# Differences between left and right hand data
The following section shows the accelerometer values depending on the boxer's hand. It can be seen that the values differ in sign depending on the hand.

## Punch: Frontal with left hand

In [None]:
# Distribution of the x-axis values of the single record ds[0].
# NOTE(review): sns.distplot is deprecated in newer seaborn releases — confirm
# the installed version or migrate to histplot/displot.
raws = extract_elementraws_from_dataset(ds[0:1])
sns.distplot(raws[0],axlabel="a (x-Achse) in m/s²")

## Punch: Frontal with right hand

In [None]:
# Distribution of the x-axis values of the single record ds[34].
# NOTE(review): sns.distplot is deprecated in newer seaborn releases — confirm
# the installed version or migrate to histplot/displot.
raws = extract_elementraws_from_dataset(ds[34:35])
sns.distplot(raws[0])

# Variation between the variables x,y,z


In [None]:
# Box plots of the x, y and z series of the single record ds[1], drawn side by
# side in a 1x3 subplot grid (each plot call follows the subplot it targets).
raws = extract_elementraws_from_dataset(ds[1:2])
plt.figure(figsize=(10,8))
plt.subplot(1,3,1)
raws[0].plot.box(grid=1)
plt.subplot(1,3,2)
raws[1].plot.box(grid=1)
plt.subplot(1,3,3)
raws[2].plot.box(grid=1)

In [None]:
def universal_plotter_for_single_axis(list_of_datasets,
                                      list_of_axis_to_plot,
                                      list_of_dataset_legend_titles,
                                      plot_title, y_axis_label, x_axis_label,
                                      figSizeTupel = (20,10)):
    """Draw one line per dataset into a single shared figure and show it.

    For the dataset at position ``i`` the horizontal values are taken from
    ``dataset['raws']['timestamp'].values`` and the vertical values from
    ``dataset[list_of_axis_to_plot[i]]``; the line is labelled with
    ``list_of_dataset_legend_titles[i]``.

    Args:
        list_of_datasets (list): Datasets to plot, one line each.

        list_of_axis_to_plot (list): Per dataset, the key of the axis to plot.

        list_of_dataset_legend_titles (list): Per dataset, the legend entry.

        plot_title, y_axis_label, x_axis_label (str): Plot title and the
            labels of the y and x axes.

        figSizeTupel (tuple): Figure size passed to ``plt.subplots``.

    Returns:
        None
    """
    fig, ax = plt.subplots(figsize=figSizeTupel)

    for position, dataset in enumerate(list_of_datasets):
        timestamps = dataset['raws']['timestamp'].values
        axis_values = dataset[list_of_axis_to_plot[position]]
        legend_title = list_of_dataset_legend_titles[position]
        ax.plot(timestamps, axis_values, label=legend_title)

    ax.set(xlabel=x_axis_label, ylabel=y_axis_label, title=plot_title)
    ax.grid()
    plt.legend()
    plt.show()

In [None]:
# timeseries_plotter.universal_plotter_for_single_axis(ds[0],['x'],['Legend title'],plot_title='Titel hier',y_axis_label='y-Achsen Label',x_axis_label='x-Achsen Label')

In [None]:
# Plot the 'x' axis of the first record with the locally defined plotter.
# NOTE(review): the plotter indexes dataset['raws']['timestamp'].values, while
# ds comes from json.load — confirm ds[0] actually has that shape.
universal_plotter_for_single_axis([ds[0]],['x'],['Legend title'],plot_title='Titel hier',y_axis_label='y-Achsen Label',x_axis_label='x-Achsen Label')

In [None]:
# Display the timestamp values of the first record.
# NOTE(review): `.values` looks like a pandas-style attribute, but ds is loaded
# with json.load — confirm this works on the raw record.
ds[0]['timestamp'].values

In [None]:
# Display the first dataset record again.
ds[0]