In [None]:
import csv
import glob
import math
import os
import random
import shutil
from math import floor
from pprint import pprint
from random import shuffle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.offsetbox import AnchoredText
from scipy.signal import argrelextrema
from scipy.signal import find_peaks,detrend
from scipy.signal import kaiserord, lfilter, firwin

In [None]:
def get_data(file_name):
    """Summarise one recording and append the summary to the global ``data`` list.

    The step count is taken from column 1 of the single record read after
    skipping the first two lines of the file; the samples are read from the
    table that starts after the first five lines.

    Appends ``[file_name, duration, sample_rate, actual_steps]`` where
    ``duration`` is the last time stamp relative to the first one and
    ``sample_rate`` is the index of the first sample lying more than 1.0
    after the start (``None`` when there is no such sample).

    Returns nothing; the result is only visible through ``data``.
    """
    step_record = pd.read_csv(file_name, skiprows=2, nrows=1)
    actual_steps = step_record.values[0][1]

    new_df = pd.read_csv(file_name, skiprows=5)
    acc_x, acc_y, acc_z = (np.array(new_df[axis]) for axis in ("ACC X", "ACC Y", "ACC Z"))

    # Magnitude of the acceleration vector, centred on its own mean.
    magnitude = np.sqrt(acc_x**2 + acc_y**2 + acc_z**2)
    new_df['Mean Magnitude'] = magnitude - np.mean(magnitude)

    # Re-base the time axis so the recording starts at zero.
    first_stamp = new_df["Time [sec]"].iloc[0]
    new_df['Time'] = new_df["Time [sec]"] - first_stamp
    t_arr = np.array(new_df["Time"])

    # Number of samples that fit into the first second of the recording.
    sample_rate = next(
        (idx for idx, stamp in enumerate(t_arr) if stamp - t_arr[0] > 1.0),
        None,
    )

    duration = new_df['Time'].iloc[-1]
    data.append([file_name, duration, sample_rate, actual_steps])

In [None]:
def get_file_list_from_dir(datadir):
    """Return the paths of all ``*.csv`` files directly inside ``datadir``.

    The order of the returned list is whatever ``glob.glob`` yields; callers
    that need a stable order must sort it themselves.
    """
    csv_pattern = datadir + "/*.csv"
    return glob.glob(csv_pattern)

def randomize_files(file_list, seed=None):
    """Shuffle ``file_list`` in place.

    Parameters
    ----------
    file_list : list
        The list to shuffle; it is modified in place.
    seed : optional
        When given, a dedicated ``random.Random(seed)`` generator is used so
        the same input order always produces the same shuffled order. When
        omitted, the module-level ``shuffle`` is used, exactly as before.

    Returns
    -------
    None
    """
    if seed is None:
        shuffle(file_list)
    else:
        random.Random(seed).shuffle(file_list)
    
def get_training_and_testing_sets(file_list, split=0.7):
    """Split ``file_list`` into a leading training part and a trailing testing part.

    Parameters
    ----------
    file_list : list
        Items to split; the list itself is not modified.
    split : float, default 0.7
        Fraction of the items that goes into the training part. The cut index
        is ``floor(len(file_list) * split)``.

    Returns
    -------
    (training, testing) : tuple of lists

    Raises
    ------
    ValueError
        If ``split`` is not within ``[0, 1]``.
    """
    if not 0 <= split <= 1:
        raise ValueError("split must be between 0 and 1, got {!r}".format(split))
    split_index = floor(len(file_list) * split)
    return file_list[:split_index], file_list[split_index:]

def fourier_extrapolation(x, n_harm):
    """Rebuild ``x`` from its linear trend plus its lowest-frequency Fourier terms.

    The slope of a first-degree polynomial fit is removed from ``x``, the
    remainder is transformed with the FFT, and the ``1 + 2 * n_harm`` bins with
    the smallest absolute frequency are summed back as cosines. The slope term
    is then added again.

    Note: the result has the same length as ``x``; no samples beyond the input
    are generated.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional signal (``x.size`` samples).
    n_harm : int
        Number of harmonics to keep on each side of the zero frequency.

    Returns
    -------
    numpy.ndarray
        The reconstructed signal.
    """
    n = x.size
    t = np.arange(0, n)

    # Only the slope of the fit is treated as trend; the offset stays in the DC bin.
    slope = np.polyfit(t, x, 1)[0]
    spectrum = np.fft.fft(x - slope * t)
    freqs = np.fft.fftfreq(n)

    # Bin indices ordered from the lowest to the highest absolute frequency.
    by_frequency = sorted(range(n), key=lambda k: np.absolute(freqs[k]))

    restored_sig = np.zeros(t.size)
    for k in by_frequency[:1 + n_harm * 2]:
        component = spectrum[k]
        amplitude = np.absolute(component) / n
        restored_sig += amplitude * np.cos(2 * np.pi * freqs[k] * t + np.angle(component))
    return restored_sig + slope * t

def get_mag_mean_filter_kernel5(file_name,mode,k_val=None,isPrint=False):
    """Load one recording and return its smoothed, low-pass filtered magnitude signal.

    Processing steps:
      1. read the step count and the sample table from ``file_name``;
      2. compute the acceleration magnitude and subtract its mean;
      3. smooth it with a moving average (7 samples for walking, 5 for running);
      4. low-pass it with a Kaiser-window FIR filter.

    Parameters
    ----------
    file_name : str
        Path of the CSV recording.
    mode : str
        ``"walking"`` or ``"running"``; selects the smoothing window and cut-off.
    k_val : optional
        Not used here; handed back unchanged in the result tuple.
    isPrint : bool, default False
        When true, plot the raw, smoothed and filtered signals on a new figure.

    Returns
    -------
    (signal, actual_steps, sample_rate, k_val, ax)
        ``sample_rate`` is the estimated rate, raised to at least 10.0;
        ``ax`` is the matplotlib axes of the plot, or ``None`` when ``isPrint``
        is false.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"walking"`` nor ``"running"`` (previously this
        surfaced later as an unbound-variable error).
    """
    # mode -> (moving-average window, FIR cut-off in Hz used from 15 samples/s upwards)
    mode_settings = {"walking": (7, 3.0), "running": (5, 4.0)}
    if mode not in mode_settings:
        raise ValueError("mode must be 'walking' or 'running', got {!r}".format(mode))
    filter_window_size, regular_cutoff_hz = mode_settings[mode]

    actual_steps=pd.read_csv(file_name,skiprows=2,nrows=1).values[0][1]
    new_df = pd.read_csv(file_name,skiprows=5)
    x_arr=np.array(new_df["ACC X"])
    y_arr=np.array(new_df["ACC Y"])
    z_arr=np.array(new_df["ACC Z"])
    mag_arr = np.sqrt(x_arr**2 + y_arr**2 + z_arr**2)
    mean_mag_arr=mag_arr-np.mean(mag_arr)
    new_df['Mean Magnitude']=mean_mag_arr
    new_df['Time']=new_df["Time [sec]"]-new_df["Time [sec]"].iloc[[0]].values[0]
    t_arr=np.array(new_df["Time"])

    # Moving-average smoothing ('valid' drops the partially covered edges).
    mag_mean_filter_kernel5 = np.convolve(mean_mag_arr,
                           np.ones((filter_window_size,))/filter_window_size,
                           mode='valid')

    # Sample rate = number of samples within the first second of the recording.
    sample_rate = next(
        (i for i in range(len(t_arr)) if t_arr[i]-t_arr[0]>1.0),
        None,
    )
    # No sample beyond the first second (None) or a very low rate: fall back to
    # the 10.0 floor so the filter design below always has a usable rate.
    if sample_rate is None or sample_rate<10.0:
        sample_rate=10.0

    # Kaiser-window FIR low-pass; the transition width is relative to Nyquist.
    nyq_rate = sample_rate / 2.0
    width = 5.0/nyq_rate
    ripple_db = 50.0
    N, beta = kaiserord(ripple_db, width)
    # Below 15 samples/s both modes use the same 4.5 Hz cut-off.
    cutoff_hz = 4.5 if sample_rate<15.0 else regular_cutoff_hz
    taps = firwin(N, cutoff_hz/nyq_rate, window=('kaiser', beta))

    signal = lfilter(taps, 1.0, mag_mean_filter_kernel5)

    ax=None
    if isPrint:
        fig = plt.figure(figsize=(25,5))
        ax = fig.subplots()
        ax.plot(mean_mag_arr,label = 'Mean Magnutude Signal',linestyle='-.',color='gray',alpha=0.4)
        ax.plot(mag_mean_filter_kernel5,label = 'Signal after Convo',linestyle='--',color='orange')
        ax.plot(signal,label = 'Signal after FIR Filter',color='green')

    return signal,actual_steps,sample_rate,k_val,ax

def get_all_maxima(data):
    """Return the indices of every local maximum in ``data``.

    A maximum is a sample (or a run of equal samples) that is strictly higher
    than its neighbour on each side. For a flat run the middle index, rounded
    down, is reported. The first and last samples are never reported.

    Returns
    -------
    numpy.ndarray
        Indices with dtype ``numpy.intp``, in ascending order.
    """
    found = []
    last = len(data) - 1
    idx = 1
    while idx < last:
        # Only a rising edge can start a maximum.
        if not data[idx - 1] < data[idx]:
            idx += 1
            continue

        # Walk across a possible plateau of equal values.
        right = idx + 1
        while right < last and data[right] == data[idx]:
            right += 1

        # A falling edge after the plateau confirms the maximum.
        if data[right] < data[idx]:
            found.append((idx + right - 1) // 2)
            idx = right
        idx += 1

    return np.array(found, dtype=np.intp)

def get_peaks_by_threshold(data, maxima_pos, threshold):
    """Keep only the maxima that stand out from both direct neighbours.

    A position listed in ``maxima_pos`` is kept when the sample there exceeds
    its left and its right neighbour by at least ``threshold``. Positions at
    the first or last index of ``data`` are ignored because they lack a
    neighbour on one side.

    Parameters
    ----------
    data : array-like
        The signal.
    maxima_pos : array-like
        Candidate indices into ``data``.
    threshold : float
        Minimum height above the lower of the two neighbours.

    Returns
    -------
    (peaks_pos, peaks) : tuple of numpy.ndarray
        Kept indices in ascending order and the signal values at them.
    """
    data=np.array(data)
    # A set gives constant-time membership tests; checking `i in <ndarray>`
    # scans the whole array on every loop iteration.
    candidates=set(np.array(maxima_pos).ravel().tolist())
    peaks=[]
    peaks_pos=[]
    for i in range(1,len(data)-1):
        if i not in candidates:
            continue
        peak=data[i]
        prominence=min(peak-data[i+1],peak-data[i-1])
        if prominence>=threshold:
            peaks.append(peak)
            peaks_pos.append(i)
    return np.array(peaks_pos),np.array(peaks)

def calc_threshold(data,actual_steps):
    """Find the threshold whose peak count is closest to ``actual_steps``.

    Thresholds from 0 up to (but excluding) 0.1 in steps of 0.001 are tried.
    When several thresholds are equally close, the smallest one wins.

    Parameters
    ----------
    data : array-like
        The filtered signal.
    actual_steps : number
        The reference step count for this signal.

    Returns
    -------
    The best threshold found.
    """
    # The maxima do not depend on the threshold, so compute them once.
    maxima_pos = get_all_maxima(data)
    min_diff=None
    best_thresh=None
    for threshold in np.arange(0,0.1,0.001):
        peaks_pos,peaks = get_peaks_by_threshold(data, maxima_pos, threshold)
        diff=abs(actual_steps-len(peaks))
        # Strict comparison: an equally good later threshold does not replace
        # the earlier one.
        if min_diff is None or min_diff>diff:
            min_diff=diff
            best_thresh=threshold
    return best_thresh


def _sample_rate_bucket(sample_rate):
    """Return the threshold-dictionary key that covers ``sample_rate``.

    ``None`` and rates up to 5 map to 5, rates up to 10 map to 10, then buckets
    go in steps of ten up to 90; anything else maps to 100.
    """
    if sample_rate is None:
        return 5
    for upper_bound in (5, 10, 20, 30, 40, 50, 60, 70, 80, 90):
        if sample_rate <= upper_bound:
            return upper_bound
    return 100


def set_threshold_dict(sample_rate,threshold_dict,data,actual_steps):
    """Append the best threshold for ``data`` to the bucket of its sample rate.

    Parameters
    ----------
    sample_rate : number or None
        Sample rate of the recording; selects the bucket.
    threshold_dict : dict
        Maps bucket keys to lists of thresholds; modified in place.
    data : array-like
        Filtered signal handed to ``calc_threshold``.
    actual_steps : number
        Reference step count handed to ``calc_threshold``.
    """
    bucket = threshold_dict[_sample_rate_bucket(sample_rate)]
    bucket.append(calc_threshold(data,actual_steps))


def get_thershold(threshold_dict,sample_rate):
    """Return the entry of ``threshold_dict`` for the bucket of ``sample_rate``.

    Uses the same bucketing as ``set_threshold_dict`` so that thresholds are
    looked up under the key they were stored with.
    """
    return threshold_dict[_sample_rate_bucket(sample_rate)]


        
def check_algo(mode, getSignal,train,test):
    """Fit per-sample-rate thresholds on ``train`` and evaluate them on ``test``.

    Training: for every file the best threshold is stored in the bucket of the
    file's sample rate; each bucket's thresholds are then averaged. A bucket
    that received no training file gets threshold 0.

    Testing: for every file the peaks above the bucket threshold are counted,
    compared with the reference step count, drawn on the signal plot, saved
    next to the input as ``<file>.png`` and shown.

    Parameters
    ----------
    mode : str
        Passed through to ``getSignal``.
    getSignal : callable
        Called as ``getSignal(file, mode, k_val, isPrint)`` and expected to
        return ``(signal, actual_steps, sample_rate, k_val, ax)``.
    train, test : list
        File paths used for fitting and for evaluation.

    Prints the fitted thresholds and the summary statistics; returns nothing.
    """
    threshold_dict={5:[],10:[],15:[],20:[],25:[],30:[],35:[],40:[],50:[],60:[],70:[],80:[],90:[],100:[]}
    # Defined up front so the test loop also works when `train` is empty.
    k_val=None
    for file in train:
        data,actual_steps,sample_rate,k_val,_=getSignal(file,mode,None,False)
        plt.close('all')
        set_threshold_dict(sample_rate,threshold_dict,data,actual_steps)

    # Average the thresholds per bucket; empty buckets fall back to 0.
    optimal_threshold_dict={
        key: (np.array(values).mean() if len(values)!=0 else 0)
        for key,values in threshold_dict.items()
    }

    print(optimal_threshold_dict)

    tot_diff=0
    all_real_steps=0
    all_calc_steps=0

    for file in test:
        data,actual_steps,sample_rate,k_val,ax=getSignal(file,mode,k_val,True)
        maxima_pos = get_all_maxima(data)
        threshold=get_thershold(optimal_threshold_dict,sample_rate)
        peaks_pos,peaks = get_peaks_by_threshold(data, maxima_pos, threshold)
        diff=abs(actual_steps-len(peaks))
        # Summary box inside the plot.
        anchored_text = AnchoredText(file+" sample rate is: "+str(sample_rate)+
                                    "\n COUNT OF ACTUAL STEPS: "+str(actual_steps)+
                                     "\n COUNT OF CALCULATED STEPS: "+str(len(peaks))+
                                     "\n Difference: "+str(diff), loc=2)
        ax.add_artist(anchored_text)

        ax.scatter(peaks_pos,peaks, color = 'r', s = 15, marker = 'D', label = 'Maxima')
        ax.legend(loc=1)
        ax.grid()
        plt.savefig(file+".png")
        plt.show()
        # Release the figure so a long test list does not accumulate open figures.
        plt.close(ax.figure)
        all_real_steps+=actual_steps
        all_calc_steps+=len(peaks)
        tot_diff+=diff
        print("-"*50)

    # Guard the ratios so an empty test set (or zero reference steps) reports
    # NaN instead of raising ZeroDivisionError.
    avg_diff=(tot_diff/len(test)) if len(test) else float("nan")
    pct_diff=(tot_diff*100/all_real_steps) if all_real_steps else float("nan")
    print("Avarage Difference: ",avg_diff)
    print("Difference in %",pct_diff)
    print("All Actual Steps: ",all_real_steps)
    print("Tot Difference: ",tot_diff)

    print("-"*50)


## Count Walking Steps: Data Analysis

In [None]:
# Build one summary row per walking recording.
files=sorted(get_file_list_from_dir("data/walk_all"))
data=[]  # get_data() appends its rows to this module-level list
for file in files:
    get_data(file)

df=pd.DataFrame(data,columns=["Name","Duration","SR","Steps"])
# Keep just the file name (extension included, the clean-up cell needs it).
# os.path.basename copes with both "/" and "\\" separators; stripping the
# directory text by hand left a leading "/" on POSIX paths.
df['Name'] = df['Name'].map(os.path.basename)
df.sort_values(by=['Duration',"Steps"])


In [None]:
# Box plot of the reference step counts (mean marker included).
plt.rcParams.update({"figure.figsize": (5, 5)})
df.boxplot(column=['Steps'], showmeans=True)
plt.show()

In [None]:
# Large canvas so the per-recording labels stay readable.
plt.rcParams.update({"figure.figsize": (20, 20), "figure.autolayout": True})

# Colour code: red = shorter than 50, blue = fewer than 95 steps, black = the rest.
is_short = df['Duration'] < 50
has_few_steps = df["Steps"] < 95
col = np.where(is_short, 'r', np.where(has_few_steps, 'b', 'k'))

ax = df.plot.scatter(x='Duration', y='Steps', alpha=0.7, s=50, c=col)

# Label every point with the recording name, slightly offset from the marker.
for name, duration, steps in zip(df["Name"], df['Duration'], df["Steps"]):
    ax.annotate(name, (duration + 0.05, steps),
                xytext=(10, -5), textcoords='offset points',
                family='sans-serif', fontsize=20, color='darkslategrey')

plt.xticks(np.arange(20, 100, 5))
plt.yticks(np.arange(65, 140, 5))
plt.grid()
plt.show()

In [None]:
# How the recordings are distributed over the estimated sample rate.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["SR"], num_bins, color='green', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Sample Rate ', fontweight="bold")
hist_ax.set_xticks(np.arange(0, 40, 2))
hist_ax.set_yticks(np.arange(0, 12, 2))
hist_ax.set_xlabel('Sample Rate in hz')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# How the recordings are distributed over their duration.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["Duration"], num_bins, color='blue', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Duration ', fontweight="bold")
hist_ax.set_xticks(np.arange(20, 105, 5))
hist_ax.set_yticks(np.arange(0, 30, 2))
hist_ax.set_xlabel('Duration in sec')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# How the recordings are distributed over the reference step count.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["Steps"], num_bins, color='gold', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Steps Counted ', fontweight="bold")
hist_ax.set_xticks(np.arange(60, 150, 10))
hist_ax.set_yticks(np.arange(0, 10, 2))
hist_ax.set_xlabel('Number of Steps Counted')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# Recordings whose sample rate is 5 or lower, lowest first.
low_sr_df = df.sort_values(by=['SR']).loc[lambda frame: frame['SR'] <= 5]
low_sr_list = low_sr_df["Name"].tolist()
low_sr_df

In [None]:
# Recordings with a duration of 50 or less.
is_short_recording = df['Duration'] <= 50
low_dur_df = df[is_short_recording]
low_dur_list = low_dur_df["Name"].tolist()
low_dur_df

In [None]:
# Set aside recordings that are too short or too sparsely sampled.
# A recording can be on both lists; de-duplicate (keeping order) so it is moved once.
unused=list(dict.fromkeys(low_dur_list+low_sr_list))
destination = "data/walk_all/unused"
os.makedirs(destination, exist_ok=True)
for file_name in unused:
    # basename() drops any leading separator left in the name; a name starting
    # with "/" would make os.path.join discard the source directory.
    source = os.path.join("data/walk_all", os.path.basename(file_name))
    try:
        shutil.move(source, destination)
    except OSError as err:
        # Best effort: report the problem instead of hiding it, then carry on.
        print("Could not move", source, "->", err)
pprint(unused)

In [None]:
# Rebuild the summary table from the recordings that remain after the clean-up.
files=sorted(get_file_list_from_dir("data/walk_all"))
data=[]  # get_data() appends its rows to this module-level list
for file in files:
    get_data(file)

df=pd.DataFrame(data,columns=["Name","Duration","SR","Steps"])
# Reduce each path to the bare file name without extension. basename/splitext
# are separator-agnostic and do not depend on whether pandas' str.replace
# treats ".csv" as a regular expression.
df['Name'] = df['Name'].map(lambda path: os.path.splitext(os.path.basename(path))[0])
df.sort_values(by=['Duration',"Steps"])

## Count Walking Steps: Steps Counting Algorithm

In [None]:
# Reproducible train/test split: glob returns files in arbitrary order, so sort
# first and seed the generator behind shuffle() before shuffling.
SPLIT_SEED = 42
files=sorted(get_file_list_from_dir("data/walk_all"))
random.seed(SPLIT_SEED)
randomize_files(files)
train,test=get_training_and_testing_sets(files)
train.sort()
test.sort()
print(len(test),len(train))

In [None]:
# Fit the thresholds on the walking training files and evaluate on the test files.
check_algo(
    mode="walking",
    getSignal=get_mag_mean_filter_kernel5,
    train=train,
    test=test,
)

## Count Running Steps: Data Analysis

In [None]:
# Build one summary row per running recording.
files=sorted(get_file_list_from_dir("data/run_all"))
data=[]  # get_data() appends its rows to this module-level list
for file in files:
    get_data(file)

df=pd.DataFrame(data,columns=["Name","Duration","SR","Steps"])
# Keep just the file name (extension included, the clean-up cell needs it).
# os.path.basename copes with both "/" and "\\" separators; stripping the
# directory text by hand left a leading "/" on POSIX paths.
df['Name'] = df['Name'].map(os.path.basename)
df.sort_values(by=['Duration',"Steps"])


In [None]:
# Box plot of the reference step counts (mean marker included).
plt.rcParams.update({"figure.figsize": (5, 5)})
df.boxplot(column=['Steps'], showmeans=True)
plt.show()

In [None]:
# Large canvas so the per-recording labels stay readable.
plt.rcParams.update({"figure.figsize": (20, 20), "figure.autolayout": True})

# Colour code: red = shorter than 50, blue = fewer than 139 steps, black = the rest.
is_short = df['Duration'] < 50
has_few_steps = df["Steps"] < 139
col = np.where(is_short, 'r', np.where(has_few_steps, 'b', 'k'))

ax = df.plot.scatter(x='Duration', y='Steps', alpha=0.7, s=50, c=col)

# Label every point with the recording name, slightly offset from the marker.
for name, duration, steps in zip(df["Name"], df['Duration'], df["Steps"]):
    ax.annotate(name, (duration + 0.05, steps),
                xytext=(10, -5), textcoords='offset points',
                family='sans-serif', fontsize=20, color='darkslategrey')

plt.xticks(np.arange(0, 150, 5))
plt.yticks(np.arange(65, 200, 5))
plt.grid()
plt.show()

In [None]:
# How the recordings are distributed over the estimated sample rate.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["SR"], num_bins, color='green', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Sample Rate ', fontweight="bold")
hist_ax.set_xticks(np.arange(0, 40, 2))
hist_ax.set_yticks(np.arange(0, 12, 2))
hist_ax.set_xlabel('Sample Rate in hz')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# How the recordings are distributed over their duration.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["Duration"], num_bins, color='blue', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Duration ', fontweight="bold")
hist_ax.set_xticks(np.arange(20, 160, 5))
hist_ax.set_yticks(np.arange(0, 30, 2))
hist_ax.set_xlabel('Duration in sec')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# How the recordings are distributed over the reference step count.
num_bins = 50
fig, hist_ax = plt.subplots(figsize=(10, 5))
hist_ax.hist(df["Steps"], num_bins, color='gold', alpha=0.5)
hist_ax.set_title('Historgtam of Data Records by Steps Counted ', fontweight="bold")
hist_ax.set_xticks(np.arange(60, 200, 10))
hist_ax.set_yticks(np.arange(0, 10, 2))
hist_ax.set_xlabel('Number of Steps Counted')
hist_ax.set_ylabel('Number of Data Records')
plt.show()

In [None]:
# Recordings whose sample rate is 5 or lower, lowest first.
low_sr_df = df.sort_values(by=['SR']).loc[lambda frame: frame['SR'] <= 5]
low_sr_list = low_sr_df["Name"].tolist()
low_sr_df

In [None]:
# Recordings with a duration of 50 or less.
is_short_recording = df['Duration'] <= 50
low_dur_df = df[is_short_recording]
low_dur_list = low_dur_df["Name"].tolist()
low_dur_df

In [None]:
# Set aside recordings that are too short or too sparsely sampled.
# A recording can be on both lists; de-duplicate (keeping order) so it is moved once.
unused=list(dict.fromkeys(low_dur_list+low_sr_list))
destination = "data/run_all/unused"
os.makedirs(destination, exist_ok=True)
for file_name in unused:
    # basename() drops any leading separator left in the name; a name starting
    # with "/" would make os.path.join discard the source directory.
    source = os.path.join("data/run_all", os.path.basename(file_name))
    try:
        shutil.move(source, destination)
    except OSError as err:
        # Best effort: report the problem instead of hiding it, then carry on.
        print("Could not move", source, "->", err)
pprint(unused)

In [None]:
# Rebuild the summary table from the recordings that remain after the clean-up.
files=sorted(get_file_list_from_dir("data/run_all"))
data=[]  # get_data() appends its rows to this module-level list
for file in files:
    get_data(file)

df=pd.DataFrame(data,columns=["Name","Duration","SR","Steps"])
# Reduce each path to the bare file name without extension. basename/splitext
# are separator-agnostic and do not depend on whether pandas' str.replace
# treats ".csv" as a regular expression.
df['Name'] = df['Name'].map(lambda path: os.path.splitext(os.path.basename(path))[0])
df.sort_values(by=['Duration',"Steps"]).head()

## Count Running Steps: Steps Counting Algorithm

In [None]:
# Reproducible train/test split: glob returns files in arbitrary order, so sort
# first and seed the generator behind shuffle() before shuffling.
SPLIT_SEED = 42
files=sorted(get_file_list_from_dir("data/run_all"))
random.seed(SPLIT_SEED)
randomize_files(files)
train,test=get_training_and_testing_sets(files)
train.sort()
test.sort()
print(len(test),len(train))

In [None]:
# Fit the thresholds on the running training files and evaluate on the test files.
check_algo(
    mode="running",
    getSignal=get_mag_mean_filter_kernel5,
    train=train,
    test=test,
)