### **Notebook for cleaning velocity data**
#### Author: Hannah Nevel

##### Data needs to be in the following format to run this program:
- csv file
- column names above each set of values
- file located on local device

##### Import necessary libraries and data

In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px


##### Define needed variables

In [33]:
#full path of the raw csv file; in a normal Python string every backslash of a Windows path must be doubled (\\)
file_location = 'C:\\Users\\HannahNevel\\OneDrive - STF Technologies LLC\\Desktop\\DOE RheoSurfR\\Motor Data\\velocity_error_data_05_03_2023.csv'

#velocity threshold: rows whose first_col value is below this number are removed (presumably mm/min, as in the column name)
velocity_threshold = 0.7

#name (string) of the velocity column that is compared against velocity_threshold
first_col = 'AMP 1 Velocity (mm/min)'

#name (string) of the time column that gets shifted
time_col = 'Time (ms)'

#filename for the cleaned csv output; a bare name like this is written to the current working directory
newfile = 'velocity_data_05_03_2023_cleaned.csv'

##### Define functions to import a csv or xlsx file into a data frame and drop rows with missing/null values

In [34]:
def import_csv_file(filepath):
    """Load a csv file into a data frame and discard incomplete rows.

    Parameters
    ----------
    filepath
        Location of the csv file; handed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents without any row that holds a missing/null value.
        Surviving rows keep the row labels they had when read.
    """
    raw_table = pd.read_csv(filepath)
    # read_csv already yields a DataFrame, so only the null filtering remains
    return raw_table.dropna()

def import_xlsx_file(filepath):
    """Load an Excel workbook into a data frame and discard incomplete rows.

    Parameters
    ----------
    filepath
        Location of the workbook; handed unchanged to ``pandas.read_excel``.

    Returns
    -------
    pandas.DataFrame
        The sheet contents without any row that holds a missing/null value.
        Surviving rows keep the row labels they had when read.
    """
    raw_table = pd.read_excel(filepath)
    # read_excel already yields a DataFrame, so only the null filtering remains
    return raw_table.dropna()

#load the raw csv named by file_location; rows with missing/null values are dropped by import_csv_file
imported_vel_data = import_csv_file(file_location)

#### Clean Data

##### Define function to shift time points to the first velocity value above the given threshold

In [35]:
def shift_time_col(imported_data_frame, vel_thresh, vel_col_name, time_col_name):
    """Re-zero the time column at the end of the below-threshold start-up rows.

    Counts the rows whose velocity is below ``vel_thresh`` and subtracts the
    time stored in the row at position ``count - 1`` from every entry of the
    time column. That row therefore ends up at time 0 and earlier rows get
    negative times.

    The data frame is modified in place.

    Parameters
    ----------
    imported_data_frame : pandas.DataFrame
        Data holding the velocity and time columns.
    vel_thresh : number
        Velocity threshold; rows strictly below it count as start-up rows.
    vel_col_name : str
        Name of the velocity column compared against ``vel_thresh``.
    time_col_name : str
        Name of the time column to shift.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): the reference row is found by counting, so this assumes all
    below-threshold rows sit at the start of the recording - confirm for data
    with mid-run dips.
    If no row is below the threshold there is no start-up section and the
    time column is left unchanged.
    """
    below_count = int((imported_data_frame[vel_col_name] < vel_thresh).sum())
    if below_count == 0:
        # nothing to re-zero against; previously this raised KeyError on label -1
        return

    # positional lookup: row labels may have gaps after dropna(), so a
    # label-based lookup with a row count can miss or hit the wrong row
    time_value = imported_data_frame[time_col_name].iloc[below_count - 1]

    # one vectorised subtraction instead of a per-value replace loop, which was
    # quadratic and could shift a value twice when a shifted time equalled a
    # not-yet-processed raw time
    imported_data_frame[time_col_name] = imported_data_frame[time_col_name] - time_value

##### Define function to remove data points where velocity is less than defined threshold

In [36]:
def remove_vals(imported_data_frame, vel_thresh, column):
    """Remove every row whose value in ``column`` is below ``vel_thresh``.

    The rows are dropped from ``imported_data_frame`` itself (in place), and
    that same data frame is returned so the result can also be used directly.

    Parameters
    ----------
    imported_data_frame : pandas.DataFrame
        Data to clean; modified in place.
    vel_thresh : number
        Threshold; rows with a value strictly below it are removed.
    column : str
        Name of the column compared against ``vel_thresh``.

    Returns
    -------
    pandas.DataFrame
        ``imported_data_frame`` after the rows have been removed.
    """
    below_threshold = imported_data_frame[column] < vel_thresh
    rows_to_drop = imported_data_frame.loc[below_threshold].index
    # drop(..., inplace=True) returns None, so its result must not be returned;
    # the in-place drop is kept because callers rely on the argument changing
    imported_data_frame.drop(rows_to_drop, inplace=True)
    return imported_data_frame

##### Run data through cleaning functions, save as new csv and print

In [37]:
#shift the time column first: shift_time_col uses the below-threshold rows that remove_vals deletes
shift_time_col(imported_vel_data, velocity_threshold, first_col, time_col)
#drop the below-threshold rows; imported_vel_data itself is modified in place
remove_vals(imported_vel_data, velocity_threshold, first_col)

print(imported_vel_data)

#save the cleaned data to newfile; with to_csv defaults the row index is written as the first column
imported_vel_data.to_csv(newfile)

      AMP 1 Velocity (mm/min)  AMP 2 Velocity (mm/min)  Time (ms)  \
17                      0.800                    0.894        109   
18                      0.908                    1.006        219   
19                      0.908                    1.006        331   
20                      1.070                    1.030        343   
21                      1.209                    0.987        453   
...                       ...                      ...        ...   
3615                    1.149                    0.973     322583   
3616                    0.923                    0.991     322603   
3617                    0.937                    1.016     322714   
3618                    0.846                    1.000     322834   
3619                    0.846                    1.000     322953   

      Target Velocity (mm/min)  
17                           1  
18                           1  
19                           1  
20                           1  
21    

#### Data Visualization

In [38]:
#imported_vel_data.iloc[17:3603].plot(x='Time (ms)', y='AMP 1 Velocity (mm/min)', kind='scatter')