# This notebook reads the raw .csv files produced by the laser triangulation sensor used to measure the displacement of the pendulum thrust stand deployed in the University of Southampton's main vacuum chamber.

In [84]:
"""
Created on Sun Aug 23 12:36:10 2020
@author: Thomas Munro-O'Brien
"""
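#Import useful libraries.
#NOTE: scipy.misc.derivative (imported below as deriv) was removed in SciPy 1.12 and is not used by the cells shown here; delete that import line if it raises an ImportError.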
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
from scipy.misc import derivative as deriv
from scipy import signal

# Hopefully, the only input this code needs from you is the "Path" and "save_as" values; after setting them, run through the rest of the code and the output will be saved.

The 'Path' should be the directory path used to store the test data and/or the previous years' data. The 'save_as' will be the name of the output .csv file, which will be saved in the same location as the Python code.

*It is important to use forward slashes instead of backslashes, as a backslash is a special character in Python. Alternatively, two backslashes can be used.*

The asterisk is a wildcard character in the path pattern, so all files of the form 'anything'.csv will be selected. If the files are spread over multiple folders, "/ * / * .csv" (written without the spaces) can be used. 

If a file with the same name as the save_as already exists, it will be overwritten.

If you are trying to run this on a Mac, the syntax of file paths might be different and could lead to some issues.

In [85]:
# User configuration: edit these two values before running the rest of the notebook.
# Path is the glob pattern that selects the raw .csv files to process.
Path = 'C:/Users/tfmo1\\OneDrive - University of Southampton/Documents/Work Soton/Calibartion data 16-06-2021/*/*.csv'
# save_as is the base name of the results file; ".csv" is appended when it is written.
save_as = 'Calibration data 16-06-2021'
#For lab 7 and 8 the data is placed into sub folders, this can be pathed with an additional /* before the .csv

In [86]:
# Accumulators filled in by the cells further down:
#   file_name_list - paths of the .csv files to process
#   y_diff_list    - displacement computed for each file, later written to the output .csv
#   test_name      - label built from each file's name and its parent folder
file_name_list, y_diff_list, test_name = [], [], []

Time to define the function that will read the data within the file.

In [94]:
def _plateau_window(partition, trim=0.25):
    """Average the central part of one side of the displacement step.

    ``partition`` is a 2-row array (row 0: sample index, row 1: filtered
    distance).  A fraction ``trim`` of the samples is dropped from each end
    so that the transition itself and the filter edge effects do not bias
    the mean.

    Returns ``(mean, first_index, line_end_index)`` where the two indices
    are column positions inside ``partition`` used to draw the marker line.

    Raises ``ValueError`` when no samples are left to average.
    """
    n = partition.shape[1]
    skip = int(n * trim)
    # Same window as the original element-by-element sum: [skip, n - skip - 1).
    window = partition[1, skip:max(n - skip - 1, skip)]
    if window.size == 0:
        raise ValueError("Not enough samples on one side of the step to average")
    # Clamp so that skip == 0 (very short partitions) cannot index past the end.
    line_end = min(n - skip, n - 1)
    return window.mean(), skip, line_end


def get_Y_dif(file_name_to_test, *, fs=5000, fc=0.750, save_fig=True):
    """Return the absolute displacement step recorded in one sensor .csv file.

    The distance trace is low-pass filtered, split at the sample where the
    filtered signal rises fastest, and the level on each side of that split
    is averaged.  The difference of the two means is shown in a plot
    (optionally saved next to the data file) and its absolute value is
    returned.

    Parameters
    ----------
    file_name_to_test : str
        Path of the .csv file.  The first five rows are skipped and the
        first two columns are read as time stamp and distance [mm].
    fs : float, optional
        Sampling frequency.  Needs changing if data reduction was used
        when the file was recorded.
    fc : float, optional
        Cut-off frequency of the low-pass filter, in the same units as
        ``fs``.  Adjust if the data is not being filtered well.
    save_fig : bool, optional
        When true the plot is saved as ``"<file_name_to_test>.png"``.

    Returns
    -------
    float
        ``abs(mean of right partition - mean of left partition)``.

    Raises
    ------
    ValueError
        If either side of the step has no samples left to average.
    """
    df = pd.read_csv(
        file_name_to_test,
        skiprows=[0, 1, 2, 3, 4],
        delimiter=",",
        header=None,
        usecols=[0, 1],
        names=['Time Stamp', 'Distance [mm]'],
    )
    distance = df['Distance [mm]']

    # The x-axis used throughout is the sample index, not the time stamp column.
    t = np.arange(0, distance.size, 1)
    w = fc / (fs / 2)  # Cut-off normalised to the Nyquist frequency

    # 4th-order Butterworth low-pass, applied forwards and backwards.
    filt_b, filt_a = signal.butter(4, w, 'low')
    output = signal.filtfilt(filt_b, filt_a, distance)

    output_dif = np.diff(output)  # Sample-to-sample change of the filtered signal

    # glob returns back-slashes on Windows, so accept either separator when
    # extracting the bare file name for the plot title.
    Title = file_name_to_test.replace('\\', '/').split('/')[-1]

    xy = np.array([t, output])

    # The filter distorts the ends of the record, so the first and last 10%
    # are ignored while searching for the step.
    edge = int(round(0.1 * output_dif.size, 0))
    skip_left = edge
    skip_right = output_dif.size - edge

    # Index of the maximum rate of change; the record is split there.
    max_index = np.argmax(output_dif[skip_left:skip_right]) + skip_left
    left, right = xy[:, :max_index], xy[:, max_index:]

    # 25% is trimmed from both ends of each partition before averaging.
    mean_a, left_start, left_end = _plateau_window(left)
    mean_b, right_start, right_end = _plateau_window(right)

    Y_dif = mean_b - mean_a

    # Use an explicit figure and close it afterwards so that plots from
    # successive files never end up drawn on top of each other.
    fig, ax = plt.subplots()
    ax.set_title(Title + " :" + str(round(Y_dif, 5)) + "mm")
    ax.plot(t, output)
    ax.plot([left[0, left_start], left[0, left_end]], [mean_a, mean_a])
    ax.plot([right[0, right_start], right[0, right_end]], [mean_b, mean_b])
    if save_fig:
        fig.savefig("{}.png".format(file_name_to_test))
    plt.show()
    plt.close(fig)
    return abs(Y_dif)

Now direct Python to where the files are saved and iterate over each .csv file it finds.

In [95]:
# Rebuild the list from scratch so that re-running this cell does not add the
# same files a second time.  glob gives no ordering guarantee, so sort to keep
# the rows of the output table in a stable order.
file_name_list = sorted(glob.iglob(Path))

In [96]:
# Start from empty result lists so that re-running this cell does not
# duplicate rows in the saved table.
test_name.clear()
y_diff_list.clear()

for file_name in file_name_list:
    y_diff = get_Y_dif(file_name)
    # Accept both '/' and '\\' so the label is built the same way on every
    # operating system: "<file name> <parent folder>".
    path_parts = file_name.replace('\\', '/').split('/')
    parent_folder = path_parts[-2] if len(path_parts) > 1 else ''
    name = path_parts[-1] + " " + parent_folder
    test_name.append(name)
    print(name, y_diff)
    y_diff_list.append(y_diff)

FileNotFoundError: [Errno 2] No such file or directory: 'D:/Calibartion data 16-06-2021\\Pyramid run\\protocol_optoNCDT ILD1750_2021-06-16_15-48-37.250.csv'

To save the output data, the file names and y_diff values are compiled into a dictionary and then into a pandas DataFrame. Once in the form of a DataFrame, it can easily be saved as a .csv file. 

In [90]:
# Results table: one row per processed file, displacement first, label second.
output_path = f"{save_as}.csv"
dic = {
    "Y_dif (mm)": y_diff_list,
    "File_name": test_name,
}
df = pd.DataFrame(data=dic)
# index=False keeps the pandas row numbers out of the saved file.
df.to_csv(output_path, index=False)

There is now a file that contains the y_diff values in one column and the file names in the next.

If the dataframe is empty, the most likely issue is that the 'Path' is incorrect.

If the raw data was saved under a file name in the format "00 m_a 00 m_c 00 voltage 00 current .csv", it can be easily decoded. By opening the output .csv in MS Excel and then using the "Text to Columns" function found on the Data tab, the file name column can be turned into individual columns by selecting the correct delimiter; in the case suggested above, the delimiter is a space.