In [104]:
import os
import pandas as pd
import numpy as np
from numpy.random import normal

# Working directory and raw-data location.
# Both default to the original author's local layout; set MFGDB_REPO_DIR and/or
# NIST_XYPT_DIR in the environment to run this notebook on another machine
# without editing the cell.
# The chdir matters: the final export writes daq.csv to a path relative to the
# current working directory.
os.chdir(os.environ.get('MFGDB_REPO_DIR',
                        '/Users/nikkivanhandel/CS6400/mfgdb/CS6400-MfgDB/'))
# Folder holding the per-layer command files (L0001.csv, L0002.csv, ...).
# The default previously began with '//'; a single leading slash is used so the
# path is unambiguous on every POSIX system.
path = os.environ.get('NIST_XYPT_DIR',
                      '/Users/nikkivanhandel/Downloads/NIST_24/NIST_23/XYPT Commands/')

The following data set is from the NIST Additive Manufacturing Metrology Testbed. The data comprise 250 layers in a single build. A selection of layers is used for demonstration purposes.

https://www.nist.gov/publications/process-monitoring-dataset-additive-manufacturing-metrology-testbed-ammt-overhang-part

In the original file, absolute time is not given. Rather, each row corresponds to a command given at 100 kHz, so consecutive rows are 10 microseconds apart. These values are what the machine is "told" to do rather than what it actually does.

While files containing the true (measured) position values for this build are available, the records are very large and difficult to acquire. Instead, we use the programmed data and add noise so that it can be treated as collected data. For this example, we downsample the data to 100 Hz and add normally distributed noise that approximates the accuracy of the machinery.

In [107]:
# Build-level configuration used to turn the commanded data into simulated
# DAQ records.
# The date is month-day-year; the explicit format removes the day/month ambiguity.
start_time = pd.to_datetime('12-06-2024 14:00', format='%m-%d-%Y %H:%M')  # Time that build starts
dwell = pd.to_timedelta(5, unit='s')  # Time between layers
start_id = 0      # First RecordID to assign
freq = 100000     # Original data frequency in Hz
ds_rate = 1000    # Downsampling degree: keep every ds_rate-th row (100 kHz -> 100 Hz)

layer_start = start_time
all_data = pd.DataFrame([], columns=['RecordID', 'BuildID', 'SensorSerial', 'Value', 'Date', 'Time'])

# Preview a slice of one raw layer file.
# The layer number is set explicitly so this cell runs on a fresh kernel
# instead of relying on a loop variable left over from an earlier execution.
preview_layer = 1
file = os.path.join(path, f'L{preview_layer:04d}.csv')
# Channel names (x position, y position, power, trigger (laser on or off))
df = pd.read_csv(file, names=['X', 'Y', 'P', 'T'])  # Read data locally
df.iloc[10000:12000, :]

Unnamed: 0,X,Y,P,T
10000,-6.0,14.588129,100.0,0.0
10001,-6.0,14.597122,100.0,2.0
10002,-6.0,14.606115,100.0,0.0
10003,-6.0,14.615108,100.0,0.0
10004,-6.0,14.624101,100.0,0.0
...,...,...,...,...
11995,3.0,16.664200,0.0,0.0
11996,3.0,16.658750,0.0,0.0
11997,3.0,16.653200,0.0,0.0
11998,3.0,16.647550,0.0,0.0


In [110]:
# Build simulated DAQ records from layer files L0001.csv through L0019.csv.
# NOTE(review): range(1, 20) reads 19 layers; use range(1, 21) if 20 layers
# are actually intended.
#
# For every layer: drop the trigger column, downsample from 100 kHz to 100 Hz,
# assign timestamps, add measurement noise, and emit one block of records per
# channel (X, Y, P). The result is written once, after the loop, to daq.csv.

rng = np.random.default_rng(6400)  # Fixed seed so the generated noise is reproducible
sample_period = pd.to_timedelta(ds_rate / freq, unit='s')  # 0.01 s between kept samples
columns = ['RecordID', 'BuildID', 'SensorSerial', 'Value', 'Date', 'Time']
noise_sigma = {'X': 0.01, 'Y': 0.01, 'P': 0.1}  # Noise std. dev. per channel
sensor_serial = {'X': 4, 'Y': 5, 'P': 3}        # SensorSerial written for each channel

# Restart the running counters from the configuration values so that
# re-running this cell reproduces the same output instead of appending to it.
layer_start = start_time
record_id = start_id
blocks = []

for i in range(1, 20):  # Per layer
    file = os.path.join(path, f'L{i:04d}.csv')
    df = pd.read_csv(file, names=['X', 'Y', 'P', 'T'])  # Read data locally
    df = df.drop(columns='T')  # Trigger command isn't important

    # Downsample from 100,000 Hz to 100 Hz; kept samples are 0.01 s apart
    down = df.iloc[::ds_rate].reset_index(drop=True)
    k = len(down)  # Number of values in this layer

    # Stamp samples exactly one sample period apart. Passing freq rather than
    # an end point avoids stretching the spacing to k/(k-1) periods.
    stamps = pd.date_range(start=layer_start, periods=k, freq=sample_period)
    down['Date'] = stamps.date
    down['Time'] = stamps.time

    # Apply noise to signals based on what's reasonable for the measurement
    for chan, sigma in noise_sigma.items():
        down[chan] = down[chan] + rng.normal(0, sigma, k)

    # Add each channel as a block of inserts
    for chan, serial in sensor_serial.items():
        blocks.append(pd.DataFrame({
            'RecordID': np.arange(record_id, record_id + k),
            'BuildID': 'B096',
            'SensorSerial': serial,
            'Value': down[chan],
            'Date': down['Date'],
            'Time': down['Time'],
        }))
        record_id += k

    # The next layer starts after this layer has finished plus the interlayer
    # dwell, so timestamps of consecutive layers never overlap.
    layer_start = layer_start + k * sample_period + dwell

# Concatenate once, which avoids quadratic copying from concat inside the loop,
# and pin the column order because the CSV is written without a header.
all_data = pd.concat(blocks, axis=0, ignore_index=True)[columns]
all_data.to_csv(os.path.join('0_preprocessing', 'clean_data', 'daq.csv'), header=False, index=False)