In [31]:
import pandas as pd

# Load the measurement table stored under the 'test_dataframe' key of the HDF5 file.
# NOTE(review): the location below is an absolute, machine-specific Windows path;
# consider moving it to a configurable data directory.
hdf5_path = r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Test.h5"
hdf5_key = 'test_dataframe'
df = pd.read_hdf(hdf5_path, key=hdf5_key)

print(df)



              Source  Time (ms)          Type     Value
0      i030520235006      0.000  Nset (1/min)  0.000000
1      i030520235006      0.000   Angle (deg)  0.000000
2      i030520235006      0.000    Depth (mm)  0.119141
3      i030520235007      0.000  Nset (1/min)  0.000000
4      i030520234001      0.000    Depth (mm)  0.121094
...              ...        ...           ...       ...
59102  t030520235006   2098.991  TCP_rx (rad)  1.221000
59103  t030520235006   2098.991    TCP_z (mm)  1.500000
59104  t030520235006   2098.991  TCP_rz (rad) -1.209000
59105  t030520235006   2098.991    TCP_y (mm)  1.000000
59106  t030520235006   2098.991  TCP_ry (rad)  1.162000

[59107 rows x 4 columns]


In [32]:
# Add a 'Data source' column holding the first character of each 'Source' entry.
# Later cells treat 'i' rows as screwdriver data and 't' rows as robot data.
source_prefix = df['Source'].str[0]
df['Data source'] = source_prefix
print(df['Data source'])



0        i
1        i
2        i
3        i
4        i
        ..
59102    t
59103    t
59104    t
59105    t
59106    t
Name: Data source, Length: 59107, dtype: object


In [33]:
# Calculating the sample rate of the screwdriver data

import numpy as np

# Rows whose 'Data source' is 'i' are the screwdriver measurements
screwdriver_data = df[df['Data source'] == 'i']

# One group per (screw ID, measurement type): every individual signal
# gets its own sampling-rate estimate
grouped_screwdriver_data = screwdriver_data.groupby(['Source', 'Type'])

# Sampling rate (Hz) of each signal that has a usable time axis
sampling_rates = []

for (screw_id, signal_type), screw_data in grouped_screwdriver_data:
    # Sort first so the consecutive differences are real sample intervals
    # even if the rows of a signal are not stored in chronological order
    timestamps_ms = np.sort(screw_data['Time (ms)'].to_numpy())

    # A single sample has no interval; np.mean of an empty diff would be NaN
    if timestamps_ms.size < 2:
        continue

    # Average spacing between consecutive samples, in milliseconds
    average_time_delta = np.mean(np.diff(timestamps_ms))

    # Written as "not > 0" so that NaN and zero spacing are both skipped
    # (zero would give an infinite rate, NaN would poison the overall mean)
    if not (average_time_delta > 0):
        continue

    # 'Time (ms)' is in milliseconds, so 1000 / delta is the rate in Hz
    sampling_rate = 1000 / average_time_delta
    sampling_rates.append(sampling_rate)

if not sampling_rates:
    raise ValueError(
        "Cannot estimate the sampling rate: no screwdriver signal has at least "
        "two samples with increasing timestamps"
    )

# Overall rate: mean of the per-signal rates, rounded to the nearest integer
average_sampling_rate = round(np.mean(sampling_rates))

print(f'Average sampling rate: {average_sampling_rate} Hz')


Average sampling rate: 1000 Hz


In [34]:
# Resampling the robot data

import pandas as pd
import numpy as np

# Spacing of the new time grid, in the units of the 'Time (ms)' column:
# 1 ms between samples corresponds to 1000 Hz
resample_step_ms = 1

# Keep only the robot rows ('Data source' == 't')
df_robot = df[df['Data source'] == 't']

# Get unique robot IDs and sensor types
unique_ids = df_robot['Source'].unique()
unique_types = df_robot['Type'].unique()

# Resampled pieces are collected here and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previous rows on every iteration.
resampled_frames = []

# 'robot_id' rather than 'id', so the builtin id() is not shadowed in the kernel
for robot_id in unique_ids:
    for sensor_type in unique_types:
        # Subset DataFrame for each unique ID and sensor type
        df_id_type = df_robot[(df_robot['Source'] == robot_id) & (df_robot['Type'] == sensor_type)]

        # Not every ID necessarily has every sensor type
        if df_id_type.empty:
            continue

        # np.interp requires increasing x-coordinates; it does not check this
        # and returns meaningless values otherwise
        df_id_type = df_id_type.sort_values('Time (ms)', kind='stable')

        # New time grid from the first to the last original timestamp.
        # np.arange excludes the stop value, so the grid ends strictly
        # before the last original timestamp.
        new_time = np.arange(
            df_id_type['Time (ms)'].min(),
            df_id_type['Time (ms)'].max(),
            resample_step_ms,
        )

        # A signal whose first and last timestamps coincide yields an empty grid
        if new_time.size == 0:
            continue

        # Use linear interpolation to get the values on the new grid
        new_values = np.interp(
            new_time,
            df_id_type['Time (ms)'].to_numpy(),
            df_id_type['Value'].to_numpy(),
        )

        # Same four columns as the original long-format table;
        # the scalar 'Source' and 'Type' are broadcast to every row
        resampled_frames.append(pd.DataFrame({
            'Source': robot_id,
            'Time (ms)': new_time,
            'Type': sensor_type,
            'Value': new_values
        }))

# pd.concat raises on an empty list, so fall back to an empty frame
if resampled_frames:
    df_robot_resampled = pd.concat(resampled_frames, ignore_index=True)
else:
    df_robot_resampled = pd.DataFrame()

In [35]:
# Export the resampled robot data to CSV.
# NOTE(review): the export of the combined dataset further down writes to this
# same path, so running that cell replaces the file written here.
robot_csv_path = r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Test\resampled.csv"
df_robot_resampled.to_csv(path_or_buf=robot_csv_path)

In [14]:
# Build the combined dataset: original screwdriver rows plus resampled robot rows

# Screwdriver rows are the ones tagged 'i' in 'Data source'
df_screwdriver = df.loc[df['Data source'] == 'i']

# Stack both tables and renumber the index from 0
frames_to_join = [df_screwdriver, df_robot_resampled]
df_resampled = pd.concat(frames_to_join, ignore_index=True)

# The resampled robot frame was built without a 'Data source' column,
# so recompute the tag for every row from the first character of 'Source'
df_resampled['Data source'] = df_resampled['Source'].str[0]
print(df_resampled)

              Source  Time (ms)          Type     Value Data source
0      i030520235006      0.000  Nset (1/min)  0.000000           i
1      i030520235006      0.000   Angle (deg)  0.000000           i
2      i030520235006      0.000    Depth (mm)  0.119141           i
3      i030520235007      0.000  Nset (1/min)  0.000000           i
4      i030520234001      0.000    Depth (mm)  0.121094           i
...              ...        ...           ...       ...         ...
91428  t030520235007   1818.985  TCP_rz (rad) -1.210000           t
91429  t030520235007   1819.985  TCP_rz (rad) -1.210000           t
91430  t030520235007   1820.985  TCP_rz (rad) -1.210000           t
91431  t030520235007   1821.985  TCP_rz (rad) -1.210000           t
91432  t030520235007   1822.985  TCP_rz (rad) -1.210000           t

[91433 rows x 5 columns]


In [15]:
# Export the combined (screwdriver + resampled robot) dataset to CSV.
# NOTE(review): this is the same path the robot-only export cell writes to,
# so that earlier file is replaced by this one.
combined_csv_path = r"C:\Users\GHB\Desktop\SCREW PROJECT\Machine Learning\Test\resampled.csv"
df_resampled.to_csv(path_or_buf=combined_csv_path)

In [7]:
# Using a Min-Max scaler based on the Type of measurement


from sklearn.preprocessing import MinMaxScaler

# One fitted scaler per measurement type, keyed by the 'Type' value.
# Refitting a single shared scaler would keep only the parameters of the last
# type, so the scaling of the other types could not be inverted or reused.
scalers = {}

# Apply MinMaxScaler separately to each type of sensor measurement
for sensor_type in df_resampled['Type'].unique():
    # Rows belonging to the current sensor type
    mask = (df_resampled['Type'] == sensor_type)

    # Fresh scaler mapping this type's values onto [0, 1];
    # after the loop 'scaler' refers to the scaler of the last type
    scaler = MinMaxScaler(feature_range=(0, 1))

    # fit_transform expects a 2-D input (hence ['Value'] as a list) and returns
    # an (n, 1) array; flatten it to match the single column being assigned
    scaled_values = scaler.fit_transform(df_resampled.loc[mask, ['Value']]).ravel()

    # NOTE: overwrites 'Value' in place; the unscaled values are no longer
    # available in df_resampled after this cell
    df_resampled.loc[mask, 'Value'] = scaled_values

    scalers[sensor_type] = scaler

In [8]:
# Show the combined frame after the per-type Min-Max scaling cell has
# overwritten the 'Value' column in place
print(df_resampled)

              Source  Time (ms)          Type     Value Data source
0      i030520235006      0.000  Nset (1/min)  0.000000           i
1      i030520235006      0.000   Angle (deg)  0.000000           i
2      i030520235006      0.000    Depth (mm)  0.000153           i
3      i030520235007      0.000  Nset (1/min)  0.000000           i
4      i030520234001      0.000    Depth (mm)  0.000183           i
...              ...        ...           ...       ...         ...
91428  t030520235007   1818.985  TCP_rz (rad)  0.000000           t
91429  t030520235007   1819.985  TCP_rz (rad)  0.000000           t
91430  t030520235007   1820.985  TCP_rz (rad)  0.000000           t
91431  t030520235007   1821.985  TCP_rz (rad)  0.000000           t
91432  t030520235007   1822.985  TCP_rz (rad)  0.000000           t

[91433 rows x 5 columns]
