In [7]:
import os
import subprocess
from pathlib import Path
import pprint

import scipy.io

import cupy as cp
import cudf
import numpy as np
import tables
import pickle
import json
import gzip
import re
import gc
import itertools

from IPython.display import display

import datetime
import dateutil
import time

import math

import pandas as pd

import torch




# Drop the dataframe left over from a previous run of this cell, if there is one.
# This has to happen BEFORE the memory pool is trimmed below: free_all_blocks() only hands back
# blocks that are no longer in use, so trimming while the old 'df' is still alive cannot release it.
if 'df' in globals():
    del df
    gc.collect()

# Return the unused blocks of CuPy's default memory pool on GPU 0 to the device.
# NOTE(review): cudf may allocate through its own allocator rather than this CuPy pool, in which case
# this call does not affect dataframe memory - confirm for the installed configuration.
with cp.cuda.Device(0):
    cp.get_default_memory_pool().free_all_blocks()

print('Starting Script...')


# Set the path to the folder where the csv file is located.
# The folder can be overridden without editing the notebook by setting the TPLT_DATA_DIR
# environment variable; when it is not set, the original location below is used.
# path_folder_csv = Path('/mnt/shared/real_single_cell_data')
path_folder_csv = Path(os.environ.get(
    'TPLT_DATA_DIR',
    '/mnt/c/Users/sthur/OneDrive/Documents/School Graduate 2024 Spring/CSC_561_Machine_Learning/semester_project/project_files/lg_cell_data/single_cell_data/data_for_NN',
))

# Set the file name of the csv to load
# file_name = 'dataq_07-16-2024-13-20-00_TPLT_0.2C_0.02steps.csv'
file_name = 'cell_01_TPLT_0p2C_3139Ah.csv'

# Full path to the csv file
path_file_csv = path_folder_csv / file_name

# Define the column names for the csv file
column_names = ['Time', 'Voltage', 'Current', 'Temperature', 'Temperature_Reference', 'Date', 'Clock_Time']

# Load the csv file into a cudf dataframe, skipping the first 5 rows and using the column names defined above
df = cudf.read_csv(path_file_csv, skiprows=5, names=column_names)

Starting Script...


In [8]:
# Pulse-detection parameters and cell constants used by the rest of the notebook.

# Change in voltage (a decrease of this magnitude) that qualifies as the starting point of a pulse
dV_threshold = -0.01
# Change in current (an increase of this magnitude) that qualifies as the starting point of a pulse
dI_threshold = 0.1

# A 'Current' reading greater than this value qualifies as a load pulse
load_pulse_threshold = 0.2
# A 'Current' reading less than or equal to this value qualifies as a load pause
load_pause_threshold = 0.0
# The true 'Current' in Amps, taken from the B&K Precision Electronic Load, that indicates a load pulse
load_pulse_true_value = -0.6191

# Nominal capacity of the battery in Amp-hours
AHC_nominal = 3.139
# Capacity of the battery in Amp-hours obtained from a full charge and discharge cycle
Capacity_initialized = 3.139
# C-rate of the battery during the test, based on the nominal capacity of the battery
C_rate_initialized = 0.2

# Helper used throughout the notebook to put a dataframe into chronological order
def sort_df(df):
    """Return ``df`` with its rows ordered by ascending 'Time'.

    The result is a new dataframe; the input is not modified, and each row keeps
    the index label it had before sorting.
    """
    time_ordered = df.sort_values('Time')
    return time_ordered

# Sort the dataframe by the 'Time' column once, and renumber the index 0..n-1 so that label-based
# (.loc) and positional row selection pick the same rows.
# Every step below only adds or overwrites columns (no joins, no row filtering), so the row order
# established here is preserved and the dataframe does not need to be sorted again.
df = sort_df(df).reset_index(drop=True)

# Calculate the sampling interval by taking the difference between the first two 'Time' values
sampling_interval = df['Time'].iloc[1] - df['Time'].iloc[0]
print(f'Sampling Interval: {sampling_interval}')

# Calculate the number of rows it takes for 10 seconds to pass at the sampling interval.
# round() is used instead of plain truncation so that floating point noise in the measured interval
# (e.g. 0.0400000001 instead of 0.04) cannot turn 250 rows into 249.
num_rows_10_seconds = int(round(10 / sampling_interval))
print(f'Number of Rows for 10 Seconds: {num_rows_10_seconds}')

# Calculate the number of rows it takes for 30 minutes to pass at the sampling interval
num_rows_30_minutes = int(round((30 * 60) / sampling_interval))
print(f'Number of Rows for 30 Minutes: {num_rows_30_minutes}')

# Store a tolerance value for the number of additional rows to look past the 10 second window
num_rows_tolerance = 20


# Create a 'SOH' column by dividing the 'Capacity_initialized' value by the 'AHC_nominal' value
df['SOH'] = Capacity_initialized / AHC_nominal


# The values in the 'Current' column are voltage values from the current sensor, which are not accurate.
# Instead of converting the values, I know the true current values of the pulses from the B&K Precision electronic load.
# So for all data in the 'Current' column, if the value is <= load_pulse_threshold, replace it with 0.0, otherwise replace it with load_pulse_true_value.
# This will give the true current values for the pulses.
df['Current'] = cp.where(df['Current'] <= load_pulse_threshold, 0.0, load_pulse_true_value)


# ---- Remove false-positive current spikes -------------------------------------------------------
# This is done BEFORE the Ah / SOC columns are derived, so that spikes which get zeroed here are not
# integrated into the charge count.

# Choose a number of rows to search above and below any non-zero 'Current' value to determine if it is a false positive
num_rows_search = 5

# 'Current_shifted_down' is the 'Current' column shifted down by num_rows_search rows,
# 'Current_shifted_up' is the 'Current' column shifted up by num_rows_search rows
df['Current_shifted_down'] = df['Current'].shift(num_rows_search)
df['Current_shifted_up'] = df['Current'].shift(-num_rows_search)

# A non-zero 'Current' value whose neighbours num_rows_search rows before AND after are both zero is a
# false positive. Rows near the edges have no neighbour on one side (null after the shift); fillna(False)
# leaves those rows untouched.
is_false_positive = (
    (df['Current'] != 0)
    & (df['Current_shifted_down'] == 0)
    & (df['Current_shifted_up'] == 0)
).fillna(False)

# Set 'Current' to 0.0 on the false-positive rows. A boolean mask is used rather than a query + merge:
# it cannot duplicate rows, does not depend on float/string join keys, and keeps row order and index.
df['Current'] = df['Current'].where(~is_false_positive, 0.0)


# ---- Coulomb counting ---------------------------------------------------------------------------
# Calculate the instantaneous Ah column by multiplying the 'Current' column by the sampling interval, then dividing by 3600 to convert from seconds to hours
df['Ah_instantaneous'] = df['Current'] * (sampling_interval / 3600)

# Calculate the cumulative Ah column by taking the cumulative sum of the 'Ah_instantaneous' column
df['Ah'] = df['Ah_instantaneous'].cumsum()

# 'Ah_remaining' is 'Capacity_initialized' plus the cumulative 'Ah'. The pulse current is negative,
# so 'Ah' is zero or negative and the sum decreases as charge is drawn.
df['Ah_remaining'] = Capacity_initialized + df['Ah']

# 'SOC' is the 'Ah_remaining' column divided by the 'Capacity_initialized' value
df['SOC'] = df['Ah_remaining'] / Capacity_initialized

# Constant columns carrying the 'Capacity_initialized' and 'C_rate_initialized' values
df['Capacity_initialized'] = Capacity_initialized
df['C_rate_initialized'] = C_rate_initialized


# ---- TPLT detection -----------------------------------------------------------------------------
# Row distance used to look 10 / 20 / 30 seconds ahead: the 10 second row count plus the tolerance
look_ahead_rows = num_rows_10_seconds + num_rows_tolerance

# 'dI' is the difference between the current row and the previous row in the 'Current' column
df['dI'] = df['Current'].diff()

# 'dI_shifted_down' is the 'dI' column shifted down by 1 row
df['dI_shifted_down'] = df['dI'].shift(1)

# 'Current' as it will be one, two and three look-ahead distances after each row
df['I_shifted_up_10_seconds'] = df['Current'].shift(-look_ahead_rows)
df['I_shifted_up_20_seconds'] = df['Current'].shift(-2 * look_ahead_rows)
df['I_shifted_up_30_seconds'] = df['Current'].shift(-3 * look_ahead_rows)

# Add a column called 'TPLT_detected' which returns True if the following conditions are met:
# 1. The 'dI' column is NOT zero.
# 2. The 'dI_shifted_down' column IS zero.
# 3. The 'I_shifted_up_10_seconds' column IS zero.
# 4. The 'I_shifted_up_20_seconds' column is NOT zero.
# 5. The 'I_shifted_up_30_seconds' column IS zero.
# Rows where any of the inputs is null (start / end of the data) are marked False.
df['TPLT_detected'] = (
    (df['dI'] != 0)
    & (df['dI_shifted_down'] == 0)
    & (df['I_shifted_up_10_seconds'] == 0)
    & (df['I_shifted_up_20_seconds'] != 0)
    & (df['I_shifted_up_30_seconds'] == 0)
).fillna(False)


# Count the number of TRUE values in the 'TPLT_detected' column and store it in a variable called 'num_TPLT_detected'
num_TPLT_detected = df['TPLT_detected'].sum()
print(f'Number of TPLT Detected: {num_TPLT_detected}')










# # Grab the first 3 hours of data and save it to a csv file in the path_folder_csv directory
# data_segment = num_rows_30_minutes*6
# df = sort_df(df)
# df.head(data_segment).to_csv(path_folder_csv / 'TPLT_data_segment.csv', index=False)





# # Create a pandas dataframe from the cudf dataframe
# df_pd = df.to_pandas()

# # Identify the indices of rows where 'TPLT_detected' is True
# tpl_detected_indices = df_pd.index[df_pd['TPLT_detected'] == True].tolist()

# # Initialize an empty list to store the segments
# segments = []

# # For each index, grab 300 rows before and after that index
# for idx in tpl_detected_indices:
#     start_idx = max(0, idx - 300)
#     end_idx = min(len(df_pd), idx + 300)
#     segment = df_pd.iloc[start_idx:end_idx]
#     segments.append(segment)

# # Concatenate all segments into a single DataFrame
# concatenated_segments = pd.concat(segments)

# # Save the concatenated DataFrame to a CSV file in the path_folder_csv directory
# concatenated_segments.to_csv(path_folder_csv / 'TPLT_segments.csv', index=False)



Sampling Interval: 0.039999999999054126
Number of Rows for 10 Seconds: 250
Number of Rows for 30 Minutes: 45000
Number of TPLT Detected: 52


In [9]:
# Renumber the index 0..n-1 (row order is left as it is) before collecting event indices.
# The event indices gathered below are index LABELS, and the extraction code offsets them by row
# counts and also slices by position, so labels must coincide with row positions. Earlier processing
# (a merge followed by a sort) does not guarantee that.
df = df.reset_index(drop=True)

# Extract a list of the indices where the TPLT events are detected and call it 'TPLT_event_indices'
TPLT_event_indices = df[df['TPLT_detected'] == True].index.to_pandas().tolist()

# Add the per-row range flags and initialize them with False values. They are set to True for the
# rows of the first pulse, the pause between the pulses, the second pulse, and the OCV rest period.
df['first_pulse_range'] = False
df['pause_range'] = False
df['second_pulse_range'] = False
df['OCV_range'] = False

# Add a column called 'TPLT_group' to the cudf dataframe and initialize it with NaN values.
df['TPLT_group'] = cp.nan

# Create a function to get the data surrounding each TPLT event
def extract_TPLT_data(df, TPLT_event_indices, num_rows_before, num_rows_for_OCV, num_rows_for_pulse):
    """Flag the phases of every TPLT event in ``df`` and return the rows around each event.

    For each event index the function looks at four windows that follow the event (first pulse,
    pause, second pulse, OCV rest). Inside each window it sets the matching flag column from the
    'Current' values: non-zero current for the two pulse windows, zero current for the pause and
    OCV windows. The rows from ``num_rows_before`` rows ahead of the event up to the last row
    flagged as OCV are then numbered as one group in 'TPLT_group' and collected.

    Side effects: ``df`` is modified in place ('first_pulse_range', 'pause_range',
    'second_pulse_range', 'OCV_range' and 'TPLT_group' are written). When the windows of two events
    overlap, the later event overwrites the earlier event's values in ``df``; the segment already
    collected for the earlier event is taken before that happens.

    All row selection is label-based (``.loc``) on labels computed as ``event index +/- row count``,
    so the index of ``df`` is assumed to be integer labels that increase by one per row.

    Parameters
    ----------
    df : cudf.DataFrame
        Must contain 'Current' and the five columns listed under side effects.
    TPLT_event_indices : list of int
        Index labels of the rows where a TPLT event starts.
    num_rows_before : int
        Number of rows ahead of each event to include in its extracted segment.
    num_rows_for_OCV : int
        Number of rows by which the OCV search window extends past the second pulse window.
    num_rows_for_pulse : int
        Nominal number of rows of one pulse (also used for the pause between the pulses).

    Returns
    -------
    cudf.DataFrame
        The concatenated segments, one per event, in event order. Each segment runs from its start
        row to its end row INCLUSIVE, i.e. exactly the rows that were given that event's
        'TPLT_group' number. With no events, an empty dataframe with the columns of ``df``.
    """
    # cudf.concat cannot combine an empty list; with no detected events return an empty frame instead.
    if len(TPLT_event_indices) == 0:
        return df.iloc[0:0]

    TPLT_data_list = []

    # Create variables to designate the range of rows to search through for each range of data.
    # All of them are offsets relative to the event index; 'buffer' widens every window on both sides.
    buffer = 10
    first_pulse_search_range_end = num_rows_for_pulse + buffer

    pause_search_range_start = num_rows_for_pulse - buffer
    pause_search_range_end = first_pulse_search_range_end + num_rows_for_pulse + buffer

    second_pulse_search_range_start = pause_search_range_start + num_rows_for_pulse - buffer
    second_pulse_search_range_end = pause_search_range_end + num_rows_for_pulse + buffer

    OCV_search_range_start = second_pulse_search_range_start + num_rows_for_pulse - buffer
    OCV_search_range_end = second_pulse_search_range_end + num_rows_for_OCV + buffer

    # The position of the event in the list is used as its group number (0, 1, 2, ...)
    for group_num, index in enumerate(TPLT_event_indices):
        # Use the offsets from above to set the start and end indices for each range of data.
        # The first pulse window starts a little ahead of the event; it is clamped at 0 so that an
        # event within 'buffer' rows of the top of the data does not produce a negative label.
        first_pulse_range_start = max(index - buffer, 0)
        first_pulse_range_end = index + first_pulse_search_range_end

        pause_range_start = index + pause_search_range_start
        pause_range_end = index + pause_search_range_end

        second_pulse_range_start = index + second_pulse_search_range_start
        second_pulse_range_end = index + second_pulse_search_range_end

        OCV_range_start = index + OCV_search_range_start
        OCV_range_end = index + OCV_search_range_end

        # First, set the absolute starting point for the range of data that will be extracted
        start_idx = max(index - num_rows_before, 0)

        # Next, search the first pulse range for any rows that have a 'Current' value that is NOT equal to zero, and set the 'first_pulse_range' to True for those rows.
        first_pulse_non_zero = df.loc[first_pulse_range_start:first_pulse_range_end, 'Current'] != 0
        if first_pulse_non_zero.any():
            df.loc[first_pulse_range_start:first_pulse_range_end, 'first_pulse_range'] = first_pulse_non_zero

        # Next, search the pause range for any rows that have a 'Current' value that IS equal to zero, and set the 'pause_range' to True for those rows.
        pause_zero = df.loc[pause_range_start:pause_range_end, 'Current'] == 0
        if pause_zero.any():
            df.loc[pause_range_start:pause_range_end, 'pause_range'] = pause_zero

        # Next, search the second pulse range for any rows that have a 'Current' value that is NOT equal to zero, and set the 'second_pulse_range' to True for those rows.
        second_pulse_non_zero = df.loc[second_pulse_range_start:second_pulse_range_end, 'Current'] != 0
        if second_pulse_non_zero.any():
            df.loc[second_pulse_range_start:second_pulse_range_end, 'second_pulse_range'] = second_pulse_non_zero

        # Next, search the OCV range for any rows that have a 'Current' value that IS equal to zero, and set the 'OCV_range' to True for those rows.
        OCV_zero = df.loc[OCV_range_start:OCV_range_end, 'Current'] == 0
        if OCV_zero.any():
            df.loc[OCV_range_start:OCV_range_end, 'OCV_range'] = OCV_zero

        # Now search the OCV range again, and find the last row that had 'OCV_range' set to True, and set the end index to that row.
        # If no row in the window is flagged, fall back to the end of the OCV search window.
        OCV_true_indices = df.loc[OCV_range_start:OCV_range_end].query('OCV_range == True').index
        if not OCV_true_indices.empty:
            end_idx = OCV_true_indices[-1]
        else:
            end_idx = OCV_range_end

        # Set the 'TPLT_group' column to the group number for the range of data extracted
        df.loc[start_idx:end_idx, 'TPLT_group'] = group_num

        # Append the data from the start index to the end index to the TPLT_data_list.
        # The same inclusive, label-based slice as the group assignment above is used, so the
        # extracted segment contains exactly the rows that were just labelled (including end_idx).
        TPLT_data_list.append(df.loc[start_idx:end_idx])

    return cudf.concat(TPLT_data_list)

# Pull out the rows around every detected TPLT event: the 10 second row count is used both for the
# lead-in before the event and for the pulse length, the 30 minute row count for the OCV period.
TPLT_data = extract_TPLT_data(
    df,
    TPLT_event_indices,
    num_rows_before=num_rows_10_seconds,
    num_rows_for_OCV=num_rows_30_minutes,
    num_rows_for_pulse=num_rows_10_seconds,
)

# Report how many distinct 'TPLT_group' values the extracted data contains
num_TPLT_groups = len(TPLT_data["TPLT_group"].unique())
print(f'Number of TPLT groups: {num_TPLT_groups}')

Number of TPLT groups: 52


In [10]:
# Derive the per-group voltage features of the TPLT data. For every 'TPLT_group':
#   'TPLT_Vmax'     : maximum 'Voltage' over the rows flagged 'pause_range' (the pause between the pulses)
#   'TPLT_Vmin'     : minimum 'Voltage' over the rows flagged 'second_pulse_range'
#   'TPLT_delta_V2' : 'TPLT_Vmax' - 'TPLT_Vmin'
#   'OCV'           : maximum 'Voltage' over the rows flagged 'OCV_range'
# The dataframe is a cudf dataframe, so the cudf groupby / merge functions are used.

def _add_group_voltage_stat(data, range_flag, agg_func, out_col):
    """Return ``data`` with one per-group 'Voltage' statistic added as column ``out_col``.

    The rows of ``data`` where the boolean column ``range_flag`` is True are grouped by
    'TPLT_group', 'Voltage' is reduced with ``agg_func`` ('max' or 'min'), and the result is
    left-joined back onto every row of the group. Groups without any flagged row get no value
    from the join; they are forward-filled in 'Time' order. The result is sorted by 'Time'.
    """
    # Drop a column left over from an earlier run of this cell so the merge below does not
    # produce suffixed duplicates ('<out_col>_x' / '<out_col>_y').
    if out_col in data.columns:
        data = data.drop(columns=[out_col])

    # One row per group: the aggregated voltage, with the group key as a regular column for merging
    flagged_rows = data.query(f"{range_flag} == True")
    group_stat = (
        flagged_rows.groupby('TPLT_group')
        .agg({'Voltage': agg_func})
        .reset_index()
        .rename(columns={'Voltage': out_col})
    )

    # The 'left' join retains all rows of 'data', even if their group has no aggregated value
    merged = data.merge(group_stat, on='TPLT_group', how='left')

    # Sort BEFORE forward-filling: the row order of a join result is not guaranteed, and ffill
    # propagates the last valid value of whatever row happens to come before.
    merged = merged.sort_values(['Time'])
    merged[out_col] = merged[out_col].ffill()
    return merged


# Maximum voltage in the pause between the two pulses
TPLT_data = _add_group_voltage_stat(TPLT_data, 'pause_range', 'max', 'TPLT_Vmax')

# Minimum voltage in the range of the 2nd pulse
TPLT_data = _add_group_voltage_stat(TPLT_data, 'second_pulse_range', 'min', 'TPLT_Vmin')

# Calculate the 'TPLT_delta_V2' column by subtracting the 'TPLT_Vmin' column from the 'TPLT_Vmax' column.
TPLT_data['TPLT_delta_V2'] = TPLT_data['TPLT_Vmax'] - TPLT_data['TPLT_Vmin']

# Maximum voltage in the range of the OCV
TPLT_data = _add_group_voltage_stat(TPLT_data, 'OCV_range', 'max', 'OCV')


# The 'C_rate_initialized' value is the C-rate of the battery during the test based on the nominal capacity of the battery.
# The calculation for AHC is as follows:
# AHC = I/C_rate
# where I is the current in Amperes and C_rate is the C-rate.
# Therefore, C_rate = I/AHC, and the C_rate_based_on_AHC_initialized column will be calculated using this formula.
# Since all TPLT tests will only use one load value, we can use load_pulse_true_value for I, and Capacity_initialized for AHC.
# Use the absolute value of load_pulse_true_value since it is negative. Both operands are plain
# Python numbers, so the builtin abs() is used and the scalar result is assigned to every row.
TPLT_data['C_rate_based_on_AHC_initialized'] = abs(load_pulse_true_value) / Capacity_initialized


# Sort the 'TPLT_data' dataframe by the 'Time' column.
TPLT_data = TPLT_data.sort_values(['Time'])


# Find all groups that have SOC column values less than 0.12 and remove those groups from the dataframe.
# The whole group is removed, not only its low-SOC rows, so no truncated group is left behind.
min_SOC = 0.12
low_SOC_groups = TPLT_data[TPLT_data['SOC'] < min_SOC]['TPLT_group'].unique().to_pandas().tolist()
if low_SOC_groups:
    TPLT_data = TPLT_data[~TPLT_data['TPLT_group'].isin(low_SOC_groups)]





# # Grab the data for the first two TPLT_groups and save it to a csv file in the path_folder_csv directory
# TPLT_data.query("TPLT_group == 0 or TPLT_group == 1").to_csv(path_folder_csv / 'TPLT_data_first_two_groups.csv')

# # Calculate the maximum value of 'TPLT_group'
# max_group = TPLT_data['TPLT_group'].max()

# # Grab the data for the last two TPLT_groups and save it to a csv file in the path_folder_csv directory
# TPLT_data.query(f"TPLT_group == {max_group - 1} or TPLT_group == {max_group}").to_csv(path_folder_csv / 'TPLT_data_last_two_groups.csv')

In [11]:
# Columns of 'TPLT_data' that go into the export, in the order they are written
export_columns = [
    'Time',
    'Voltage',
    'Current',
    'SOC',
    'Capacity_initialized',
    'C_rate_initialized',
    'OCV',
    'TPLT_Vmax',
    'TPLT_delta_V2',
    'C_rate_based_on_AHC_initialized',
    'SOH',
    'TPLT_group',
]

# Column names to change so that they match the expected input format for the NN
nn_column_names = {
    'TPLT_Vmax': 'tplt_V_max',
    'TPLT_delta_V2': 'tplt_delta_V_2',
    'TPLT_group': 'dataset_idx',
}

# Build the export cudf: select the columns, then apply the renames
TPLT_data_export = TPLT_data[export_columns].rename(columns=nn_column_names)

# Save the TPLT_data_export cudf to a csv file in the path_folder_csv directory
TPLT_data_export.to_csv(path_folder_csv / 'TPLT_data_export.csv', index=False)


In [12]:
# Convert the 'TPLT_data_export' cudf to a NumPy array (via pandas) and then to a PyTorch tensor
# NOTE(review): every column, including 'Time', is stored as float32 - confirm that this precision
# is sufficient for the largest 'Time' values in the data.
TPLT_data_tensor = torch.tensor(TPLT_data_export.to_pandas().values, dtype=torch.float32)

# Define the base output file name
output_file_base = "TPLT_data_tensor"

# Remove the '.csv' extension from file_name if it exists.
# Only a trailing '.csv' is stripped; a '.csv' elsewhere in the name is left alone.
if file_name.endswith('.csv'):
    file_name = file_name[:-len('.csv')]

# Append the `file_name` variable to the output file name
output_file_name = f"{output_file_base}_{file_name}.pt"

# Save the PyTorch tensor to a file using torch.save and save it to the path_folder_csv directory
torch.save(TPLT_data_tensor, path_folder_csv / output_file_name)

# Print a message indicating that the data has been saved
print(f'TPLT Data Saved Successfully as {output_file_name}!')

TPLT Data Saved Successfully!
